import csv
import json
import logging
import os
import re
import sys
from collections import defaultdict
# ========== Logging setup ==========
log_file = "model.log"  # plain-text report; ".log" avoids confusion with the input JSON
logging.basicConfig(
level=logging.INFO,
format='%(message)s',
handlers=[
logging.FileHandler(log_file, mode='w', encoding='utf-8'),
logging.StreamHandler(sys.stdout)
]
)
def log(*args, **kwargs):
    """print()-style helper that mirrors each message to the log file and stdout."""
    message = " ".join(str(arg) for arg in args)
    logging.info(message)
# ========== Time-string parsing ==========
def parse_time(time_str):
    """Parse a cost string such as "123.45us" into a float number of microseconds."""
    if not time_str or time_str == '0':
        return 0
    if time_str.startswith("<"):
        # Sub-threshold costs (e.g. "<0.01us") are treated as zero.
        return 0
    # Capture the decimal part too; r'\d+' alone would truncate "123.45" to 123.
    match = re.search(r'\d+(?:\.\d+)?', time_str)
    if not match:
        sys.exit("error: no valid number found in time string!")
    return float(match.group())
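# Expected behaviour on the cost strings seen in these perf reports (the exact
# unit suffix is an assumption; only the leading number is parsed):
#   parse_time("0")        -> 0
#   parse_time("<0.01us")  -> 0      (negligible cost)
#   parse_time("123.45us") -> 123.45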
def analyze_and_save_layer_by_mod_qat(json_data, output_csv="layer_timing.csv"):
    grouped_by_mod_prefix = defaultdict(list)
    # Create the parent directory if needed; passing output_csv itself to
    # os.makedirs would create a directory with the CSV's name and break the
    # open() call below.
    os.makedirs(os.path.dirname(output_csv) or '.', exist_ok=True)
for layer in json_data.get("layer details", []):
layer_name = layer.get("layer name", "")
computing_time = parse_time(layer.get("computing cost (no DDR)", "0"))
loading_time = parse_time(layer.get("load cost", "0"))
        storing_time = parse_time(layer.get("store cost", "0"))
        total_time = computing_time + loading_time + storing_time
        # Extract the "Mod" attribute. Layer names are expected to embed
        # attributes such as {"Mod": "backbone.stage1", ...}; fall back to the
        # raw layer name when none is present.
        mod_match = re.search(r'"Mod":\s*"([^"]+)"', layer_name)
        mod_path = mod_match.group(1) if mod_match else layer_name
        mod_prefix = mod_path.split('.')[0]
grouped_by_mod_prefix[mod_prefix].append({
"mod_prefix": mod_prefix,
"mod_path": mod_path,
"total_time": total_time,
"compute": computing_time,
"load": loading_time,
"store": storeing_time
})
    # Within each module prefix, sort layers by total time (descending) and
    # collect the rows for the CSV report.
    csv_rows = []
for mod_prefix, layers in grouped_by_mod_prefix.items():
sorted_layers = sorted(layers, key=lambda x: x["total_time"], reverse=True)
# print(f"模块: {mod_prefix} (共 {len(sorted_layers)} 层)")
# print("-" * 60)
for idx, layer in enumerate(sorted_layers, 1):
# print(f"{idx}. Layer: {layer['mod_path']}")
# print(f" 总耗时: {layer['total_time']:.2f} us")
# print(f" 计算: {layer['compute']:.2f} us | Load: {layer['load']:.2f} us | Store: {layer['store']:.2f} us")
csv_rows.append([
mod_prefix,
layer["mod_path"],
f"{layer['total_time']:.2f}",
f"{layer['compute']:.2f}",
f"{layer['load']:.2f}",
f"{layer['store']:.2f}"
])
# print("-" * 60)
    # Save to a CSV file
with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["mod_prefix", "mod_path", "total_time(us)", "compute(us)", "load(us)", "store(us)"])
writer.writerows(csv_rows)
print(f"\n✅ 每个 mod_prefix 内算子已按耗时降序排序,并保存到: {output_csv}")
def analyze_selected_mod_prefixes(json_data, selected_prefixes=None, csv_output_dir='./'):
os.makedirs(csv_output_dir, exist_ok=True)
grouped_by_custom_prefix = defaultdict(list)
for layer in json_data.get("layer details", []):
layer_name = layer.get("layer name", "")
computing_time = parse_time(layer.get("computing cost (no DDR)", "0"))
loading_time = parse_time(layer.get("load cost", "0"))
        storing_time = parse_time(layer.get("store cost", "0"))
        total_time = computing_time + loading_time + storing_time
if selected_prefixes:
for prefix in selected_prefixes:
if layer_name.strip().startswith(prefix):
grouped_by_custom_prefix[prefix].append({
"layer_name": layer_name,
"total_time": total_time,
"compute": computing_time,
"load": loading_time,
"store": storeing_time
})
break
else:
mod_match = re.search(r'"Mod":\s*"([^"]+)"', layer_name)
if not mod_match:
continue
mod_path = mod_match.group(1)
mod_prefix = mod_path.split('.')[0]
grouped_by_custom_prefix[mod_prefix].append({
"layer_name": layer_name,
"mod_path": mod_path,
"total_time": total_time,
"compute": computing_time,
"load": loading_time,
"store": storeing_time
})
for prefix, ops in grouped_by_custom_prefix.items():
ops_sorted = sorted(ops, key=lambda x: x["total_time"], reverse=True)
        # Keep the filename from clashing with existing directory paths
sanitized_prefix = prefix.replace("/", "_")
filename = f"{sanitized_prefix}_timing_report.csv"
csv_path = os.path.join(csv_output_dir, filename)
        # If the path already exists as a directory, append a suffix to avoid the clash
if os.path.isdir(csv_path):
csv_path += "_file.csv"
with open(csv_path, mode='w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(["layer_name", "total_time(us)", "compute(us)", "load(us)", "store(us)"])
for op in ops_sorted:
writer.writerow([op["layer_name"], f"{op['total_time']:.2f}", f"{op['compute']:.2f}", f"{op['load']:.2f}", f"{op['store']:.2f}"])
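# A usage sketch with illustrative prefixes; the commented-out call at the end
# of main() shows prefixes actually used in this project:
#   analyze_selected_mod_prefixes(json_data, ["backbone", "neck"], csv_output_dir="./reports")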
# ========== Per-layer and per-type timing analysis ==========
def analyze_json_data(json_data, model=True):
layer_costs = []
type_costs = defaultdict(float)
type_compute_costs = defaultdict(float)
type_ddr_costs = defaultdict(float)
total_computing = 0
total_loading = 0
total_storing = 0
for layer in json_data.get("layer details", []):
layer_name = layer.get("layer name", "")
computing_cost = layer.get("computing cost (no DDR)", "0")
load_cost = layer.get("load cost", "0")
store_cost = layer.get("store cost", "0")
computing_time = parse_time(computing_cost)
loading_time = parse_time(load_cost)
        storing_time = parse_time(store_cost)
        total_computing += computing_time
        total_loading += loading_time
        total_storing += storing_time
        total_time = computing_time + loading_time + storing_time
        ddr_time = loading_time + storing_time
layer_costs.append((layer_name, total_time))
# if "model.head.det_head" in layer_name:
# print(layer_name)
        # Group by OP type: prefer the embedded "OP" attribute, otherwise fall
        # back to the alphabetic prefix of the last path component.
        op_match = re.search(r'"OP":\s*"([^"]+)"', layer_name)
        if op_match:
            op_type = op_match.group(1)
            # Keep only the first two dot-/slash-separated components.
            restr = re.match(r'^([^./]+[./][^./]+)', op_type)
        else:
            last_part = layer_name.split('/')[-1]
            # Take the alphabetic prefix, e.g. "Where1" -> "Where".
            match = re.match(r"([A-Za-z]+)", last_part)
            op_type = match.group(1) if match else None
            restr = None
        if restr and model:
            key = restr.group(1)
        else:
            key = op_type
        # Calibration nodes get their own bucket regardless of OP type.
        if "HzCalibration" in layer_name:
            key = "HzCalibration"
type_costs[key] += total_time
type_compute_costs[key] += computing_time
type_ddr_costs[key] += ddr_time
return layer_costs, type_costs, total_computing, total_loading, total_storing, type_compute_costs, type_ddr_costs
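# A sketch of consuming the 7-tuple returned above (names mirror main()):
#   (layer_costs, type_costs, total_computing, total_loading,
#    total_storing, type_compute_costs, type_ddr_costs) = analyze_json_data(json_data)
#   layer_costs is a list of (layer_name, total_time) pairs; the three dicts
#   are keyed by op type.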
# ========== Main ==========
def main(path):
    args = sys.argv  # only consulted by the optional dispatch at the end of main()
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)
log("="*30, "模型信息", "="*30)
log("计算量: ", round(data['summary']['model info']['original conv ops'] / 10**9, 2), 'Gops')
log("延迟: ", round(1000 / data['summary']['performance']['FPS'], 2), "ms")
log("HBDK版s本号: ", data['summary']['model info']['compiling HBDK version'])
log("BPU March: ", data['summary']['model info']['BPU march'])
log("平均带宽: ", round(data['summary']['DDR access data']['DDR bytes per second'] / 2**30, 2), 'GB/s')
json_data = data['summary']
layer_costs, type_costs, total_computing, total_loading, total_storing, type_compute_costs, type_ddr_costs = analyze_json_data(json_data)
top_layers = sorted(layer_costs, key=lambda x: x[1], reverse=True)[:20]
sorted_types = sorted(type_costs.items(), key=lambda x: x[1], reverse=True)
log("\n模型总耗时统计:")
log(f"计算耗时(no DDR): {total_computing:.2f} us")
log(f"load耗时: {total_loading:.2f} us")
log(f"store耗时: {total_storing:.2f} us")
log(f"总耗时: {total_computing + total_loading + total_storing:.2f} us")
log("="*70)
log("\n按类型统计的总耗时:")
log(f"{'op_type':<71} {'总耗时(us)':<15} {'计算耗时(us)':<15} {'ddr耗时(us)':<15}")
log("-" * 150)
sum_time = 0
sum_compute_costs = 0
sum_ddr_costs = 0
    for op_type, cost in sorted_types:
        sum_time += cost
        sum_compute_costs += type_compute_costs[op_type]
        sum_ddr_costs += type_ddr_costs[op_type]
        log(f"{op_type:<70}: {cost:>10.2f} {type_compute_costs[op_type]:>15.2f} {type_ddr_costs[op_type]:>15.2f}")
    log('-' * 150)
    log(f"sum_time: {sum_time:.2f} us  sum_compute_costs: {sum_compute_costs:.2f} us  sum_ddr_costs: {sum_ddr_costs:.2f} us")
log("\n耗时Top 20的layer: ")
for i, (layer_name, time) in enumerate(top_layers, 1):
log(f"{i}. 耗时: {time:.2f} us")
log(f" Layer名称: {layer_name}")
log()
    # Optional deeper breakdowns (uncomment as needed):
    # if args[1] == "qat":
    #     analyze_and_save_layer_by_mod_qat(json_data)
    # elif args[1] == "ptq":
    #     analyze_and_save_layer_by_mod_ptq(json_data)
    # selected_prefixes = ["lightmap_/img_backbone", "lightmap_/img_neck", "lightmap_/tf_local_encoder", "lightmap_/frustum_to_voxel/", "sparse4d_model.head", "sparse4d_model.img_neck"]
    # analyze_selected_mod_prefixes(json_data, selected_prefixes)
# ========== Entry point ==========
if __name__ == '__main__':
path = 'perf.json'
main(path)
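# Assumed invocation (the script name is hypothetical; argv[1], when present,
# selects the commented-out "qat"/"ptq" dispatch inside main()):
#   python analyze_perf.py qat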