import jsonimport refrom collections import defaultdictimport timeimport osimport sysimport loggingimport csvfrom collections import defaultdictimport sys# ========== 配置日志 ==========log_file = f"model.json"logging.basicConfig( level=logging.INFO, format='%(message)s', handlers=[ logging.FileHandler(log_file, mode='w', encoding='utf-8'), logging.StreamHandler(sys.stdout) ])
def log(*args, **kwargs): message = " ".join(str(arg) for arg in args) logging.info(message)
# ========== 时间字符串解析 ==========def parse_time(time_str): if not time_str or time_str == '0': return 0 if time_str.startswith("<"): return 0 match = re.search(r'\d+', time_str) if not match: sys.exit("error: not find valid number!") return float(match.group())
def analyze_and_save_layer_by_mod_qat(json_data, output_csv="layer_timing.csv"): grouped_by_mod_prefix = defaultdict(list) os.makedirs(output_csv,exist_ok=True) for layer in json_data.get("layer details", []): layer_name = layer.get("layer name", "") computing_time = parse_time(layer.get("computing cost (no DDR)", "0")) loading_time = parse_time(layer.get("load cost", "0")) storeing_time = parse_time(layer.get("store cost", "0")) total_time = computing_time + loading_time + storeing_time
# 提取 Mod 字段 mod_match = re.search(r'"Mod":\s*"([^"]+)"', layer_name) if not mod_match: mod_match=layer_name mod_path=mod_match else: mod_path = mod_match.group(1) mod_prefix = mod_path.split('.')[0] if '.' in mod_path else mod_path
grouped_by_mod_prefix[mod_prefix].append({ "mod_prefix": mod_prefix, "mod_path": mod_path, "total_time": total_time, "compute": computing_time, "load": loading_time, "store": storeing_time })
# 打印输出 + 按总耗时排序 # print("\n每个模块前缀下各算子详细耗时(按耗时降序):\n") csv_rows = [] for mod_prefix, layers in grouped_by_mod_prefix.items(): sorted_layers = sorted(layers, key=lambda x: x["total_time"], reverse=True) # print(f"模块: {mod_prefix} (共 {len(sorted_layers)} 层)") # print("-" * 60) for idx, layer in enumerate(sorted_layers, 1): # print(f"{idx}. Layer: {layer['mod_path']}") # print(f" 总耗时: {layer['total_time']:.2f} us") # print(f" 计算: {layer['compute']:.2f} us | Load: {layer['load']:.2f} us | Store: {layer['store']:.2f} us") csv_rows.append([ mod_prefix, layer["mod_path"], f"{layer['total_time']:.2f}", f"{layer['compute']:.2f}", f"{layer['load']:.2f}", f"{layer['store']:.2f}" ]) # print("-" * 60)
# 保存为 CSV 文件 with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile: writer = csv.writer(csvfile) writer.writerow(["mod_prefix", "mod_path", "total_time(us)", "compute(us)", "load(us)", "store(us)"]) writer.writerows(csv_rows)
print(f"\n✅ 每个 mod_prefix 内算子已按耗时降序排序,并保存到: {output_csv}")
from collections import defaultdict
import csvimport reimport osfrom collections import defaultdict
def analyze_selected_mod_prefixes(json_data, selected_prefixes=None, csv_output_dir='./'): os.makedirs(csv_output_dir, exist_ok=True) grouped_by_custom_prefix = defaultdict(list)
for layer in json_data.get("layer details", []): layer_name = layer.get("layer name", "") computing_time = parse_time(layer.get("computing cost (no DDR)", "0")) loading_time = parse_time(layer.get("load cost", "0")) storeing_time = parse_time(layer.get("store cost", "0")) total_time = computing_time + loading_time + storeing_time
if selected_prefixes: for prefix in selected_prefixes: if layer_name.strip().startswith(prefix): grouped_by_custom_prefix[prefix].append({ "layer_name": layer_name, "total_time": total_time, "compute": computing_time, "load": loading_time, "store": storeing_time }) break else: mod_match = re.search(r'"Mod":\s*"([^"]+)"', layer_name) if not mod_match: continue mod_path = mod_match.group(1) mod_prefix = mod_path.split('.')[0] grouped_by_custom_prefix[mod_prefix].append({ "layer_name": layer_name, "mod_path": mod_path, "total_time": total_time, "compute": computing_time, "load": loading_time, "store": storeing_time })
for prefix, ops in grouped_by_custom_prefix.items(): ops_sorted = sorted(ops, key=lambda x: x["total_time"], reverse=True) # 保护文件名不与已有目录冲突 sanitized_prefix = prefix.replace("/", "_") filename = f"{sanitized_prefix}_timing_report.csv" csv_path = os.path.join(csv_output_dir, filename) # 如果路径是个目录,加后缀防冲突 if os.path.isdir(csv_path): csv_path += "_file.csv" with open(csv_path, mode='w', newline='', encoding='utf-8') as f: writer = csv.writer(f) writer.writerow(["layer_name", "total_time(us)", "compute(us)", "load(us)", "store(us)"]) for op in ops_sorted: writer.writerow([op["layer_name"], f"{op['total_time']:.2f}", f"{op['compute']:.2f}", f"{op['load']:.2f}", f"{op['store']:.2f}"])
# ========== 分析层与类型耗时 ==========def analyze_json_data(json_data, model=True): layer_costs = [] type_costs = defaultdict(float) type_compute_costs = defaultdict(float) type_ddr_costs = defaultdict(float) total_computing = 0 total_loading = 0 total_storing = 0 for layer in json_data.get("layer details", []): layer_name = layer.get("layer name", "") computing_cost = layer.get("computing cost (no DDR)", "0") load_cost = layer.get("load cost", "0") store_cost = layer.get("store cost", "0")
computing_time = parse_time(computing_cost) loading_time = parse_time(load_cost) storeing_time = parse_time(store_cost)
total_computing += computing_time total_loading += loading_time total_storing += storeing_time
total_time = computing_time + loading_time + storeing_time ddr_time = loading_time + storeing_time layer_costs.append((layer_name, total_time)) # if "model.head.det_head" in layer_name: # print(layer_name) op_match = re.search(r'\"OP\":\s*\"([^\"]+)\"', layer_name) if op_match: op_type = op_match.group(1) restr = re.match(r'^([^./]+[./][^./]+)', op_type) else: # op_type = layer_name last_part = layer_name.split('/')[-1] # 提取前缀中的字母部分,比如 Where1 -> Where match = re.match(r"([A-Za-z]+)", last_part) op_type = match.group(1) if match else None restr=None if restr and model: key = restr.group(1) else: key = op_type if "HzCalibration" in layer_name: key="HzCalibration" type_costs[key] += total_time type_compute_costs[key] += computing_time type_ddr_costs[key] += ddr_time
return layer_costs, type_costs, total_computing, total_loading, total_storing, type_compute_costs, type_ddr_costs
# ========== 主函数 ==========def main(path): filename = path args=sys.argv with open(filename, 'r', encoding='utf-8') as file: data = json.load(file) log("="*30, "模型信息", "="*30) log("计算量: ", round(data['summary']['model info']['original conv ops'] / 10**9, 2), 'Gops') log("延迟: ", round(1000 / data['summary']['performance']['FPS'], 2), "ms") log("HBDK版s本号: ", data['summary']['model info']['compiling HBDK version']) log("BPU March: ", data['summary']['model info']['BPU march']) log("平均带宽: ", round(data['summary']['DDR access data']['DDR bytes per second'] / 2**30, 2), 'GB/s') json_data = data['summary'] layer_costs, type_costs, total_computing, total_loading, total_storing, type_compute_costs, type_ddr_costs = analyze_json_data(json_data) top_layers = sorted(layer_costs, key=lambda x: x[1], reverse=True)[:20] sorted_types = sorted(type_costs.items(), key=lambda x: x[1], reverse=True) log("\n模型总耗时统计:") log(f"计算耗时(no DDR): {total_computing:.2f} us") log(f"load耗时: {total_loading:.2f} us") log(f"store耗时: {total_storing:.2f} us") log(f"总耗时: {total_computing + total_loading + total_storing:.2f} us") log("="*70) log("\n按类型统计的总耗时:") log(f"{'op_type':<71} {'总耗时(us)':<15} {'计算耗时(us)':<15} {'ddr耗时(us)':<15}") log("-" * 150)
sum_time = 0 sum_compute_costs = 0 sum_ddr_costs = 0 for op_type, time in sorted_types: sum_time += type_costs[op_type] sum_compute_costs += type_compute_costs[op_type] sum_ddr_costs += type_ddr_costs[op_type] log(f"{op_type:<70}: {type_costs[op_type]:>10.2f} {type_compute_costs[op_type]:>15.2f} {type_ddr_costs[op_type]:>15.2f}") log('-' * 150) log(f"sum_time: {sum_time:.2f} us sum_compute_costs: {sum_compute_costs:.2f} us sum_ddr_cost: {sum_ddr_costs:.2f} us")
log("\n耗时Top 20的layer: ") for i, (layer_name, time) in enumerate(top_layers, 1): log(f"{i}. 耗时: {time:.2f} us") log(f" Layer名称: {layer_name}") log() # import pdb;pdb.set_trace() # if args[1]== "qat": # analyze_and_save_layer_by_mod_qat(json_data) # elif args[1]=="ptq": # analyze_and_save_layer_by_mod_ptq(json_data) # selected_prefixes = ["lightmap_/img_backbone","lightmap_/img_neck","lightmap_/tf_local_encoder","lightmap_/frustum_to_voxel/","sparse4d_model.head", "sparse4d_model.img_neck"] # analyze_selected_mod_prefixes(json_data,selected_prefixes)
# ========== 入口 ==========if __name__ == '__main__': path = 'perf.json' main(path)
评论