# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
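
"""
Summarize PaddlePaddle inference benchmark logs into Excel sheets.

The script walks --log_path for *.log files, parses each log into one row,
writes the raw table to --output_name, and can additionally emit TensorRT
precision and MKL-DNN comparison sheets.

Example invocation (the script filename here is illustrative):
    python benchmark_analysis.py --log_path ./output_pipeline \
        --output_name benchmark_excel.xlsx --analysis_trt --analysis_mkl
"""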
import os
import re
import argparse

import pandas as pd


def parse_args():
    """
    Parse command-line arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--log_path",
        type=str,
        default="./output_pipeline",
        help="benchmark log path")
    parser.add_argument(
        "--output_name",
        type=str,
        default="benchmark_excel.xlsx",
        help="output excel file name")
    parser.add_argument(
        "--analysis_trt", dest="analysis_trt", action='store_true')
    parser.add_argument(
        "--analysis_mkl", dest="analysis_mkl", action='store_true')
    return parser.parse_args()


def find_all_logs(path_walk):
    """
    Recursively find all .log files under the target directory.
    """
    for root, _dirs, files in os.walk(path_walk):
        for file_name in files:
            # escape the dot and anchor the end, so only true ".log"
            # extensions match (a bare ".*.log" also matched names
            # like "xxxlog" or "a.log.bak")
            if re.match(r'.*\.log$', file_name):
                full_path = os.path.join(root, file_name)
                yield file_name, full_path
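
# Illustrative usage of find_all_logs, using the default log directory:
#   for name, path in find_all_logs("./output_pipeline"):
#       print(name, path)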


def process_log(file_name):
    """
    Parse one benchmark log into a flat dict of config, model, and perf fields.
    """
    # conf and data fields whose value is the single token after "<key>:"
    plain_keys = [
        "runtime_device", "ir_optim", "enable_memory_optim", "enable_tensorrt",
        "precision", "enable_mkldnn", "cpu_math_library_num_threads",
        "batch_size", "input_shape",
    ]
    # perf fields whose value is the first comma-separated item after "<key>:"
    # ("preproce_time(ms)" keeps the spelling used by the upstream logs)
    first_item_keys = [
        "cpu_rss(MB)", "gpu_rss(MB)", "gpu_util", "preproce_time(ms)",
        "inference_time(ms)", "postprocess_time(ms)",
    ]
    output_dict = {}
    with open(file_name, 'r') as f:
        for i, data in enumerate(f):
            # skip the log header line
            if i == 0:
                continue
            line_lists = data.split(" ")
            for key in plain_keys:
                token = key + ":"
                if token in line_lists:
                    output_dict[key] = line_lists[line_lists.index(token) +
                                                  1].strip()
            for key in first_item_keys:
                token = key + ":"
                if token in line_lists:
                    value = line_lists[line_lists.index(token) + 1].strip()
                    output_dict[key] = value.split(',')[0]
            # model info: keep only the last non-empty path component
            if "model_name:" in line_lists:
                value = line_lists[line_lists.index("model_name:") + 1].strip()
                output_dict["model_name"] = list(
                    filter(None, value.split('/')))[-1]
    return output_dict
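
# Illustrative usage of process_log (the log filename is hypothetical):
#   metrics = process_log("./output_pipeline/model_gpu.log")
#   metrics.get("inference_time(ms)")  # first value before any comma, as a str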


def filter_df_merge(cpu_df, filter_column=None):
    """
    Split a dataframe into groups by filter_column, then merge the groups
    back into one row per ('model_name', 'batch_size'), suffixing each
    group's columns with "_<filter_column>_<group value>".

    Args:
        cpu_df (pd.DataFrame): raw benchmark dataframe to regroup.
        filter_column (str): column whose values distinguish the groups.
    """
    if not filter_column:
        raise ValueError(
            "please assign filter_column for filter_df_merge function")
    df_lists = []
    filter_column_lists = []
    for k, v in cpu_df.groupby(filter_column, dropna=True):
        filter_column_lists.append(k)
        df_lists.append(v)
    final_output_df = df_lists[-1]
    # merge the remaining groups onto the last one, keyed by model and batch
    for i in range(len(df_lists) - 1):
        right_suffix = df_lists[i][filter_column].unique()[0]
        if not pd.isnull(right_suffix):
            final_output_df = pd.merge(
                final_output_df,
                df_lists[i],
                how='left',
                left_on=['model_name', 'batch_size'],
                right_on=['model_name', 'batch_size'],
                suffixes=('', '_{0}_{1}'.format(filter_column, right_suffix)))
    # rename the base group's columns with its own suffix
    origin_column_names = list(cpu_df.columns.values)
    origin_column_names.remove(filter_column)
    suffix = final_output_df[filter_column].unique()[0]
    for name in origin_column_names:
        final_output_df.rename(
            columns={name: "{0}_{1}_{2}".format(name, filter_column, suffix)},
            inplace=True)
    final_output_df.rename(
        columns={
            filter_column: "{0}_{1}_{2}".format(filter_column, filter_column,
                                                suffix)
        },
        inplace=True)
    final_output_df.sort_values(
        by=[
            "model_name_{0}_{1}".format(filter_column, suffix),
            "batch_size_{0}_{1}".format(filter_column, suffix)
        ],
        inplace=True)
    return final_output_df
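
# Sketch of the frame filter_df_merge returns when filter_column="precision"
# with groups "fp32" and "fp16" (column names follow the suffix rule above;
# which group ends up as the base depends on groupby ordering):
#   model_name_precision_fp32 | batch_size_precision_fp32 |
#   inference_time(ms)_precision_fp32 | inference_time(ms)_precision_fp16 | ...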


def trt_perf_analysis(raw_df):
    """
    Split the raw dataframe by precision and compare TensorRT
    precision-mode performance on GPU.
    """
    # filter df by gpu; the merged frame carries one column set per precision
    gpu_df = raw_df.loc[raw_df['runtime_device'] == 'gpu']
    new_df = filter_df_merge(gpu_df, "precision")

    # calculate inference-time diff ratios between precision modes
    infer_fp32 = "inference_time(ms)_precision_fp32"
    infer_fp16 = "inference_time(ms)_precision_fp16"
    infer_int8 = "inference_time(ms)_precision_int8"
    new_df["fp32_fp16_diff"] = new_df[[infer_fp32, infer_fp16]].apply(
        lambda x: (float(x[infer_fp16]) - float(x[infer_fp32])) / float(x[infer_fp32]),
        axis=1)
    # TensorRT fp32 time vs. the plain-GPU baseline; this assumes the merged
    # frame still carries an unsuffixed "inference_time(ms)" column for
    # plain-GPU runs
    new_df["fp32_gpu_diff"] = new_df[["inference_time(ms)", infer_fp32]].apply(
        lambda x: (float(x[infer_fp32]) - float(x["inference_time(ms)"])) / float(x["inference_time(ms)"]),
        axis=1)
    new_df["fp16_int8_diff"] = new_df[[infer_fp16, infer_int8]].apply(
        lambda x: (float(x[infer_int8]) - float(x[infer_fp16])) / float(x[infer_fp16]),
        axis=1)
    return new_df
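
# Reading the *_diff columns: each is (new - baseline) / baseline, so a
# negative value means the newer configuration is faster than its baseline.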


def mkl_perf_analysis(raw_df):
    """
    Split the raw dataframe into comparison groups and measure the effect
    of enabling MKL-DNN and of raising the CPU math-library thread count.
    """
    # filter df by cpu; build one frame for the mkldnn on/off comparison
    # (fixed at 1 thread) and one for the thread-count comparison
    # (fixed at mkldnn enabled)
    cpu_df = raw_df.loc[raw_df['runtime_device'] == 'cpu']
    mkl_compare_df = cpu_df.loc[cpu_df['cpu_math_library_num_threads'] == '1']
    thread_compare_df = cpu_df.loc[cpu_df['enable_mkldnn'] == 'True']
    # define the dataframes to be analyzed
    output_mkl_df = filter_df_merge(mkl_compare_df, 'enable_mkldnn')
    output_thread_df = filter_df_merge(thread_compare_df,
                                       'cpu_math_library_num_threads')

    # calculate performance diff ratios
    # compare mkldnn-enabled inference time and memory with the cpu baseline
    enable_mkldnn = "inference_time(ms)_enable_mkldnn_True"
    disable_mkldnn = "inference_time(ms)_enable_mkldnn_False"
    output_mkl_df["mkl_infer_diff"] = output_mkl_df[[
        enable_mkldnn, disable_mkldnn
    ]].apply(
        lambda x: (float(x[enable_mkldnn]) - float(x[disable_mkldnn])) / float(x[disable_mkldnn]),
        axis=1)
    cpu_enable_mkldnn = "cpu_rss(MB)_enable_mkldnn_True"
    cpu_disable_mkldnn = "cpu_rss(MB)_enable_mkldnn_False"
    output_mkl_df["mkl_cpu_rss_diff"] = output_mkl_df[[
        cpu_enable_mkldnn, cpu_disable_mkldnn
    ]].apply(
        lambda x: (float(x[cpu_enable_mkldnn]) - float(x[cpu_disable_mkldnn])) / float(x[cpu_disable_mkldnn]),
        axis=1)

    # compare multi-threaded cpu performance with the single-threaded baseline
    # (assumes the logs contain runs with exactly 1 and 6 threads)
    num_threads_1 = "inference_time(ms)_cpu_math_library_num_threads_1"
    num_threads_6 = "inference_time(ms)_cpu_math_library_num_threads_6"
    output_thread_df["mkl_infer_diff"] = output_thread_df[[
        num_threads_6, num_threads_1
    ]].apply(
        lambda x: (float(x[num_threads_6]) - float(x[num_threads_1])) / float(x[num_threads_1]),
        axis=1)
    cpu_num_threads_1 = "cpu_rss(MB)_cpu_math_library_num_threads_1"
    cpu_num_threads_6 = "cpu_rss(MB)_cpu_math_library_num_threads_6"
    output_thread_df["mkl_cpu_rss_diff"] = output_thread_df[[
        cpu_num_threads_6, cpu_num_threads_1
    ]].apply(
        lambda x: (float(x[cpu_num_threads_6]) - float(x[cpu_num_threads_1])) / float(x[cpu_num_threads_1]),
        axis=1)
    return output_mkl_df, output_thread_df
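
# Reading the outputs: "mkl_infer_diff" < 0 in output_mkl_df means enabling
# MKL-DNN reduced inference time; in output_thread_df it means the 6-thread
# run beat the single-threaded run.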


def main():
    """
    Entry point: collect logs, build the raw sheet, then run optional analyses.
    """
    args = parse_args()
    # create an empty DataFrame with the full set of expected columns
    origin_df = pd.DataFrame(columns=[
        "model_name", "batch_size", "input_shape", "runtime_device", "ir_optim",
        "enable_memory_optim", "enable_tensorrt", "precision", "enable_mkldnn",
        "cpu_math_library_num_threads", "preproce_time(ms)",
        "inference_time(ms)", "postprocess_time(ms)", "cpu_rss(MB)",
        "gpu_rss(MB)", "gpu_util"
    ])
    for file_name, full_path in find_all_logs(args.log_path):
        dict_log = process_log(full_path)
        # DataFrame.append was removed in pandas 2.0; concat a one-row frame
        origin_df = pd.concat(
            [origin_df, pd.DataFrame([dict_log])], ignore_index=True)
    raw_df = origin_df.sort_values(by=["model_name", "batch_size"])
    raw_df.to_excel(args.output_name)

    if args.analysis_trt:
        trt_df = trt_perf_analysis(raw_df)
        trt_df.to_excel("trt_analysis_{}".format(args.output_name))
    if args.analysis_mkl:
        mkl_df, thread_df = mkl_perf_analysis(raw_df)
        mkl_df.to_excel("mkl_enable_analysis_{}".format(args.output_name))
        thread_df.to_excel("mkl_threads_analysis_{}".format(args.output_name))


if __name__ == "__main__":
    main()