log_parser_excel.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import argparse

import pandas as pd

def parse_args():
    """
    Parse input arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--log_path",
        type=str,
        default="./output_pipeline",
        help="benchmark log path")
    parser.add_argument(
        "--output_name",
        type=str,
        default="benchmark_excel.xlsx",
        help="output excel file name")
    parser.add_argument(
        "--analysis_trt", dest="analysis_trt", action='store_true')
    parser.add_argument(
        "--analysis_mkl", dest="analysis_mkl", action='store_true')
    return parser.parse_args()
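
# Typical invocation (paths are illustrative; flags mirror the parser above):
#   python log_parser_excel.py --log_path ./output_pipeline \
#       --output_name benchmark_excel.xlsx --analysis_trt --analysis_mkl
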
def find_all_logs(path_walk):
    """
    Find all .log files under the target directory, recursively.
    """
    for root, dirs, files in os.walk(path_walk):
        for file_name in files:
            # match names ending in ".log" (dot escaped, anchored at the end)
            if re.match(r'.*\.log$', file_name):
                full_path = os.path.join(root, file_name)
                yield file_name, full_path
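
# Example use of the generator (directory layout is hypothetical):
#   for name, path in find_all_logs("./output_pipeline"):
#       print(name, "->", path)
# os.walk recurses, so logs in nested subdirectories are found as well.
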
def process_log(file_name):
    """
    Parse one benchmark log into a flat dict of string values.
    """
    output_dict = {}
    with open(file_name, 'r') as f:
        for i, data in enumerate(f.readlines()):
            # skip the first (header) line
            if i == 0:
                continue
            line_lists = data.split(" ")
            # conf info
            if "runtime_device:" in line_lists:
                pos_buf = line_lists.index("runtime_device:")
                output_dict["runtime_device"] = line_lists[pos_buf + 1].strip()
            if "ir_optim:" in line_lists:
                pos_buf = line_lists.index("ir_optim:")
                output_dict["ir_optim"] = line_lists[pos_buf + 1].strip()
            if "enable_memory_optim:" in line_lists:
                pos_buf = line_lists.index("enable_memory_optim:")
                output_dict["enable_memory_optim"] = line_lists[pos_buf + 1].strip()
            if "enable_tensorrt:" in line_lists:
                pos_buf = line_lists.index("enable_tensorrt:")
                output_dict["enable_tensorrt"] = line_lists[pos_buf + 1].strip()
            if "precision:" in line_lists:
                pos_buf = line_lists.index("precision:")
                output_dict["precision"] = line_lists[pos_buf + 1].strip()
            if "enable_mkldnn:" in line_lists:
                pos_buf = line_lists.index("enable_mkldnn:")
                output_dict["enable_mkldnn"] = line_lists[pos_buf + 1].strip()
            if "cpu_math_library_num_threads:" in line_lists:
                pos_buf = line_lists.index("cpu_math_library_num_threads:")
                output_dict["cpu_math_library_num_threads"] = line_lists[pos_buf + 1].strip()
            # model info: keep only the last component of the model path
            if "model_name:" in line_lists:
                pos_buf = line_lists.index("model_name:")
                output_dict["model_name"] = list(
                    filter(None, line_lists[pos_buf + 1].strip().split('/')))[-1]
            # data info
            if "batch_size:" in line_lists:
                pos_buf = line_lists.index("batch_size:")
                output_dict["batch_size"] = line_lists[pos_buf + 1].strip()
            if "input_shape:" in line_lists:
                pos_buf = line_lists.index("input_shape:")
                output_dict["input_shape"] = line_lists[pos_buf + 1].strip()
            # perf info: values are comma-separated, keep only the first field
            if "cpu_rss(MB):" in line_lists:
                pos_buf = line_lists.index("cpu_rss(MB):")
                output_dict["cpu_rss(MB)"] = line_lists[pos_buf + 1].strip().split(',')[0]
            if "gpu_rss(MB):" in line_lists:
                pos_buf = line_lists.index("gpu_rss(MB):")
                output_dict["gpu_rss(MB)"] = line_lists[pos_buf + 1].strip().split(',')[0]
            if "gpu_util:" in line_lists:
                pos_buf = line_lists.index("gpu_util:")
                output_dict["gpu_util"] = line_lists[pos_buf + 1].strip().split(',')[0]
            # "preproce_time" is spelled as in the benchmark logs' own key
            if "preproce_time(ms):" in line_lists:
                pos_buf = line_lists.index("preproce_time(ms):")
                output_dict["preproce_time(ms)"] = line_lists[pos_buf + 1].strip().split(',')[0]
            if "inference_time(ms):" in line_lists:
                pos_buf = line_lists.index("inference_time(ms):")
                output_dict["inference_time(ms)"] = line_lists[pos_buf + 1].strip().split(',')[0]
            if "postprocess_time(ms):" in line_lists:
                pos_buf = line_lists.index("postprocess_time(ms):")
                output_dict["postprocess_time(ms)"] = line_lists[pos_buf + 1].strip().split(',')[0]
    return output_dict
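
# Sketch of a result, assuming a log that contains space-separated tokens such
# as "runtime_device: gpu" and "inference_time(ms): 5.1," (values illustrative):
#   {"runtime_device": "gpu", "batch_size": "1", "inference_time(ms)": "5.1", ...}
# Every value stays a string; the analysis functions below cast to float.
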
def filter_df_merge(cpu_df, filter_column=None):
    """
    Merge rows of a dataframe that share ('model_name', 'batch_size'),
    spreading the distinct values of `filter_column` into suffixed columns.

    Args:
        cpu_df (pd.DataFrame): parsed benchmark results.
        filter_column (str): column whose distinct values are compared.
    """
    if not filter_column:
        raise Exception(
            "filter_column must be specified for filter_df_merge")
    df_lists = []
    filter_column_lists = []
    for k, v in cpu_df.groupby(filter_column, dropna=True):
        filter_column_lists.append(k)
        df_lists.append(v)
    final_output_df = df_lists[-1]
    # merge rows describing the same model
    for i in range(len(df_lists) - 1):
        left_suffix = cpu_df[filter_column].unique()[0]
        right_suffix = df_lists[i][filter_column].unique()[0]
        print(left_suffix, right_suffix)
        if not pd.isnull(right_suffix):
            final_output_df = pd.merge(
                final_output_df,
                df_lists[i],
                how='left',
                on=['model_name', 'batch_size'],
                suffixes=('', '_{0}_{1}'.format(filter_column, right_suffix)))
    # rename the remaining default columns with the last group's suffix
    origin_column_names = list(cpu_df.columns.values)
    origin_column_names.remove(filter_column)
    suffix = final_output_df[filter_column].unique()[0]
    for name in origin_column_names:
        final_output_df.rename(
            columns={name: "{0}_{1}_{2}".format(name, filter_column, suffix)},
            inplace=True)
    final_output_df.rename(
        columns={
            filter_column: "{0}_{1}_{2}".format(filter_column, filter_column,
                                                suffix)
        },
        inplace=True)
    final_output_df.sort_values(
        by=[
            "model_name_{0}_{1}".format(filter_column, suffix),
            "batch_size_{0}_{1}".format(filter_column, suffix)
        ],
        inplace=True)
    return final_output_df
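
# Minimal sketch of filter_df_merge on hypothetical data: rows sharing
# ('model_name', 'batch_size') but differing in 'precision' collapse into one
# row whose columns carry "<name>_precision_<value>" suffixes:
#   df = pd.DataFrame({"model_name": ["resnet", "resnet"],
#                      "batch_size": ["1", "1"],
#                      "precision": ["fp32", "fp16"],
#                      "inference_time(ms)": ["5.1", "2.9"]})
#   merged = filter_df_merge(df, "precision")
#   # -> one row with "inference_time(ms)_precision_fp32" and
#   #    "inference_time(ms)_precision_fp16" columns
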
def trt_perf_analysis(raw_df):
    """
    Separate the raw dataframe by precision and compare
    TensorRT performance across precisions.
    """
    # filter df by gpu: compare tensorrt precisions against the gpu baseline
    gpu_df = raw_df.loc[raw_df['runtime_device'] == 'gpu']
    new_df = filter_df_merge(gpu_df, "precision")
    # calculate relative inference-time differences
    infer_fp32 = "inference_time(ms)_precision_fp32"
    infer_fp16 = "inference_time(ms)_precision_fp16"
    infer_int8 = "inference_time(ms)_precision_int8"
    new_df["fp32_fp16_diff"] = new_df[[infer_fp32, infer_fp16]].apply(
        lambda x: (float(x[infer_fp16]) - float(x[infer_fp32])) / float(x[infer_fp32]),
        axis=1)
    # gap between TensorRT fp32 and the plain-GPU baseline time
    new_df["fp32_gpu_diff"] = new_df[["inference_time(ms)", infer_fp32]].apply(
        lambda x: (float(x[infer_fp32]) - float(x["inference_time(ms)"])) / float(x["inference_time(ms)"]),
        axis=1)
    new_df["fp16_int8_diff"] = new_df[[infer_fp16, infer_int8]].apply(
        lambda x: (float(x[infer_int8]) - float(x[infer_fp16])) / float(x[infer_fp16]),
        axis=1)
    return new_df
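
# Each *_diff column is a relative change, e.g.
#   fp32_fp16_diff = (t_fp16 - t_fp32) / t_fp32
# so a negative value means the second precision ran faster than the baseline.
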
def mkl_perf_analysis(raw_df):
    """
    Separate the raw dataframe into comparison groups and measure the effect
    of enabling MKL-DNN and of raising the CPU math-library thread count.
    """
    # filter df by cpu
    cpu_df = raw_df.loc[raw_df['runtime_device'] == 'cpu']
    # single-thread rows isolate the effect of MKL-DNN on/off
    mkl_compare_df = cpu_df.loc[cpu_df['cpu_math_library_num_threads'] == '1']
    # MKL-DNN-enabled rows isolate the effect of the thread count
    thread_compare_df = cpu_df.loc[cpu_df['enable_mkldnn'] == 'True']
    output_mkl_df = filter_df_merge(mkl_compare_df, 'enable_mkldnn')
    output_thread_df = filter_df_merge(thread_compare_df,
                                       'cpu_math_library_num_threads')
    # relative performance change with MKL-DNN enabled vs. disabled
    enable_mkldnn = "inference_time(ms)_enable_mkldnn_True"
    disable_mkldnn = "inference_time(ms)_enable_mkldnn_False"
    output_mkl_df["mkl_infer_diff"] = output_mkl_df[[
        enable_mkldnn, disable_mkldnn
    ]].apply(
        lambda x: (float(x[enable_mkldnn]) - float(x[disable_mkldnn])) / float(x[disable_mkldnn]),
        axis=1)
    cpu_enable_mkldnn = "cpu_rss(MB)_enable_mkldnn_True"
    cpu_disable_mkldnn = "cpu_rss(MB)_enable_mkldnn_False"
    output_mkl_df["mkl_cpu_rss_diff"] = output_mkl_df[[
        cpu_enable_mkldnn, cpu_disable_mkldnn
    ]].apply(
        lambda x: (float(x[cpu_enable_mkldnn]) - float(x[cpu_disable_mkldnn])) / float(x[cpu_disable_mkldnn]),
        axis=1)
    # relative performance change with 6 threads vs. 1 thread (these columns
    # keep the "mkl_" prefix but live in the thread-comparison dataframe)
    num_threads_1 = "inference_time(ms)_cpu_math_library_num_threads_1"
    num_threads_6 = "inference_time(ms)_cpu_math_library_num_threads_6"
    output_thread_df["mkl_infer_diff"] = output_thread_df[[
        num_threads_6, num_threads_1
    ]].apply(
        lambda x: (float(x[num_threads_6]) - float(x[num_threads_1])) / float(x[num_threads_1]),
        axis=1)
    cpu_num_threads_1 = "cpu_rss(MB)_cpu_math_library_num_threads_1"
    cpu_num_threads_6 = "cpu_rss(MB)_cpu_math_library_num_threads_6"
    output_thread_df["mkl_cpu_rss_diff"] = output_thread_df[[
        cpu_num_threads_6, cpu_num_threads_1
    ]].apply(
        lambda x: (float(x[cpu_num_threads_6]) - float(x[cpu_num_threads_1])) / float(x[cpu_num_threads_1]),
        axis=1)
    return output_mkl_df, output_thread_df
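
# Same convention as the TensorRT analysis: every diff is
#   (candidate - baseline) / baseline
# so negative values mean MKL-DNN (or the larger thread count) reduced
# inference time or memory use relative to the baseline run.
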
def main():
    """
    Parse all benchmark logs, dump the raw sheet, then run optional analyses.
    """
    args = parse_args()
    # create an empty DataFrame with the expected column order
    origin_df = pd.DataFrame(columns=[
        "model_name", "batch_size", "input_shape", "runtime_device", "ir_optim",
        "enable_memory_optim", "enable_tensorrt", "precision", "enable_mkldnn",
        "cpu_math_library_num_threads", "preproce_time(ms)",
        "inference_time(ms)", "postprocess_time(ms)", "cpu_rss(MB)",
        "gpu_rss(MB)", "gpu_util"
    ])
    for file_name, full_path in find_all_logs(args.log_path):
        dict_log = process_log(full_path)
        # DataFrame.append was removed in pandas 2.0; concat a one-row frame
        origin_df = pd.concat(
            [origin_df, pd.DataFrame([dict_log])], ignore_index=True)
    raw_df = origin_df.sort_values(by=["model_name", "batch_size"])
    raw_df.to_excel(args.output_name)
    if args.analysis_trt:
        trt_df = trt_perf_analysis(raw_df)
        trt_df.to_excel("trt_analysis_{}".format(args.output_name))
    if args.analysis_mkl:
        mkl_df, thread_df = mkl_perf_analysis(raw_df)
        mkl_df.to_excel("mkl_enable_analysis_{}".format(args.output_name))
        thread_df.to_excel("mkl_threads_analysis_{}".format(args.output_name))

if __name__ == "__main__":
    main()
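
# Files written, derived from the format strings above (shown for the default
# --output_name):
#   benchmark_excel.xlsx                       raw parsed results
#   trt_analysis_benchmark_excel.xlsx          with --analysis_trt
#   mkl_enable_analysis_benchmark_excel.xlsx   with --analysis_mkl
#   mkl_threads_analysis_benchmark_excel.xlsx  with --analysis_mkl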