action_infer.py

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import yaml
import glob
import cv2
import numpy as np
import math
import random
import paddle
import sys
from collections.abc import Sequence

# add deploy path of PaddleDetection to sys.path
parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
sys.path.insert(0, parent_path)

from paddle.inference import Config, create_predictor
from utils import argsparser, Timer, get_current_memory_mb
from benchmark_utils import PaddleInferBenchmark
from infer import Detector, print_arguments


class ActionRecognizer(Detector):
    """
    Args:
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        device (str): device to run on, can be CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running (paddle/trt_fp32/trt_fp16)
        batch_size (int): size of each batch in inference
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): if the model is produced by TRT offline quantization
            calibration, trt_calib_mode needs to be set to True
        cpu_threads (int): number of cpu threads
        enable_mkldnn (bool): whether to enable MKLDNN
        threshold (float): score threshold for visualization
        window_size (int): temporal size of the skeleton feature
        random_pad (bool): whether to do random padding when frame length < window_size
    """

    def __init__(self,
                 model_dir,
                 device='CPU',
                 run_mode='paddle',
                 batch_size=1,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
                 trt_calib_mode=False,
                 cpu_threads=1,
                 enable_mkldnn=False,
                 output_dir='output',
                 threshold=0.5,
                 window_size=100,
                 random_pad=False):
        assert batch_size == 1, "ActionRecognizer only supports batch_size=1 now."
        super(ActionRecognizer, self).__init__(
            model_dir=model_dir,
            device=device,
            run_mode=run_mode,
            batch_size=batch_size,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
            trt_calib_mode=trt_calib_mode,
            cpu_threads=cpu_threads,
            enable_mkldnn=enable_mkldnn,
            output_dir=output_dir,
            threshold=threshold,
            delete_shuffle_pass=True)

    def predict(self, repeats=1):
        '''
        Args:
            repeats (int): repeat number for prediction
        Returns:
            result (dict): raw model output, stored under the 'output' key
        '''
        # model prediction
        output_names = self.predictor.get_output_names()
        for i in range(repeats):
            self.predictor.run()
            output_tensor = self.predictor.get_output_handle(output_names[0])
            np_output = output_tensor.copy_to_cpu()
        result = dict(output=np_output)
        return result

    def predict_skeleton(self, skeleton_list, run_benchmark=False, repeats=1):
        results = []
        for i, skeleton in enumerate(skeleton_list):
            if run_benchmark:
                # preprocess
                inputs = self.preprocess(skeleton)  # warmup
                self.det_times.preprocess_time_s.start()
                inputs = self.preprocess(skeleton)
                self.det_times.preprocess_time_s.end()

                # model prediction
                result = self.predict(repeats=repeats)  # warmup
                self.det_times.inference_time_s.start()
                result = self.predict(repeats=repeats)
                self.det_times.inference_time_s.end(repeats=repeats)

                # postprocess
                result_warmup = self.postprocess(inputs, result)  # warmup
                self.det_times.postprocess_time_s.start()
                result = self.postprocess(inputs, result)
                self.det_times.postprocess_time_s.end()
                self.det_times.img_num += len(skeleton)

                cm, gm, gu = get_current_memory_mb()
                self.cpu_mem += cm
                self.gpu_mem += gm
                self.gpu_util += gu
            else:
                # preprocess
                self.det_times.preprocess_time_s.start()
                inputs = self.preprocess(skeleton)
                self.det_times.preprocess_time_s.end()

                # model prediction
                self.det_times.inference_time_s.start()
                result = self.predict()
                self.det_times.inference_time_s.end()

                # postprocess
                self.det_times.postprocess_time_s.start()
                result = self.postprocess(inputs, result)
                self.det_times.postprocess_time_s.end()
                self.det_times.img_num += len(skeleton)

            results.append(result)
        return results

    def predict_skeleton_with_mot(self, skeleton_with_mot, run_benchmark=False):
        """
        skeleton_with_mot (dict): includes individual skeleton sequences, each of shape
            [C, T, K, 1], and their corresponding track ids.
        """
        skeleton_list = skeleton_with_mot["skeleton"]
        mot_id = skeleton_with_mot["mot_id"]
        act_res = self.predict_skeleton(skeleton_list, run_benchmark, repeats=1)
        results = list(zip(mot_id, act_res))
        return results
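
    # Input sketch (illustrative assumption, not from the original code): the dict is
    # expected to look roughly like
    #   {"skeleton": [np.ndarray of shape (C, T, K, 1), ...], "mot_id": [1, 2, ...]}
    # and the returned list pairs each track id with its action result.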

    def preprocess(self, data):
        preprocess_ops = []
        for op_info in self.pred_config.preprocess_infos:
            new_op_info = op_info.copy()
            op_type = new_op_info.pop('type')
            preprocess_ops.append(eval(op_type)(**new_op_info))

        input_lst = []
        data = action_preprocess(data, preprocess_ops)
        input_lst.append(data)
        input_names = self.predictor.get_input_names()
        inputs = {}
        inputs['data_batch_0'] = np.stack(input_lst, axis=0).astype('float32')
        for i in range(len(input_names)):
            input_tensor = self.predictor.get_input_handle(input_names[i])
            input_tensor.copy_from_cpu(inputs[input_names[i]])
        return inputs

    def postprocess(self, inputs, result):
        # postprocess output of predictor
        output_logit = result['output'][0]
        classes = np.argpartition(output_logit, -1)[-1:]
        classes = classes[np.argsort(-output_logit[classes])]
        scores = output_logit[classes]
        result = {'class': classes, 'score': scores}
        return result
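

# A minimal usage sketch (illustrative only; the model directory and file names below
# are assumptions, not part of this file): load an exported skeleton-action model and
# run it on one saved skeleton sequence.
#
#   recognizer = ActionRecognizer("output_inference/STGCN", device="GPU")
#   results = recognizer.predict_skeleton([np.load("skeleton.npy")])
#   print(results[0]['class'], results[0]['score'])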


def action_preprocess(input, preprocess_ops):
    """
    input (str | numpy.array): if input is str, it should be a legal file path with a
        numpy array saved in it. Otherwise it should be a numpy.array used as direct input.
    return (numpy.array)
    """
    if isinstance(input, str):
        assert os.path.isfile(input), "{0} does not exist".format(input)
        data = np.load(input)
    else:
        data = input
    for operator in preprocess_ops:
        data = operator(data)
    return data
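
# Illustrative note (assumption, not from the original file): action_preprocess accepts
# either a path to a saved numpy array or the array itself, e.g.
#
#   ops = [AutoPadding(window_size=100)]
#   feat = action_preprocess("skeleton.npy", ops)                 # from a .npy file
#   feat = action_preprocess(np.random.rand(2, 50, 17, 1), ops)   # from an in-memory array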


class AutoPadding(object):
    """
    Sample or pad the skeleton feature along the temporal dimension.
    Args:
        window_size (int): Temporal size of the skeleton feature.
        random_pad (bool): Whether to do random padding when frame length < window size. Default: False.
    """

    def __init__(self, window_size=100, random_pad=False):
        self.window_size = window_size
        self.random_pad = random_pad

    def get_frame_num(self, data):
        # count valid (non-zero) frames, scanning backwards from the end
        C, T, V, M = data.shape
        for i in range(T - 1, -1, -1):
            tmp = np.sum(data[:, i, :, :])
            if tmp > 0:
                T = i + 1
                break
        return T

    def __call__(self, results):
        data = results
        C, T, V, M = data.shape
        T = self.get_frame_num(data)
        if T == self.window_size:
            data_pad = data[:, :self.window_size, :, :]
        elif T < self.window_size:
            begin = random.randint(
                0, self.window_size - T) if self.random_pad else 0
            data_pad = np.zeros((C, self.window_size, V, M))
            data_pad[:, begin:begin + T, :, :] = data[:, :T, :, :]
        else:
            if self.random_pad:
                index = np.random.choice(
                    T, self.window_size, replace=False).astype('int64')
            else:
                # sample window_size frames evenly; end at T - 1 to keep indices in range
                index = np.linspace(0, T - 1, self.window_size).astype("int64")
            data_pad = data[:, index, :, :]
        return data_pad
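
# Behavior sketch (illustrative, based on the logic above; the shapes are example values):
# with window_size=100, a (2, 50, 17, 1) input is zero-padded along the temporal axis,
# while a (2, 250, 17, 1) input is evenly subsampled down to 100 frames.
#
#   pad_op = AutoPadding(window_size=100)
#   pad_op(np.random.rand(2, 50, 17, 1)).shape   # (2, 100, 17, 1)
#   pad_op(np.random.rand(2, 250, 17, 1)).shape  # (2, 100, 17, 1)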


def get_test_skeletons(input_file):
    assert input_file is not None, "--action_file can not be None"
    input_data = np.load(input_file)
    if input_data.ndim == 4:
        return [input_data]
    elif input_data.ndim == 5:
        output = list(
            map(lambda x: np.squeeze(x, 0),
                np.split(input_data, input_data.shape[0], 0)))
        return output
    else:
        raise ValueError(
            "Only inputs with shape (N, C, T, K, M) or (C, T, K, M) are supported now.")


def main():
    detector = ActionRecognizer(
        FLAGS.model_dir,
        device=FLAGS.device,
        run_mode=FLAGS.run_mode,
        batch_size=FLAGS.batch_size,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
        trt_calib_mode=FLAGS.trt_calib_mode,
        cpu_threads=FLAGS.cpu_threads,
        enable_mkldnn=FLAGS.enable_mkldnn,
        threshold=FLAGS.threshold,
        output_dir=FLAGS.output_dir,
        window_size=FLAGS.window_size,
        random_pad=FLAGS.random_pad)

    # predict from numpy array
    input_list = get_test_skeletons(FLAGS.action_file)
    detector.predict_skeleton(input_list, FLAGS.run_benchmark, repeats=10)
    if not FLAGS.run_benchmark:
        detector.det_times.info(average=True)
    else:
        mems = {
            'cpu_rss_mb': detector.cpu_mem / len(input_list),
            'gpu_rss_mb': detector.gpu_mem / len(input_list),
            'gpu_util': detector.gpu_util * 100 / len(input_list)
        }
        perf_info = detector.det_times.report(average=True)
        model_dir = FLAGS.model_dir
        mode = FLAGS.run_mode
        model_info = {
            'model_name': model_dir.strip('/').split('/')[-1],
            'precision': mode.split('_')[-1]
        }
        data_info = {
            'batch_size': FLAGS.batch_size,
            'shape': "dynamic_shape",
            'data_num': perf_info['img_num']
        }
        det_log = PaddleInferBenchmark(detector.config, model_info, data_info,
                                       perf_info, mems)
        det_log('Action')


if __name__ == '__main__':
    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)
    FLAGS.device = FLAGS.device.upper()
    assert FLAGS.device in ['CPU', 'GPU', 'XPU'
                            ], "device should be CPU, GPU or XPU"
    assert not FLAGS.use_gpu, "use_gpu has been deprecated, please use --device"

    main()