video_preprocess_standard.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. /*************************************************************************
  2. * Copyright (C) [2021] by Cambricon, Inc. All rights reserved
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  13. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  15. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  18. * THE SOFTWARE.
  19. *************************************************************************/
  20. #include <memory>
  21. #include <string>
  22. #include <utility>
  23. #include <vector>
  24. #include "opencv2/highgui/highgui.hpp"
  25. #include "opencv2/imgproc/imgproc.hpp"
  26. #if (CV_MAJOR_VERSION >= 3)
  27. #include "opencv2/imgcodecs/imgcodecs.hpp"
  28. #endif
  29. #include "cnis/contrib/video_helper.h"
  30. #include "cnstream_frame_va.hpp"
  31. #include "cnstream_logging.hpp"
  32. #include "video_preproc.hpp"
  33. #include "video_preprocess_common.hpp"
  34. /**
  35. * @brief Video standard preprocessing
  36. */
  37. class VideoPreprocCpu : public cnstream::VideoPreproc {
  38. public:
  39. /**
  40. * @brief Execute standard preprocessing
  41. *
  42. * @param model_input: the input of neural network. The preproc result should be set to it.
  43. * @param input_data: the raw input data. The user could get infer_server::video::VideoFrame object from it.
  44. * @param model_info: model information, e.g., input/output number, shape and etc.
  45. *
  46. * @return return true if succeed
  47. */
  48. bool Execute(infer_server::ModelIO* model_input, const infer_server::InferData& input_data,
  49. const infer_server::ModelInfo& model_info) override;
  50. DECLARE_REFLEX_OBJECT_EX(VideoPreprocCpu, cnstream::VideoPreproc);
  51. }; // class VideoPreprocCpu
  52. IMPLEMENT_REFLEX_OBJECT_EX(VideoPreprocCpu, cnstream::VideoPreproc)
  53. bool VideoPreprocCpu::Execute(infer_server::ModelIO* model_input, const infer_server::InferData& input_data,
  54. const infer_server::ModelInfo& model_info) {
  55. // check model input number and shape
  56. uint32_t input_num = model_info.InputNum();
  57. if (input_num != 1) {
  58. LOGE(DEMO) << "[VideoPreprocCpu] model input number not supported. It should be 1, but " << input_num;
  59. return false;
  60. }
  61. infer_server::Shape input_shape;
  62. input_shape = model_info.InputShape(0);
  63. int w_idx = 2;
  64. int h_idx = 1;
  65. int c_idx = 3;
  66. if (model_info.InputLayout(0).order == infer_server::DimOrder::NCHW) {
  67. w_idx = 3;
  68. h_idx = 2;
  69. c_idx = 1;
  70. }
  71. if (input_shape[c_idx] != 4) {
  72. LOGE(DEMO) << "[VideoPreprocCpu] model input shape not supported, `c` should be 4, but " << input_shape[c_idx];
  73. return false;
  74. }
  75. if (model_info.InputLayout(0).dtype != infer_server::DataType::UINT8 &&
  76. model_info.InputLayout(0).dtype != infer_server::DataType::FLOAT32) {
  77. std::string dtype_str = "";
  78. switch (model_info.InputLayout(0).dtype) {
  79. case infer_server::DataType::FLOAT16: dtype_str = "FLOAT16"; break;
  80. case infer_server::DataType::INT16: dtype_str = "INT16"; break;
  81. case infer_server::DataType::INT32: dtype_str = "INT32"; break;
  82. case infer_server::DataType::INVALID: dtype_str = "INVALID"; break;
  83. default: dtype_str = "UNKNOWN"; break;
  84. }
  85. LOGE(DEMO) << "[VideoPreprocCpu] model input data type not supported. It should be uint8/float32, but "
  86. << dtype_str;
  87. return false;
  88. }
  89. // do preproc
  90. const infer_server::video::VideoFrame& frame = input_data.GetLref<infer_server::video::VideoFrame>();
  91. size_t src_w = frame.width;
  92. size_t src_h = frame.height;
  93. uint32_t dst_w = input_shape[w_idx];
  94. uint32_t dst_h = input_shape[h_idx];
  95. uint8_t* img_data = new (std::nothrow) uint8_t[frame.GetTotalSize()];
  96. if (!img_data) {
  97. LOGE(DEMO) << "[VideoPreprocCpu] Failed to alloc memory, size: " << frame.GetTotalSize();
  98. return false;
  99. }
  100. uint8_t* img_data_tmp = img_data;
  101. for (auto plane_idx = 0u; plane_idx < frame.plane_num; ++plane_idx) {
  102. memcpy(img_data_tmp, frame.plane[plane_idx].Data(), frame.GetPlaneSize(plane_idx));
  103. img_data_tmp += frame.GetPlaneSize(plane_idx);
  104. }
  105. // convert color space from src to dst
  106. cv::Mat dst_cvt_color_img;
  107. if (!ConvertColorSpace(src_w, src_h, frame.format, model_input_pixel_format_, img_data, &dst_cvt_color_img)) {
  108. LOGW(DEMO) << "[VideoPreprocCpu] Unsupport pixel format. src: " << static_cast<int>(frame.format)
  109. << " dst: " << static_cast<int>(model_input_pixel_format_);
  110. delete[] img_data;
  111. return false;
  112. }
  113. cv::Mat dst_resized_img = dst_cvt_color_img;
  114. if (src_h != dst_h || src_w != dst_w) {
  115. cv::Mat resized_img(dst_h, dst_w, dst_cvt_color_img.type());
  116. cv::resize(dst_cvt_color_img, resized_img, cv::Size(dst_w, dst_h));
  117. dst_resized_img = resized_img;
  118. }
  119. // copy data to model_input buffer
  120. if (model_info.InputLayout(0).dtype == infer_server::DataType::FLOAT32) {
  121. // input data type is float32
  122. if (dst_resized_img.channels() == 4) {
  123. cv::Mat dst_img(dst_h, dst_w, CV_32FC4, model_input->buffers[0].MutableData());
  124. dst_resized_img.convertTo(dst_img, CV_32FC4);
  125. } else {
  126. cv::Mat dst_img(dst_h, dst_w, CV_32FC3, model_input->buffers[0].MutableData());
  127. dst_resized_img.convertTo(dst_img, CV_32FC3);
  128. }
  129. } else {
  130. // input data type is uint8
  131. cv::Mat dst_img(dst_h, dst_w, dst_resized_img.type(), model_input->buffers[0].MutableData());
  132. dst_resized_img.copyTo(dst_img);
  133. }
  134. delete[] img_data;
  135. return true;
  136. }
  137. /**
  138. * @brief Video standard object preprocessing
  139. */
  140. class VideoObjPreprocCpu : public cnstream::VideoPreproc {
  141. public:
  142. /**
  143. * @brief Execute standard preprocessing for secondary neural network
  144. *
  145. * @param model_input: the input of neural network. The preproc result should be set to it.
  146. * @param input_data: the raw input data. The user could get infer_server::video::VideoFrame object from it.
  147. * @param model_info: model information, e.g., input/output number, shape and etc.
  148. *
  149. * @return return true if succeed
  150. */
  151. bool Execute(infer_server::ModelIO* model_input, const infer_server::InferData& input_data,
  152. const infer_server::ModelInfo& model_info) override;
  153. DECLARE_REFLEX_OBJECT_EX(VideoObjPreprocCpu, cnstream::VideoPreproc);
  154. }; // class VideoObjPreprocCpu
  155. IMPLEMENT_REFLEX_OBJECT_EX(VideoObjPreprocCpu, cnstream::VideoPreproc)
  156. bool VideoObjPreprocCpu::Execute(infer_server::ModelIO* model_input, const infer_server::InferData& input_data,
  157. const infer_server::ModelInfo& model_info) {
  158. // check model input number and shape
  159. uint32_t input_num = model_info.InputNum();
  160. if (input_num != 1) {
  161. LOGE(DEMO) << "[VideoObjPreprocCpu] model input number not supported. It should be 1, but " << input_num;
  162. return false;
  163. }
  164. infer_server::Shape input_shape;
  165. input_shape = model_info.InputShape(0);
  166. int c_idx = 3;
  167. int w_idx = 2;
  168. int h_idx = 1;
  169. if (model_info.InputLayout(0).order == infer_server::DimOrder::NCHW) {
  170. w_idx = 3;
  171. h_idx = 2;
  172. c_idx = 1;
  173. }
  174. if (input_shape[c_idx] != 4) {
  175. LOGE(DEMO) << "[VideoObjPreprocCpu] model input shape not supported, `c` should be 4, but " << input_shape[c_idx];
  176. return false;
  177. }
  178. if (model_info.InputLayout(0).dtype != infer_server::DataType::UINT8 &&
  179. model_info.InputLayout(0).dtype != infer_server::DataType::FLOAT32) {
  180. std::string dtype_str = "";
  181. switch (model_info.InputLayout(0).dtype) {
  182. case infer_server::DataType::FLOAT16: dtype_str = "FLOAT16"; break;
  183. case infer_server::DataType::INT16: dtype_str = "INT16"; break;
  184. case infer_server::DataType::INT32: dtype_str = "INT32"; break;
  185. case infer_server::DataType::INVALID: dtype_str = "INVALID"; break;
  186. default: dtype_str = "UNKNOWN"; break;
  187. }
  188. LOGE(DEMO) << "[VideoObjPreprocCpu] model input data type not supported. It should be uint8/float32, but "
  189. << dtype_str;
  190. return false;
  191. }
  192. // do preproc
  193. const infer_server::video::VideoFrame& frame = input_data.GetLref<infer_server::video::VideoFrame>();
  194. size_t src_w = frame.width;
  195. size_t src_h = frame.height;
  196. uint32_t dst_w = input_shape[w_idx];
  197. uint32_t dst_h = input_shape[h_idx];
  198. uint8_t* img_data = new (std::nothrow) uint8_t[frame.GetTotalSize()];
  199. if (!img_data) {
  200. LOGE(DEMO) << "[VideoObjPreprocCpu] Failed to alloc memory, size: " << frame.GetTotalSize();
  201. return false;
  202. }
  203. uint8_t* img_data_tmp = img_data;
  204. for (auto plane_idx = 0u; plane_idx < frame.plane_num; ++plane_idx) {
  205. memcpy(img_data_tmp, frame.plane[plane_idx].Data(), frame.GetPlaneSize(plane_idx));
  206. img_data_tmp += frame.GetPlaneSize(plane_idx);
  207. }
  208. // convert color space from src to dst
  209. cv::Mat dst_cvt_color_img;
  210. if (!ConvertColorSpace(src_w, src_h, frame.format, model_input_pixel_format_, img_data, &dst_cvt_color_img)) {
  211. LOGW(DEMO) << "[VideoObjPreprocCpu] Unsupport pixel format. src: " << static_cast<int>(frame.format)
  212. << " dst: " << static_cast<int>(model_input_pixel_format_);
  213. delete[] img_data;
  214. return false;
  215. }
  216. cv::Rect obj_roi(src_w * frame.roi.x, src_h * frame.roi.y, src_w * frame.roi.w, src_h * frame.roi.h);
  217. cv::Mat obj_img = dst_cvt_color_img(obj_roi);
  218. cv::Mat dst_obj_resized_img = obj_img;
  219. if ((unsigned)obj_img.rows != dst_h || (unsigned)obj_img.cols != dst_w) {
  220. cv::Mat resized_img(dst_h, dst_w, obj_img.type());
  221. cv::resize(obj_img, resized_img, cv::Size(dst_w, dst_h));
  222. dst_obj_resized_img.release();
  223. dst_obj_resized_img = resized_img;
  224. }
  225. // copy data to model_input buffer
  226. if (model_info.InputLayout(0).dtype == infer_server::DataType::FLOAT32) {
  227. // input data type is float32
  228. if (dst_obj_resized_img.channels() == 4) {
  229. cv::Mat dst_img(dst_h, dst_w, CV_32FC4, model_input->buffers[0].MutableData());
  230. dst_obj_resized_img.convertTo(dst_img, CV_32FC4);
  231. } else {
  232. cv::Mat dst_img(dst_h, dst_w, CV_32FC3, model_input->buffers[0].MutableData());
  233. dst_obj_resized_img.convertTo(dst_img, CV_32FC3);
  234. }
  235. } else {
  236. // input data type is uint8
  237. cv::Mat dst_img(dst_h, dst_w, dst_obj_resized_img.type(), model_input->buffers[0].MutableData());
  238. dst_obj_resized_img.copyTo(dst_img);
  239. }
  240. delete[] img_data;
  241. return true;
  242. }