video_preprocess_yolov3.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. /*************************************************************************
  2. * Copyright (C) [2021] by Cambricon, Inc. All rights reserved
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  13. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  15. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  18. * THE SOFTWARE.
  19. *************************************************************************/
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <new>
#include <string>
#include <vector>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#if (CV_MAJOR_VERSION >= 3)
#include "opencv2/imgcodecs/imgcodecs.hpp"
#endif
#include "cnis/contrib/video_helper.h"
#include "cnstream_frame_va.hpp"
#include "cnstream_logging.hpp"
#include "video_preproc.hpp"
#include "video_preprocess_common.hpp"
  34. /**
  35. * @brief Video preprocessing for YOLOv3 neural network
  36. */
  37. class VideoPreprocYolov3 : public cnstream::VideoPreproc {
  38. public:
  39. /**
  40. * @brief Execute YOLOv3 neural network preprocessing
  41. *
  42. * @param model_input: the input of neural network. The preproc result should be set to it.
  43. * @param input_data: the raw input data. The user could get infer_server::video::VideoFrame object from it.
  44. * @param model_info: model information, e.g., input/output number, shape and etc.
  45. *
  46. * @return return true if succeed
  47. */
  48. bool Execute(infer_server::ModelIO* model_input, const infer_server::InferData& input_data,
  49. const infer_server::ModelInfo& model_info) {
  50. // check model input number and shape
  51. uint32_t input_num = model_info.InputNum();
  52. if (input_num != 1) {
  53. LOGE(DEMO) << "[VideoPreprocYolov3] model input number not supported. It should be 1, but " << input_num;
  54. return false;
  55. }
  56. infer_server::Shape input_shape;
  57. input_shape = model_info.InputShape(0);
  58. int c_idx = 3;
  59. int w_idx = 2;
  60. int h_idx = 1;
  61. if (model_info.InputLayout(0).order == infer_server::DimOrder::NCHW) {
  62. c_idx = 1;
  63. w_idx = 3;
  64. h_idx = 2;
  65. }
  66. if (input_shape[c_idx] != 4) {
  67. LOGE(DEMO) << "[VideoPreprocYolov3] model input shape not supported, `c` should be 4, but " << input_shape[c_idx];
  68. return false;
  69. }
  70. if (model_info.InputLayout(0).dtype != infer_server::DataType::UINT8 &&
  71. model_info.InputLayout(0).dtype != infer_server::DataType::FLOAT32) {
  72. std::string dtype_str = "";
  73. switch (model_info.InputLayout(0).dtype) {
  74. case infer_server::DataType::FLOAT16: dtype_str = "FLOAT16"; break;
  75. case infer_server::DataType::INT16: dtype_str = "INT16"; break;
  76. case infer_server::DataType::INT32: dtype_str = "INT32"; break;
  77. case infer_server::DataType::INVALID: dtype_str = "INVALID"; break;
  78. default: dtype_str = "UNKNOWN"; break;
  79. }
  80. LOGE(DEMO) << "[VideoPreprocYolov3] model input data type not supported. It should be uint8/float32, but "
  81. << dtype_str;
  82. return false;
  83. }
  84. // do preproc
  85. const infer_server::video::VideoFrame& frame = input_data.GetLref<infer_server::video::VideoFrame>();
  86. size_t src_w = frame.width;
  87. size_t src_h = frame.height;
  88. uint32_t dst_w = input_shape[w_idx];
  89. uint32_t dst_h = input_shape[h_idx];
  90. uint8_t* img_data = new (std::nothrow) uint8_t[frame.GetTotalSize()];
  91. if (!img_data) {
  92. LOGE(DEMO) << "[VideoPreprocYolov3] Failed to alloc memory, size: " << frame.GetTotalSize();
  93. return false;
  94. }
  95. uint8_t* img_data_tmp = img_data;
  96. for (auto plane_idx = 0u; plane_idx < frame.plane_num; ++plane_idx) {
  97. memcpy(img_data_tmp, frame.plane[plane_idx].Data(), frame.GetPlaneSize(plane_idx));
  98. img_data_tmp += frame.GetPlaneSize(plane_idx);
  99. }
  100. // convert color space from src to dst
  101. cv::Mat dst_cvt_color_img;
  102. if (!ConvertColorSpace(src_w, src_h, frame.format, model_input_pixel_format_, img_data, &dst_cvt_color_img)) {
  103. LOGW(DEMO) << "[VideoPreprocYolov3] Unsupport pixel format. src: " << static_cast<int>(frame.format)
  104. << " dst: " << static_cast<int>(model_input_pixel_format_);
  105. delete[] img_data;
  106. return false;
  107. }
  108. cv::Mat dst_pad_img = dst_cvt_color_img;
  109. if (src_h != dst_h || src_w != dst_w) {
  110. cv::Mat pad_img(dst_h, dst_w, dst_cvt_color_img.type(), cv::Scalar(128, 128, 128));
  111. const float scaling_factors = std::min(1.0 * dst_w / src_w, 1.0 * dst_h / src_h);
  112. cv::Mat resized_img(src_h * scaling_factors, src_w * scaling_factors, dst_cvt_color_img.type());
  113. cv::resize(dst_cvt_color_img, resized_img, cv::Size(resized_img.cols, resized_img.rows));
  114. cv::Rect roi;
  115. roi.x = (pad_img.cols - resized_img.cols) / 2;
  116. roi.y = (pad_img.rows - resized_img.rows) / 2;
  117. roi.width = resized_img.cols;
  118. roi.height = resized_img.rows;
  119. resized_img.copyTo(pad_img(roi));
  120. dst_pad_img = pad_img;
  121. }
  122. // copy data to model_input buffer
  123. if (model_info.InputLayout(0).dtype == infer_server::DataType::FLOAT32) {
  124. // input data type is float32
  125. if (dst_pad_img.channels() == 4) {
  126. cv::Mat dst_img(dst_h, dst_w, CV_32FC4, model_input->buffers[0].MutableData());
  127. dst_pad_img.convertTo(dst_img, CV_32FC4);
  128. } else {
  129. cv::Mat dst_img(dst_h, dst_w, CV_32FC3, model_input->buffers[0].MutableData());
  130. dst_pad_img.convertTo(dst_img, CV_32FC3);
  131. }
  132. } else {
  133. // input data type is uint8
  134. cv::Mat dst_img(dst_h, dst_w, dst_pad_img.type(), model_input->buffers[0].MutableData());
  135. dst_pad_img.copyTo(dst_img);
  136. }
  137. delete[] img_data;
  138. return true;
  139. }
  140. private:
  141. DECLARE_REFLEX_OBJECT_EX(VideoPreprocYolov3, cnstream::VideoPreproc);
  142. }; // class VideoPreprocYolov3
  143. IMPLEMENT_REFLEX_OBJECT_EX(VideoPreprocYolov3, cnstream::VideoPreproc);