preprocess_op.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353
  1. // Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include <string>
  15. #include <thread>
  16. #include <vector>
  17. #include "include/preprocess_op.h"
  18. namespace PaddleDetection {
  19. void InitInfo::Run(cv::Mat* im, ImageBlob* data) {
  20. data->im_shape_ = {static_cast<float>(im->rows),
  21. static_cast<float>(im->cols)};
  22. data->scale_factor_ = {1., 1.};
  23. data->in_net_shape_ = {static_cast<float>(im->rows),
  24. static_cast<float>(im->cols)};
  25. }
  26. void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) {
  27. double e = 1.0;
  28. if (is_scale_) {
  29. e /= 255.0;
  30. }
  31. (*im).convertTo(*im, CV_32FC3, e);
  32. for (int h = 0; h < im->rows; h++) {
  33. for (int w = 0; w < im->cols; w++) {
  34. im->at<cv::Vec3f>(h, w)[0] =
  35. (im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
  36. im->at<cv::Vec3f>(h, w)[1] =
  37. (im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
  38. im->at<cv::Vec3f>(h, w)[2] =
  39. (im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
  40. }
  41. }
  42. }
  43. void Permute::Run(cv::Mat* im, ImageBlob* data) {
  44. (*im).convertTo(*im, CV_32FC3);
  45. int rh = im->rows;
  46. int rw = im->cols;
  47. int rc = im->channels();
  48. (data->im_data_).resize(rc * rh * rw);
  49. float* base = (data->im_data_).data();
  50. for (int i = 0; i < rc; ++i) {
  51. cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
  52. }
  53. }
  54. void Resize::Run(cv::Mat* im, ImageBlob* data) {
  55. auto resize_scale = GenerateScale(*im);
  56. cv::resize(
  57. *im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_);
  58. data->in_net_shape_ = {static_cast<float>(im->rows),
  59. static_cast<float>(im->cols)};
  60. data->im_shape_ = {
  61. static_cast<float>(im->rows), static_cast<float>(im->cols),
  62. };
  63. data->scale_factor_ = {
  64. resize_scale.second, resize_scale.first,
  65. };
  66. }
  67. std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) {
  68. std::pair<float, float> resize_scale;
  69. int origin_w = im.cols;
  70. int origin_h = im.rows;
  71. if (keep_ratio_) {
  72. int im_size_max = std::max(origin_w, origin_h);
  73. int im_size_min = std::min(origin_w, origin_h);
  74. int target_size_max =
  75. *std::max_element(target_size_.begin(), target_size_.end());
  76. int target_size_min =
  77. *std::min_element(target_size_.begin(), target_size_.end());
  78. float scale_min =
  79. static_cast<float>(target_size_min) / static_cast<float>(im_size_min);
  80. float scale_max =
  81. static_cast<float>(target_size_max) / static_cast<float>(im_size_max);
  82. float scale_ratio = std::min(scale_min, scale_max);
  83. resize_scale = {scale_ratio, scale_ratio};
  84. } else {
  85. resize_scale.first =
  86. static_cast<float>(target_size_[1]) / static_cast<float>(origin_w);
  87. resize_scale.second =
  88. static_cast<float>(target_size_[0]) / static_cast<float>(origin_h);
  89. }
  90. return resize_scale;
  91. }
  92. void LetterBoxResize::Run(cv::Mat* im, ImageBlob* data) {
  93. float resize_scale = GenerateScale(*im);
  94. int new_shape_w = std::round(im->cols * resize_scale);
  95. int new_shape_h = std::round(im->rows * resize_scale);
  96. data->im_shape_ = {static_cast<float>(new_shape_h),
  97. static_cast<float>(new_shape_w)};
  98. float padw = (target_size_[1] - new_shape_w) / 2.;
  99. float padh = (target_size_[0] - new_shape_h) / 2.;
  100. int top = std::round(padh - 0.1);
  101. int bottom = std::round(padh + 0.1);
  102. int left = std::round(padw - 0.1);
  103. int right = std::round(padw + 0.1);
  104. cv::resize(
  105. *im, *im, cv::Size(new_shape_w, new_shape_h), 0, 0, cv::INTER_AREA);
  106. data->in_net_shape_ = {
  107. static_cast<float>(im->rows), static_cast<float>(im->cols),
  108. };
  109. cv::copyMakeBorder(*im,
  110. *im,
  111. top,
  112. bottom,
  113. left,
  114. right,
  115. cv::BORDER_CONSTANT,
  116. cv::Scalar(127.5));
  117. data->in_net_shape_ = {
  118. static_cast<float>(im->rows), static_cast<float>(im->cols),
  119. };
  120. data->scale_factor_ = {
  121. resize_scale, resize_scale,
  122. };
  123. }
  124. float LetterBoxResize::GenerateScale(const cv::Mat& im) {
  125. int origin_w = im.cols;
  126. int origin_h = im.rows;
  127. int target_h = target_size_[0];
  128. int target_w = target_size_[1];
  129. float ratio_h = static_cast<float>(target_h) / static_cast<float>(origin_h);
  130. float ratio_w = static_cast<float>(target_w) / static_cast<float>(origin_w);
  131. float resize_scale = std::min(ratio_h, ratio_w);
  132. return resize_scale;
  133. }
  134. void PadStride::Run(cv::Mat* im, ImageBlob* data) {
  135. if (stride_ <= 0) {
  136. data->in_net_im_ = im->clone();
  137. return;
  138. }
  139. int rc = im->channels();
  140. int rh = im->rows;
  141. int rw = im->cols;
  142. int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
  143. int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
  144. cv::copyMakeBorder(
  145. *im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0));
  146. data->in_net_im_ = im->clone();
  147. data->in_net_shape_ = {
  148. static_cast<float>(im->rows), static_cast<float>(im->cols),
  149. };
  150. }
  151. void TopDownEvalAffine::Run(cv::Mat* im, ImageBlob* data) {
  152. cv::resize(*im, *im, cv::Size(trainsize_[0], trainsize_[1]), 0, 0, interp_);
  153. // todo: Simd::ResizeBilinear();
  154. data->in_net_shape_ = {
  155. static_cast<float>(trainsize_[1]), static_cast<float>(trainsize_[0]),
  156. };
  157. }
  158. void GetAffineTrans(const cv::Point2f center,
  159. const cv::Point2f input_size,
  160. const cv::Point2f output_size,
  161. cv::Mat* trans) {
  162. cv::Point2f srcTri[3];
  163. cv::Point2f dstTri[3];
  164. float src_w = input_size.x;
  165. float dst_w = output_size.x;
  166. float dst_h = output_size.y;
  167. cv::Point2f src_dir(0, -0.5 * src_w);
  168. cv::Point2f dst_dir(0, -0.5 * dst_w);
  169. srcTri[0] = center;
  170. srcTri[1] = center + src_dir;
  171. cv::Point2f src_d = srcTri[0] - srcTri[1];
  172. srcTri[2] = srcTri[1] + cv::Point2f(-src_d.y, src_d.x);
  173. dstTri[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
  174. dstTri[1] = cv::Point2f(dst_w * 0.5, dst_h * 0.5) + dst_dir;
  175. cv::Point2f dst_d = dstTri[0] - dstTri[1];
  176. dstTri[2] = dstTri[1] + cv::Point2f(-dst_d.y, dst_d.x);
  177. *trans = cv::getAffineTransform(srcTri, dstTri);
  178. }
  179. void WarpAffine::Run(cv::Mat* im, ImageBlob* data) {
  180. cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR);
  181. cv::Mat trans(2, 3, CV_32FC1);
  182. cv::Point2f center;
  183. cv::Point2f input_size;
  184. int h = im->rows;
  185. int w = im->cols;
  186. if (keep_res_) {
  187. input_h_ = (h | pad_) + 1;
  188. input_w_ = (w + pad_) + 1;
  189. input_size = cv::Point2f(input_w_, input_h_);
  190. center = cv::Point2f(w / 2, h / 2);
  191. } else {
  192. float s = std::max(h, w) * 1.0;
  193. input_size = cv::Point2f(s, s);
  194. center = cv::Point2f(w / 2., h / 2.);
  195. }
  196. cv::Point2f output_size(input_w_, input_h_);
  197. GetAffineTrans(center, input_size, output_size, &trans);
  198. cv::warpAffine(*im, *im, trans, cv::Size(input_w_, input_h_));
  199. data->in_net_shape_ = {
  200. static_cast<float>(input_h_), static_cast<float>(input_w_),
  201. };
  202. }
  203. void Pad::Run(cv::Mat* im, ImageBlob* data) {
  204. int h = size_[0];
  205. int w = size_[1];
  206. int rh = im->rows;
  207. int rw = im->cols;
  208. if (h == rh && w == rw){
  209. data->in_net_im_ = im->clone();
  210. return;
  211. }
  212. cv::copyMakeBorder(
  213. *im, *im, 0, h - rh, 0, w - rw, cv::BORDER_CONSTANT, cv::Scalar(114));
  214. data->in_net_im_ = im->clone();
  215. data->in_net_shape_ = {
  216. static_cast<float>(im->rows), static_cast<float>(im->cols),
  217. };
  218. }
  219. // Preprocessor op running order
  220. const std::vector<std::string> Preprocessor::RUN_ORDER = {"InitInfo",
  221. "TopDownEvalAffine",
  222. "Resize",
  223. "LetterBoxResize",
  224. "WarpAffine",
  225. "NormalizeImage",
  226. "PadStride",
  227. "Pad",
  228. "Permute"};
  229. void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
  230. for (const auto& name : RUN_ORDER) {
  231. if (ops_.find(name) != ops_.end()) {
  232. ops_[name]->Run(im, data);
  233. }
  234. }
  235. }
  236. void CropImg(cv::Mat& img,
  237. cv::Mat& crop_img,
  238. std::vector<int>& area,
  239. std::vector<float>& center,
  240. std::vector<float>& scale,
  241. float expandratio) {
  242. int crop_x1 = std::max(0, area[0]);
  243. int crop_y1 = std::max(0, area[1]);
  244. int crop_x2 = std::min(img.cols - 1, area[2]);
  245. int crop_y2 = std::min(img.rows - 1, area[3]);
  246. int center_x = (crop_x1 + crop_x2) / 2.;
  247. int center_y = (crop_y1 + crop_y2) / 2.;
  248. int half_h = (crop_y2 - crop_y1) / 2.;
  249. int half_w = (crop_x2 - crop_x1) / 2.;
  250. // adjust h or w to keep image ratio, expand the shorter edge
  251. if (half_h * 3 > half_w * 4) {
  252. half_w = static_cast<int>(half_h * 0.75);
  253. } else {
  254. half_h = static_cast<int>(half_w * 4 / 3);
  255. }
  256. crop_x1 =
  257. std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
  258. crop_y1 =
  259. std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
  260. crop_x2 = std::min(img.cols - 1,
  261. static_cast<int>(center_x + half_w * (1 + expandratio)));
  262. crop_y2 = std::min(img.rows - 1,
  263. static_cast<int>(center_y + half_h * (1 + expandratio)));
  264. crop_img =
  265. img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
  266. center.clear();
  267. center.emplace_back((crop_x1 + crop_x2) / 2);
  268. center.emplace_back((crop_y1 + crop_y2) / 2);
  269. scale.clear();
  270. scale.emplace_back((crop_x2 - crop_x1));
  271. scale.emplace_back((crop_y2 - crop_y1));
  272. }
  273. bool CheckDynamicInput(const std::vector<cv::Mat>& imgs) {
  274. if (imgs.size() == 1) return false;
  275. int h = imgs.at(0).rows;
  276. int w = imgs.at(0).cols;
  277. for (int i = 1; i < imgs.size(); ++i) {
  278. int hi = imgs.at(i).rows;
  279. int wi = imgs.at(i).cols;
  280. if (hi != h || wi != w) {
  281. return true;
  282. }
  283. }
  284. return false;
  285. }
  286. std::vector<cv::Mat> PadBatch(const std::vector<cv::Mat>& imgs) {
  287. std::vector<cv::Mat> out_imgs;
  288. int max_h = 0;
  289. int max_w = 0;
  290. int rh = 0;
  291. int rw = 0;
  292. // find max_h and max_w in batch
  293. for (int i = 0; i < imgs.size(); ++i) {
  294. rh = imgs.at(i).rows;
  295. rw = imgs.at(i).cols;
  296. if (rh > max_h) max_h = rh;
  297. if (rw > max_w) max_w = rw;
  298. }
  299. for (int i = 0; i < imgs.size(); ++i) {
  300. cv::Mat im = imgs.at(i);
  301. cv::copyMakeBorder(im,
  302. im,
  303. 0,
  304. max_h - imgs.at(i).rows,
  305. 0,
  306. max_w - imgs.at(i).cols,
  307. cv::BORDER_CONSTANT,
  308. cv::Scalar(0));
  309. out_imgs.push_back(im);
  310. }
  311. return out_imgs;
  312. }
  313. } // namespace PaddleDetection