run_detection.cc

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <arm_neon.h>
#include <algorithm>
#include <chrono>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <string>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"  // NOLINT

using namespace paddle::lite_api;  // NOLINT
using namespace std;
struct Object {
  cv::Rect rec;
  int class_id;
  float prob;
};

// Object for storing all preprocessed data
struct ImageBlob {
  // image width and height
  std::vector<int> im_shape_;
  // Buffer for image data after preprocessing
  const float* im_data_;
  std::vector<float> mean_;
  std::vector<float> scale_;
};
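// Print timing statistics collected by RunModel. det_time holds three values,
// all in milliseconds: total preprocess time, average per-run inference time,
// and total postprocess time.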
void PrintBenchmarkLog(std::vector<double> det_time,
                       std::map<std::string, std::string> config,
                       int img_num) {
  std::cout << "----------------- Config info ------------------" << std::endl;
  std::cout << "runtime_device: armv8" << std::endl;
  std::cout << "precision: " << config.at("precision") << std::endl;
  std::cout << "num_threads: " << config.at("num_threads") << std::endl;
  std::cout << "---------------- Data info ---------------------" << std::endl;
  std::cout << "batch_size: " << 1 << std::endl;
  std::cout << "---------------- Model info --------------------" << std::endl;
  std::cout << "Model_name: " << config.at("model_file") << std::endl;
  std::cout << "---------------- Perf info ---------------------" << std::endl;
  std::cout << "Total number of predicted data: " << img_num
            << " and total time spent(ms): "
            << std::accumulate(det_time.begin(), det_time.end(), 0.0)
            << std::endl;
  std::cout << "preprocess_time(ms): " << det_time[0] / img_num
            << ", inference_time(ms): " << det_time[1] / img_num
            << ", postprocess_time(ms): " << det_time[2] / img_num << std::endl;
}
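// Read the label list used to name detected classes: one label per line; if a
// line contains a space, the text before the first space (e.g. a numeric
// index) is stripped.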
std::vector<std::string> LoadLabels(const std::string &path) {
  std::ifstream file;
  std::vector<std::string> labels;
  file.open(path);
  std::string line;
  while (std::getline(file, line)) {
    std::string::size_type pos = line.find(" ");
    if (pos != std::string::npos) {
      line = line.substr(pos);
    }
    labels.push_back(line);
  }
  file.clear();
  file.close();
  return labels;
}
std::vector<std::string> ReadDict(std::string path) {
  std::ifstream in(path);
  std::string line;
  std::vector<std::string> m_vec;
  if (in) {
    while (getline(in, line)) {
      m_vec.push_back(line);
    }
  } else {
    std::cout << "no such file" << std::endl;
  }
  return m_vec;
}
std::vector<std::string> split(const std::string &str,
                               const std::string &delim) {
  std::vector<std::string> res;
  if ("" == str)
    return res;
  char *strs = new char[str.length() + 1];
  std::strcpy(strs, str.c_str());
  char *d = new char[delim.length() + 1];
  std::strcpy(d, delim.c_str());
  char *p = std::strtok(strs, d);
  while (p) {
    string s = p;
    res.push_back(s);
    p = std::strtok(NULL, d);
  }
  delete[] strs;
  delete[] d;
  return res;
}
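// Parse a key/value config file: each line is split on spaces; the first token
// becomes the key and the second the value. Illustrative example (the keys are
// the ones read elsewhere in this file; the values are placeholders):
//   model_file ./model.nb
//   label_path ./label_list.txt
//   num_threads 1
//   precision fp32
//   enable_benchmark 1
//   Resize 320,320
//   mean 0.485,0.456,0.406
//   std 0.229,0.224,0.225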
std::map<std::string, std::string> LoadConfigTxt(std::string config_path) {
  auto config = ReadDict(config_path);
  std::map<std::string, std::string> dict;
  for (int i = 0; i < config.size(); i++) {
    std::vector<std::string> res = split(config[i], " ");
    dict[res[0]] = res[1];
  }
  return dict;
}
void PrintConfig(const std::map<std::string, std::string> &config) {
  std::cout << "=======PaddleDetection lite demo config======" << std::endl;
  for (auto iter = config.begin(); iter != config.end(); iter++) {
    std::cout << iter->first << " : " << iter->second << std::endl;
  }
  std::cout << "===End of PaddleDetection lite demo config===" << std::endl;
}
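// Per-channel normalization: out = (in - mean) / scale. The input is
// interleaved RGB (NHWC); the output is written as three contiguous channel
// planes (NCHW). Four pixels are processed per NEON iteration; any remainder
// is handled by the scalar tail loop.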
// fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up
void neon_mean_scale(const float* din,
                     float* dout,
                     int size,
                     const std::vector<float> mean,
                     const std::vector<float> scale) {
  if (mean.size() != 3 || scale.size() != 3) {
    std::cerr << "[ERROR] mean or scale size must equal to 3\n";
    exit(1);
  }
  float32x4_t vmean0 = vdupq_n_f32(mean[0]);
  float32x4_t vmean1 = vdupq_n_f32(mean[1]);
  float32x4_t vmean2 = vdupq_n_f32(mean[2]);
  float32x4_t vscale0 = vdupq_n_f32(1.f / scale[0]);
  float32x4_t vscale1 = vdupq_n_f32(1.f / scale[1]);
  float32x4_t vscale2 = vdupq_n_f32(1.f / scale[2]);
  float* dout_c0 = dout;
  float* dout_c1 = dout + size;
  float* dout_c2 = dout + size * 2;
  int i = 0;
  for (; i < size - 3; i += 4) {
    float32x4x3_t vin3 = vld3q_f32(din);
    float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
    float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
    float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
    float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
    float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
    float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
    vst1q_f32(dout_c0, vs0);
    vst1q_f32(dout_c1, vs1);
    vst1q_f32(dout_c2, vs2);
    din += 12;
    dout_c0 += 4;
    dout_c1 += 4;
    dout_c2 += 4;
  }
  // Scalar tail: write each remaining pixel to its own channel plane, matching
  // the (x - mean) / scale normalization of the NEON path above.
  for (; i < size; i++) {
    *(dout_c0++) = (*(din++) - mean[0]) / scale[0];
    *(dout_c1++) = (*(din++) - mean[1]) / scale[1];
    *(dout_c2++) = (*(din++) - mean[2]) / scale[2];
  }
}
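// Draw detection results on the image and collect them. The raw model output
// is a flat array of 6-float records, one per detection:
//   [class_id, score, xmin, ymin, xmax, ymax]
// Boxes with score <= thresh are skipped; kept boxes are clipped to the image.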
std::vector<Object> visualize_result(
    const float* data,
    int count,
    float thresh,
    cv::Mat& image,
    const std::vector<std::string> &class_names) {
  if (data == nullptr) {
    std::cerr << "[ERROR] data can not be nullptr\n";
    exit(1);
  }
  std::vector<Object> rect_out;
  for (int iw = 0; iw < count; iw++) {
    int oriw = image.cols;
    int orih = image.rows;
    if (data[1] > thresh) {
      Object obj;
      int x = static_cast<int>(data[2]);
      int y = static_cast<int>(data[3]);
      int w = static_cast<int>(data[4] - data[2] + 1);
      int h = static_cast<int>(data[5] - data[3] + 1);
      cv::Rect rec_clip =
          cv::Rect(x, y, w, h) & cv::Rect(0, 0, image.cols, image.rows);
      obj.class_id = static_cast<int>(data[0]);
      obj.prob = data[1];
      obj.rec = rec_clip;
      if (w > 0 && h > 0 && obj.prob <= 1) {
        rect_out.push_back(obj);
        cv::rectangle(image, rec_clip, cv::Scalar(0, 0, 255), 1, cv::LINE_AA);
        std::string str_prob = std::to_string(obj.prob);
        std::string text = std::string(class_names[obj.class_id]) + ": " +
                           str_prob.substr(0, str_prob.find(".") + 4);
        int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
        double font_scale = 1.f;
        int thickness = 1;
        cv::Size text_size =
            cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
        float new_font_scale = w * 0.5 * font_scale / text_size.width;
        text_size = cv::getTextSize(
            text, font_face, new_font_scale, thickness, nullptr);
        cv::Point origin;
        origin.x = x + 3;
        origin.y = y + text_size.height + 3;
        cv::putText(image,
                    text,
                    origin,
                    font_face,
                    new_font_scale,
                    cv::Scalar(0, 255, 255),
                    thickness,
                    cv::LINE_AA);
        std::cout << "detection, image size: " << image.cols << ", "
                  << image.rows
                  << ", detect object: " << class_names[obj.class_id]
                  << ", score: " << obj.prob << ", location: x=" << x
                  << ", y=" << y << ", width=" << w << ", height=" << h
                  << std::endl;
      }
    }
    data += 6;
  }
  return rect_out;
}
// Load Model and create model predictor
std::shared_ptr<PaddlePredictor> LoadModel(std::string model_file,
                                           int num_threads) {
  MobileConfig config;
  config.set_threads(num_threads);
  config.set_model_from_file(model_file);
  std::shared_ptr<PaddlePredictor> predictor =
      CreatePaddlePredictor<MobileConfig>(config);
  return predictor;
}
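// Build an ImageBlob from the config: "Resize" gives the target width,height,
// and "mean"/"std" give the per-channel normalization parameters, all as
// comma-separated lists.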
ImageBlob prepare_imgdata(const cv::Mat& img,
                          std::map<std::string, std::string> config) {
  ImageBlob img_data;
  std::vector<int> target_size_;
  std::vector<std::string> size_str = split(config.at("Resize"), ",");
  transform(size_str.begin(), size_str.end(), back_inserter(target_size_),
            [](std::string const& s) { return stoi(s); });
  int width = target_size_[0];
  int height = target_size_[1];
  img_data.im_shape_ = {
      static_cast<int>(target_size_[0]),
      static_cast<int>(target_size_[1])
  };
  std::vector<float> mean_;
  std::vector<float> scale_;
  std::vector<std::string> mean_str = split(config.at("mean"), ",");
  std::vector<std::string> std_str = split(config.at("std"), ",");
  transform(mean_str.begin(), mean_str.end(), back_inserter(mean_),
            [](std::string const& s) { return stof(s); });
  transform(std_str.begin(), std_str.end(), back_inserter(scale_),
            [](std::string const& s) { return stof(s); });
  img_data.mean_ = mean_;
  img_data.scale_ = scale_;
  return img_data;
}
void preprocess(const cv::Mat& img, const ImageBlob img_data, float* data) {
  cv::Mat rgb_img;
  cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB);
  cv::resize(
      rgb_img, rgb_img, cv::Size(img_data.im_shape_[0], img_data.im_shape_[1]),
      0.f, 0.f, cv::INTER_CUBIC);
  cv::Mat imgf;
  rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f);
  const float* dimg = reinterpret_cast<const float*>(imgf.data);
  neon_mean_scale(
      dimg, data, int(img_data.im_shape_[0] * img_data.im_shape_[1]),
      img_data.mean_, img_data.scale_);
}
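// Full inference pipeline for one image: load labels and model, preprocess the
// image into input tensor 0 (NCHW float) and the target shape into input
// tensor 1, run the predictor `repeats` times, then decode, draw, and save the
// result as "<image name>_result.jpg". Appends preprocess, average inference,
// and postprocess times (ms) to `times`.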
void RunModel(std::map<std::string, std::string> config,
              std::string img_path,
              const int repeats,
              std::vector<double>* times) {
  std::string model_file = config.at("model_file");
  std::string label_path = config.at("label_path");
  // Load Labels
  std::vector<std::string> class_names = LoadLabels(label_path);
  auto predictor = LoadModel(model_file, stoi(config.at("num_threads")));
  cv::Mat img = imread(img_path, cv::IMREAD_COLOR);
  auto img_data = prepare_imgdata(img, config);
  auto preprocess_start = std::chrono::steady_clock::now();
  // 1. Prepare input data from image
  // input 0
  std::unique_ptr<Tensor> input_tensor0(std::move(predictor->GetInput(0)));
  input_tensor0->Resize({1, 3, img_data.im_shape_[0], img_data.im_shape_[1]});
  auto* data0 = input_tensor0->mutable_data<float>();
  preprocess(img, img_data, data0);
  // input 1
  std::unique_ptr<Tensor> input_tensor1(std::move(predictor->GetInput(1)));
  input_tensor1->Resize({1, 2});
  auto* data1 = input_tensor1->mutable_data<int>();
  data1[0] = img_data.im_shape_[0];
  data1[1] = img_data.im_shape_[1];
  auto preprocess_end = std::chrono::steady_clock::now();
  // 2. Run predictor
  // warm up
  for (int i = 0; i < repeats / 2; i++) {
    predictor->Run();
  }
  auto inference_start = std::chrono::steady_clock::now();
  for (int i = 0; i < repeats; i++) {
    predictor->Run();
  }
  auto inference_end = std::chrono::steady_clock::now();
  // 3. Get output and post process
  auto postprocess_start = std::chrono::steady_clock::now();
  std::unique_ptr<const Tensor> output_tensor(
      std::move(predictor->GetOutput(0)));
  const float* outptr = output_tensor->data<float>();
  auto shape_out = output_tensor->shape();
  int64_t cnt = 1;
  for (auto& i : shape_out) {
    cnt *= i;
  }
  auto rec_out = visualize_result(
      outptr, static_cast<int>(cnt / 6), 0.5f, img, class_names);
  std::string result_name =
      img_path.substr(0, img_path.find(".")) + "_result.jpg";
  cv::imwrite(result_name, img);
  auto postprocess_end = std::chrono::steady_clock::now();
  std::chrono::duration<float> prep_diff = preprocess_end - preprocess_start;
  times->push_back(double(prep_diff.count() * 1000));
  std::chrono::duration<float> infer_diff = inference_end - inference_start;
  times->push_back(double(infer_diff.count() / repeats * 1000));
  std::chrono::duration<float> post_diff = postprocess_end - postprocess_start;
  times->push_back(double(post_diff.count() * 1000));
}
int main(int argc, char** argv) {
  if (argc < 3) {
    std::cerr << "[ERROR] usage: " << argv[0] << " config_path image_path\n";
    exit(1);
  }
  std::string config_path = argv[1];
  std::string img_path = argv[2];
  // load config
  auto config = LoadConfigTxt(config_path);
  PrintConfig(config);
  bool enable_benchmark = bool(stoi(config.at("enable_benchmark")));
  int repeats = enable_benchmark ? 50 : 1;
  std::vector<double> det_times;
  RunModel(config, img_path, repeats, &det_times);
  PrintBenchmarkLog(det_times, config, 1);
  return 0;
}
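// Example invocation (file names are placeholders; the binary name depends on
// how this demo is built):
//   ./run_detection det_config.txt input.jpg
// With enable_benchmark set to 1 in the config, the predictor runs 50 times
// and the average inference time is reported; otherwise it runs once.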