picodet.cpp 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
  #include "picodet.h"

  #include <benchmark.h>

  #include <algorithm>
  #include <cmath>
  #include <cstddef>
  #include <cstdint>
  #include <cstring>
  #include <iostream>
  #include <vector>
  /// Fast approximation of e^x.
  ///
  /// A linear function of x (scaled by log2(e) = 1.4426950409 and biased by
  /// 126.93490512) is multiplied by 2^23 and written directly into the bit
  /// pattern of a 32-bit float, so the integer part lands in the exponent field
  /// and the fractional part in the mantissa.
  ///
  /// The bit pattern is clamped before the integer conversion: arguments that
  /// are too negative yield 0.0f and arguments that are too large yield +inf.
  /// Without the clamp, the double -> unsigned conversion is undefined for
  /// out-of-range values and can return garbage instead of a value near zero.
  ///
  /// @param x exponent argument.
  /// @return approximate value of exp(x) (accurate to a few percent).
  inline float fast_exp(float x) {
    static_assert(sizeof(float) == sizeof(std::uint32_t),
                  "fast_exp requires a 32-bit float");
    // Same expression as before; evaluated in double precision.
    double bits_value = (1 << 23) * (1.4426950409 * x + 126.93490512f);
    const double kZeroBits = 0.0;          // bit pattern of +0.0f
    const double kInfBits = 2139095040.0;  // 0x7F800000, bit pattern of +inf
    if (!(bits_value >= kZeroBits)) {
      bits_value = kZeroBits;  // underflow (also catches NaN input)
    } else if (bits_value > kInfBits) {
      bits_value = kInfBits;  // overflow
    }
    const std::uint32_t bits = static_cast<std::uint32_t>(bits_value);
    float result;
    // memcpy is the well-defined way to reinterpret the bits in C++.
    std::memcpy(&result, &bits, sizeof(result));
    return result;
  }
  26. inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); }
  /// Softmax over `length` consecutive values.
  ///
  /// The maximum input is subtracted before exponentiation so every argument
  /// passed to fast_exp() is <= 0.
  ///
  /// @param src    input values (`length` elements).
  /// @param dst    output buffer (`length` elements); receives the normalised
  ///               weights.
  /// @param length number of elements; nothing is read or written when it is
  ///               not positive.
  /// @return always 0.
  template <typename T>
  int activation_function_softmax(const T *src, T *dst, int length) {
    // An empty range has no maximum; dereferencing max_element's result would
    // read past the buffer.
    if (length <= 0) {
      return 0;
    }
    const T alpha = *std::max_element(src, src + length);
    T denominator{0};
    for (int i = 0; i < length; ++i) {
      dst[i] = fast_exp(src[i] - alpha);
      denominator += dst[i];
    }
    for (int i = 0; i < length; ++i) {
      dst[i] /= denominator;
    }
    return 0;
  }
  40. bool PicoDet::hasGPU = false;
  41. PicoDet *PicoDet::detector = nullptr;
  42. PicoDet::PicoDet(const char *param, const char *bin, int input_width,
  43. int input_hight, bool useGPU, float score_threshold_ = 0.5,
  44. float nms_threshold_ = 0.3) {
  45. this->Net = new ncnn::Net();
  46. #if NCNN_VULKAN
  47. this->hasGPU = ncnn::get_gpu_count() > 0;
  48. #endif
  49. this->Net->opt.use_vulkan_compute = this->hasGPU && useGPU;
  50. this->Net->opt.use_fp16_arithmetic = true;
  51. this->Net->load_param(param);
  52. this->Net->load_model(bin);
  53. this->in_w = input_width;
  54. this->in_h = input_hight;
  55. this->score_threshold = score_threshold_;
  56. this->nms_threshold = nms_threshold_;
  57. }
  58. PicoDet::~PicoDet() { delete this->Net; }
  59. void PicoDet::preprocess(cv::Mat &image, ncnn::Mat &in) {
  60. // cv::resize(image, image, cv::Size(this->in_w, this->in_h), 0.f, 0.f);
  61. int img_w = image.cols;
  62. int img_h = image.rows;
  63. in = ncnn::Mat::from_pixels_resize(image.data, ncnn::Mat::PIXEL_BGR, img_w,
  64. img_h, this->in_w, this->in_h);
  65. const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
  66. const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
  67. in.substract_mean_normalize(mean_vals, norm_vals);
  68. }
  69. int PicoDet::detect(cv::Mat image, std::vector<BoxInfo> &result_list,
  70. bool has_postprocess) {
  71. ncnn::Mat input;
  72. preprocess(image, input);
  73. auto ex = this->Net->create_extractor();
  74. ex.set_light_mode(false);
  75. ex.set_num_threads(4);
  76. #if NCNN_VULKAN
  77. ex.set_vulkan_compute(this->hasGPU);
  78. #endif
  79. ex.input("image", input); // picodet
  80. this->image_h = image.rows;
  81. this->image_w = image.cols;
  82. std::vector<std::vector<BoxInfo>> results;
  83. results.resize(this->num_class);
  84. if (has_postprocess) {
  85. ncnn::Mat dis_pred;
  86. ncnn::Mat cls_pred;
  87. ex.extract(this->nms_heads_info[0].c_str(), dis_pred);
  88. ex.extract(this->nms_heads_info[1].c_str(), cls_pred);
  89. std::cout << dis_pred.h << " " << dis_pred.w << std::endl;
  90. std::cout << cls_pred.h << " " << cls_pred.w << std::endl;
  91. this->nms_boxes(cls_pred, dis_pred, this->score_threshold, results);
  92. } else {
  93. for (const auto &head_info : this->non_postprocess_heads_info) {
  94. ncnn::Mat dis_pred;
  95. ncnn::Mat cls_pred;
  96. ex.extract(head_info.dis_layer.c_str(), dis_pred);
  97. ex.extract(head_info.cls_layer.c_str(), cls_pred);
  98. this->decode_infer(cls_pred, dis_pred, head_info.stride,
  99. this->score_threshold, results);
  100. }
  101. }
  102. for (int i = 0; i < (int)results.size(); i++) {
  103. this->nms(results[i], this->nms_threshold);
  104. for (auto box : results[i]) {
  105. box.x1 = box.x1 / this->in_w * this->image_w;
  106. box.x2 = box.x2 / this->in_w * this->image_w;
  107. box.y1 = box.y1 / this->in_h * this->image_h;
  108. box.y2 = box.y2 / this->in_h * this->image_h;
  109. result_list.push_back(box);
  110. }
  111. }
  112. return 0;
  113. }
  114. void PicoDet::nms_boxes(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred,
  115. float score_threshold,
  116. std::vector<std::vector<BoxInfo>> &result_list) {
  117. BoxInfo bbox;
  118. int i, j;
  119. for (i = 0; i < dis_pred.h; i++) {
  120. bbox.x1 = dis_pred.row(i)[0];
  121. bbox.y1 = dis_pred.row(i)[1];
  122. bbox.x2 = dis_pred.row(i)[2];
  123. bbox.y2 = dis_pred.row(i)[3];
  124. const float *scores = cls_pred.row(i);
  125. float score = 0;
  126. int cur_label = 0;
  127. for (int label = 0; label < this->num_class; label++) {
  128. float score_ = cls_pred.row(label)[i];
  129. if (score_ > score) {
  130. score = score_;
  131. cur_label = label;
  132. }
  133. }
  134. bbox.score = score;
  135. bbox.label = cur_label;
  136. result_list[cur_label].push_back(bbox);
  137. }
  138. }
  139. void PicoDet::decode_infer(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, int stride,
  140. float threshold,
  141. std::vector<std::vector<BoxInfo>> &results) {
  142. int feature_h = ceil((float)this->in_w / stride);
  143. int feature_w = ceil((float)this->in_h / stride);
  144. for (int idx = 0; idx < feature_h * feature_w; idx++) {
  145. const float *scores = cls_pred.row(idx);
  146. int row = idx / feature_w;
  147. int col = idx % feature_w;
  148. float score = 0;
  149. int cur_label = 0;
  150. for (int label = 0; label < this->num_class; label++) {
  151. if (scores[label] > score) {
  152. score = scores[label];
  153. cur_label = label;
  154. }
  155. }
  156. if (score > threshold) {
  157. const float *bbox_pred = dis_pred.row(idx);
  158. results[cur_label].push_back(
  159. this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
  160. }
  161. }
  162. }
  163. BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score,
  164. int x, int y, int stride) {
  165. float ct_x = (x + 0.5) * stride;
  166. float ct_y = (y + 0.5) * stride;
  167. std::vector<float> dis_pred;
  168. dis_pred.resize(4);
  169. for (int i = 0; i < 4; i++) {
  170. float dis = 0;
  171. float *dis_after_sm = new float[this->reg_max + 1];
  172. activation_function_softmax(dfl_det + i * (this->reg_max + 1), dis_after_sm,
  173. this->reg_max + 1);
  174. for (int j = 0; j < this->reg_max + 1; j++) {
  175. dis += j * dis_after_sm[j];
  176. }
  177. dis *= stride;
  178. dis_pred[i] = dis;
  179. delete[] dis_after_sm;
  180. }
  181. float xmin = (std::max)(ct_x - dis_pred[0], .0f);
  182. float ymin = (std::max)(ct_y - dis_pred[1], .0f);
  183. float xmax = (std::min)(ct_x + dis_pred[2], (float)this->in_w);
  184. float ymax = (std::min)(ct_y + dis_pred[3], (float)this->in_w);
  185. return BoxInfo{xmin, ymin, xmax, ymax, score, label};
  186. }
  187. void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
  188. std::sort(input_boxes.begin(), input_boxes.end(),
  189. [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
  190. std::vector<float> vArea(input_boxes.size());
  191. for (int i = 0; i < int(input_boxes.size()); ++i) {
  192. vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) *
  193. (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
  194. }
  195. for (int i = 0; i < int(input_boxes.size()); ++i) {
  196. for (int j = i + 1; j < int(input_boxes.size());) {
  197. float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
  198. float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
  199. float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
  200. float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
  201. float w = (std::max)(float(0), xx2 - xx1 + 1);
  202. float h = (std::max)(float(0), yy2 - yy1 + 1);
  203. float inter = w * h;
  204. float ovr = inter / (vArea[i] + vArea[j] - inter);
  205. if (ovr >= NMS_THRESH) {
  206. input_boxes.erase(input_boxes.begin() + j);
  207. vArea.erase(vArea.begin() + j);
  208. } else {
  209. j++;
  210. }
  211. }
  212. }
  213. }