InferTools.cpp

#include "InferTools.hpp"

namespace gsd{

#define BATCH_SIZE 1
#define MAX_IMAGE_INPUT_SIZE_THRESH (3000 * 3000) // make sure this exceeds the maximum size of the input images!
#define NMS_THRESH 0.1
#define CONF_THRESH 0.1

static gsd::Logger gLogger;

// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
// we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes with conf >= 0.1
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1;

static float prob[BATCH_SIZE * OUTPUT_SIZE];
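// Output layout note (an assumption based on the tensorrtx-style YOLO layer
// this mirrors, not stated in this file): prob[0] carries the detection count
// and the remaining floats hold packed Yolo::Detection records, which is why
// OUTPUT_SIZE above adds the extra "+ 1" element.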
/**
 * @description: getPtr
 * @return {*}
 */
std::shared_ptr<InferTools> InferTools::getPtr(){
    // function-local static: initialized exactly once, thread-safe since C++11
    static std::shared_ptr<InferTools> m_InferTools(new InferTools);
    return m_InferTools;
}
/**
 * @description: Initialization
 * @return {*}
 */
bool InferTools::Init(std::string enginefile){
    cudaSetDevice(this->device);
    std::ifstream file(enginefile, std::ios::binary);
    if(!file.good()){
        ErrorL << "failed to read " << enginefile << "!" << std::endl;
        return false;
    }
    // read the serialized engine into a temporary buffer
    size_t size = 0;
    file.seekg(0, file.end);
    size = file.tellg();
    file.seekg(0, file.beg);
    if(trtModelStream == nullptr){
        trtModelStream = new char[size];
    }
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();
    if(this->runtime == nullptr){
        runtime = createInferRuntime(gLogger);
    }
    assert(runtime != nullptr);
    if(this->engine == nullptr){
        this->engine = runtime->deserializeCudaEngine(trtModelStream, size);
    }
    assert(engine != nullptr);
    if(this->context == nullptr){
        this->context = engine->createExecutionContext();
    }
    assert(context != nullptr);
    // the serialized stream is no longer needed once the engine is deserialized
    if(trtModelStream != nullptr){
        delete[] trtModelStream;
        trtModelStream = nullptr;
    }
    assert(engine->getNbBindings() == 2);
    inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 1);
    // device buffers for the network's input and output bindings
    CUDA_CHECK(cudaMalloc((void**)&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc((void**)&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    CUDA_CHECK(cudaStreamCreate(&stream));
    // prepare input data cache in pinned memory
    CUDA_CHECK(cudaMallocHost((void**)&img_host, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    // prepare input data cache in device memory
    CUDA_CHECK(cudaMalloc((void**)&img_device, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    return true;
}
/**
 * @description: Inference
 * @param {Mat&} img
 * @return {*}
 */
bool InferTools::Inference(std::shared_ptr<cv::Mat> img, CNStreamInferData::Ptr result){
    unique_lock<std::mutex> lk(m_mutex);
    float* buffer_idx = (float*)buffers[inputIndex];
    if(img->empty()) return false;
    size_t size_image = img->cols * img->rows * 3;
    size_t size_image_dst = INPUT_H * INPUT_W * 3;
    // copy data to pinned memory
    memcpy(img_host, img->data, size_image);
    // copy data to device memory
    CUDA_CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
    // resize and normalize on the GPU, writing directly into the input binding
    preprocess_kernel_img(img_device, img->cols, img->rows, buffer_idx, INPUT_W, INPUT_H, stream);
    buffer_idx += size_image_dst; // advance past this image (a no-op with BATCH_SIZE == 1)
    // Run inference
    auto start = std::chrono::system_clock::now();
    doInference(*context, stream, (void**)buffers, prob, BATCH_SIZE);
    auto end = std::chrono::system_clock::now();
    if(config::getPtr()->Debug) DebugL << "inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
    // apply non-maximum suppression to the raw detections of each batch entry
    int fcount = 1;
    std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
    for(int b = 0; b < fcount; b++){
        auto& res = batch_res[b];
        nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
    }
    if(result == nullptr) return false;
    result->width = img->cols;
    result->height = img->rows;
    // convert each detection to a bounding box normalized to the source image size
    for (int b = 0; b < fcount; b++){
        auto& res = batch_res[b];
        for (size_t j = 0; j < res.size(); j++) {
            int class_id = res[j].class_id;
            cv::Rect r = get_rect(*img, res[j].bbox);
            InferInfo data;
            data.Label = std::to_string(class_id);
            data.Score = res[j].conf;
            data.BBox.x = (double)r.x / (double)result->width;
            data.BBox.y = (double)r.y / (double)result->height;
            data.BBox.w = (double)r.width / (double)result->width;
            data.BBox.h = (double)r.height / (double)result->height;
            result->Objects.push_back(data);
        }
    }
    // proportion filter: drop the frame entirely if the box proportions fail the check
    if(!InfineFilter::getPtr()->proportionalFilter(result)){
        result->Objects.clear();
        result = nullptr;
        return false;
    }
    // foreign-object filter
    InfineFilter::getPtr()->AlienFilter(result);
#ifdef TEST
    // draw and dump the detections for visual debugging
    for (int b = 0; b < fcount; b++) {
        auto& res = batch_res[b];
        DebugL << res.size() << endl;
        for (size_t j = 0; j < res.size(); j++) {
            cv::Rect r = get_rect(*img, res[j].bbox);
            cv::rectangle(*img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
            cv::putText(*img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
        }
        cv::imwrite("_" + std::to_string(b) + ".jpg", *img);
    }
#endif
    return true;
}
/**
 * @description: Release resources
 * @return {*}
 */
void InferTools::Destroy(){
    cudaStreamDestroy(stream);
    CUDA_CHECK(cudaFree(img_device));
    CUDA_CHECK(cudaFreeHost(img_host));
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[outputIndex]));
    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();
}
/**
 * @description: doInference
 * @param {IExecutionContext&} context
 * @param {cudaStream_t&} stream
 * @param {void} *
 * @param {float*} output
 * @param {int} batchSize
 * @return {*}
 */
void InferTools::doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* output, int batchSize) {
    // infer on the batch asynchronously, and DMA output back to host
    context.enqueue(batchSize, buffers, stream, nullptr);
    CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    cudaStreamSynchronize(stream);
}

}
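
// ---------------------------------------------------------------------------
// Hypothetical usage sketch, not part of the original file: the expected call
// sequence for this singleton is Init -> Inference -> Destroy. The engine
// filename, the frame source, and the construction of CNStreamInferData via
// std::make_shared are illustrative assumptions, not APIs confirmed here.
// ---------------------------------------------------------------------------
#if 0
#include "InferTools.hpp"
#include <opencv2/opencv.hpp>
#include <cstdio>

int main(){
    auto tools = gsd::InferTools::getPtr();
    if(!tools->Init("yolov5.engine")) return -1;   // deserialize the TensorRT engine
    auto frame = std::make_shared<cv::Mat>(cv::imread("frame.jpg"));
    CNStreamInferData::Ptr result = std::make_shared<CNStreamInferData>();
    if(tools->Inference(frame, result)){           // boxes come back normalized to [0,1]
        for(const auto& obj : result->Objects){
            printf("label=%s score=%.2f\n", obj.Label.c_str(), obj.Score);
        }
    }
    tools->Destroy();                              // release CUDA buffers and TRT objects
    return 0;
}
#endif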