#include "InferTools.hpp"

namespace gsd{

#define BATCH_SIZE 1
#define MAX_IMAGE_INPUT_SIZE_THRESH (3000 * 3000) // must exceed the pixel count of the largest expected input image!
#define NMS_THRESH 0.1
#define CONF_THRESH 0.1

static gsd::Logger gLogger;

// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1; // we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes with conf >= 0.1

static float prob[BATCH_SIZE * OUTPUT_SIZE];
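// Layout of one output slot in prob (inferred from the OUTPUT_SIZE formula):
// the leading float (the "+ 1" above) holds the number of valid detections,
// followed by the raw Yolo::Detection records.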

/**
 * @description: Get the shared singleton instance.
 * @return {std::shared_ptr<InferTools>}
 */
std::shared_ptr<InferTools> InferTools::getPtr(){
    // Function-local static initialization is thread-safe in C++11, so the
    // instance is created exactly once even under concurrent first calls.
    static std::shared_ptr<InferTools> m_InferTools(new InferTools);
    return m_InferTools;
}

/**
 * @description: Initialization: deserialize the TensorRT engine and allocate
 *               all device/host buffers.
 * @return {bool} true on success
 */
bool InferTools::Init(std::string enginefile){
    cudaSetDevice(this->device);
    std::ifstream file(enginefile, std::ios::binary);
    if(!file.good()){
        ErrorL << "failed to read engine file: " << enginefile << std::endl;
        return false;
    }
    size_t size = 0;
    file.seekg(0, file.end);
    size = file.tellg();
    file.seekg(0, file.beg);
    if(trtModelStream == nullptr){
        trtModelStream = new char[size];
    }
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();
    if(this->runtime == nullptr){
        runtime = createInferRuntime(gLogger);
    }
    assert(runtime != nullptr);
    if(this->engine == nullptr){
        this->engine = runtime->deserializeCudaEngine(trtModelStream, size);
    }
    assert(engine != nullptr);
    if(this->context == nullptr){
        this->context = engine->createExecutionContext();
    }
    assert(context != nullptr);
    // The serialized stream is no longer needed once the engine is built.
    delete[] trtModelStream;
    trtModelStream = nullptr;
    assert(engine->getNbBindings() == 2);
    inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 1);
    CUDA_CHECK(cudaMalloc((void**)&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc((void**)&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    CUDA_CHECK(cudaStreamCreate(&stream));
    // prepare input data cache in pinned memory
    CUDA_CHECK(cudaMallocHost((void**)&img_host, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    // prepare input data cache in device memory
    CUDA_CHECK(cudaMalloc((void**)&img_device, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    return true;
}
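
// Note: a serialized .engine file is tied to the TensorRT version and the GPU
// architecture it was built on; deserializeCudaEngine() returns nullptr (and
// the assert above fires) for an engine built with a different setup.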

/**
 * @description: Inference: run one image through the network and fill `result`
 *               with normalized detections.
 * @param {std::shared_ptr<cv::Mat>} img input image
 * @param {CNStreamInferData::Ptr} result output container
 * @return {bool} true on success
 */
bool InferTools::Inference(std::shared_ptr<cv::Mat> img, CNStreamInferData::Ptr result){
    std::unique_lock<std::mutex> lk(m_mutex);
    float* buffer_idx = (float*)buffers[inputIndex];
    if(img->empty()) return false;
    size_t size_image = img->cols * img->rows * 3;
    size_t size_image_dst = INPUT_H * INPUT_W * 3;
    // guard the staging buffers, which were sized for MAX_IMAGE_INPUT_SIZE_THRESH pixels
    if(size_image > MAX_IMAGE_INPUT_SIZE_THRESH * 3){
        ErrorL << "input image exceeds MAX_IMAGE_INPUT_SIZE_THRESH" << std::endl;
        return false;
    }
    // copy data to pinned memory
    memcpy(img_host, img->data, size_image);
    // copy data to device memory
    CUDA_CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
    preprocess_kernel_img(img_device, img->cols, img->rows, buffer_idx, INPUT_W, INPUT_H, stream);
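    // The CUDA kernel above is assumed (from its signature) to handle the
    // letterbox resize, BGR->RGB swap, HWC->CHW transpose and normalization in
    // one pass, writing the network-ready tensor straight into the input binding.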
    buffer_idx += size_image_dst; // advance to the next image slot (only relevant when BATCH_SIZE > 1)

    // Run inference
    auto start = std::chrono::system_clock::now();
    doInference(*context, stream, (void**)buffers, prob, BATCH_SIZE);
    auto end = std::chrono::system_clock::now();
    if(config::getPtr()->Debug) DebugL << "inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
    int fcount = 1; // batch of one image
    std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
    for(int b = 0; b < fcount; b++){
        auto& res = batch_res[b];
        nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
    }
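    // nms() is expected to decode the raw output slot (count + Detection
    // records) into boxes, drop those below CONF_THRESH, and suppress
    // overlapping boxes whose IoU exceeds NMS_THRESH.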
    if(result == nullptr) return false;
    result->width = img->cols;
    result->height = img->rows;
    for (int b = 0; b < fcount; b++){
        auto& res = batch_res[b];
        for (size_t j = 0; j < res.size(); j++) {
            int class_id = res[j].class_id;
            cv::Rect r = get_rect(*img, res[j].bbox);
            InferInfo data;
            data.Label = std::to_string(class_id);
            data.Score = res[j].conf;
            // normalize pixel coordinates to [0, 1] relative to the source image
            data.BBox.x = (double)r.x / (double)result->width;
            data.BBox.y = (double)r.y / (double)result->height;
            data.BBox.w = (double)r.width / (double)result->width;
            data.BBox.h = (double)r.height / (double)result->height;
            result->Objects.push_back(data);
        }
    }
    // proportional filter
    if(!InfineFilter::getPtr()->proportionalFilter(result)){
        result->Objects.clear();
        return false;
    }
    // foreign-object filter
    InfineFilter::getPtr()->AlienFilter(result);
#ifdef TEST
    // draw and dump the detections for visual debugging
    for (int b = 0; b < fcount; b++) {
        auto& res = batch_res[b];
        DebugL << res.size() << std::endl;
        for (size_t j = 0; j < res.size(); j++) {
            cv::Rect r = get_rect(*img, res[j].bbox);
            cv::rectangle(*img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
            cv::putText(*img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
        }
        cv::imwrite("_" + std::to_string(b) + ".jpg", *img);
    }
#endif
    return true;
}
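
/* Minimal per-frame usage sketch (hypothetical caller; the file name and the
 * CNStreamInferData construction are illustrative assumptions):
 *     auto img    = std::make_shared<cv::Mat>(cv::imread("frame.jpg"));
 *     auto result = std::make_shared<CNStreamInferData>();
 *     if(InferTools::getPtr()->Inference(img, result)){
 *         for(const auto& obj : result->Objects)
 *             DebugL << obj.Label << " " << obj.Score << std::endl;
 *     }
 */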

/**
 * @description: Release resources: free the CUDA buffers and destroy the
 *               TensorRT objects.
 * @return {*}
 */
void InferTools::Destroy(){
    CUDA_CHECK(cudaStreamDestroy(stream));
    CUDA_CHECK(cudaFree(img_device));
    CUDA_CHECK(cudaFreeHost(img_host));
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[outputIndex]));
    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();
}
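
// Teardown runs in reverse order of creation (context, then engine, then
// runtime), as the pre-TensorRT-8 destroy() API convention expects.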

/**
 * @description: doInference
 * @param {IExecutionContext&} context
 * @param {cudaStream_t&} stream
 * @param {void**} buffers device bindings (input, output)
 * @param {float*} output host buffer for the results
 * @param {int} batchSize
 * @return {*}
 */
void InferTools::doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* output, int batchSize) {
    // infer on the batch asynchronously, and DMA output back to host
    context.enqueue(batchSize, buffers, stream, nullptr);
    CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    // block until both the inference and the copy have completed
    cudaStreamSynchronize(stream);
}
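
// Note: IExecutionContext::enqueue() is the implicit-batch API used above; on
// TensorRT 8+ (explicit batch) the equivalent call is enqueueV2(buffers,
// stream, nullptr), with the batch size carried by the binding dimensions.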
}