InferTools.cpp

#include "InferTools.hpp"

namespace gsd{

#define BATCH_SIZE 1
#define MAX_IMAGE_INPUT_SIZE_THRESH (3000 * 3000) // make sure this exceeds the maximum size of the input images!
#define NMS_THRESH 0.1
#define CONF_THRESH 0.1

static gsd::Logger gLogger;

// stuff we know about the network and the input/output blobs
static const int INPUT_H = Yolo::INPUT_H;
static const int INPUT_W = Yolo::INPUT_W;
static const int CLASS_NUM = Yolo::CLASS_NUM;
// we assume the yololayer outputs no more than MAX_OUTPUT_BBOX_COUNT boxes with conf >= 0.1
static const int OUTPUT_SIZE = Yolo::MAX_OUTPUT_BBOX_COUNT * sizeof(Yolo::Detection) / sizeof(float) + 1;

static float prob[BATCH_SIZE * OUTPUT_SIZE];
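// Output layout note (an assumption based on the tensorrtx-style YOLO layer
// this mirrors, not stated in this file): prob[0] carries the detection count
// and the remaining floats hold packed Yolo::Detection records, which is why
// OUTPUT_SIZE above adds the extra "+ 1" element.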
/**
 * @description: getPtr
 * @return {*}
 */
std::shared_ptr<InferTools> InferTools::getPtr(){
    // function-local static: initialized exactly once, thread-safe since C++11
    static std::shared_ptr<InferTools> m_InferTools(new InferTools);
    return m_InferTools;
}
/**
 * @description: Initialization
 * @return {*}
 */
bool InferTools::Init(std::string enginefile){
    cudaSetDevice(this->device);
    std::ifstream file(enginefile, std::ios::binary);
    if(!file.good()){
        ErrorL << "failed to read " << enginefile << "!" << std::endl;
        return false;
    }
    // read the serialized engine into a temporary buffer
    size_t size = 0;
    file.seekg(0, file.end);
    size = file.tellg();
    file.seekg(0, file.beg);
    if(trtModelStream == nullptr){
        trtModelStream = new char[size];
    }
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();
    if(this->runtime == nullptr){
        runtime = createInferRuntime(gLogger);
    }
    assert(runtime != nullptr);
    if(this->engine == nullptr){
        this->engine = runtime->deserializeCudaEngine(trtModelStream, size);
    }
    assert(engine != nullptr);
    if(this->context == nullptr){
        this->context = engine->createExecutionContext();
    }
    assert(context != nullptr);
    // the serialized stream is no longer needed once the engine is deserialized
    if(trtModelStream != nullptr){
        delete[] trtModelStream;
        trtModelStream = nullptr;
    }
    assert(engine->getNbBindings() == 2);
    inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 1);
    // device buffers for the network's input and output bindings
    CUDA_CHECK(cudaMalloc((void**)&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc((void**)&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    CUDA_CHECK(cudaStreamCreate(&stream));
    // prepare input data cache in pinned memory
    CUDA_CHECK(cudaMallocHost((void**)&img_host, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    // prepare input data cache in device memory
    CUDA_CHECK(cudaMalloc((void**)&img_device, MAX_IMAGE_INPUT_SIZE_THRESH * 3));
    return true;
}
/**
 * @description: Inference
 * @param {Mat&} img
 * @return {*}
 */
bool InferTools::Inference(std::shared_ptr<cv::Mat> img, CNStreamInferData::Ptr result){
    unique_lock<std::mutex> lk(m_mutex);
    float* buffer_idx = (float*)buffers[inputIndex];
    if(img->empty()) return false;
    size_t size_image = img->cols * img->rows * 3;
    size_t size_image_dst = INPUT_H * INPUT_W * 3;
    // copy data to pinned memory
    memcpy(img_host, img->data, size_image);
    // copy data to device memory
    CUDA_CHECK(cudaMemcpyAsync(img_device, img_host, size_image, cudaMemcpyHostToDevice, stream));
    // resize and normalize on the GPU, writing directly into the input binding
    preprocess_kernel_img(img_device, img->cols, img->rows, buffer_idx, INPUT_W, INPUT_H, stream);
    buffer_idx += size_image_dst; // advance past this image (a no-op with BATCH_SIZE == 1)
    // Run inference
    auto start = std::chrono::system_clock::now();
    doInference(*context, stream, (void**)buffers, prob, BATCH_SIZE);
    auto end = std::chrono::system_clock::now();
    if(config::getPtr()->Debug) DebugL << "inference time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
    // apply non-maximum suppression to the raw detections of each batch entry
    int fcount = 1;
    std::vector<std::vector<Yolo::Detection>> batch_res(fcount);
    for(int b = 0; b < fcount; b++){
        auto& res = batch_res[b];
        nms(res, &prob[b * OUTPUT_SIZE], CONF_THRESH, NMS_THRESH);
    }
    if(result == nullptr) return false;
    result->width = img->cols;
    result->height = img->rows;
    // convert each detection to a bounding box normalized to the source image size
    for (int b = 0; b < fcount; b++){
        auto& res = batch_res[b];
        for (size_t j = 0; j < res.size(); j++) {
            int class_id = res[j].class_id;
            cv::Rect r = get_rect(*img, res[j].bbox);
            InferInfo data;
            data.Label = std::to_string(class_id);
            data.Score = res[j].conf;
            data.BBox.x = (double)r.x / (double)result->width;
            data.BBox.y = (double)r.y / (double)result->height;
            data.BBox.w = (double)r.width / (double)result->width;
            data.BBox.h = (double)r.height / (double)result->height;
            result->Objects.push_back(data);
        }
    }
    // proportion filter: drop the frame entirely if the box proportions fail the check
    if(!InfineFilter::getPtr()->proportionalFilter(result)){
        result->Objects.clear();
        result = nullptr;
        return false;
    }
    // foreign-object filter
    InfineFilter::getPtr()->AlienFilter(result);
#ifdef TEST
    // draw and dump the detections for visual debugging
    for (int b = 0; b < fcount; b++) {
        auto& res = batch_res[b];
        DebugL << res.size() << endl;
        for (size_t j = 0; j < res.size(); j++) {
            cv::Rect r = get_rect(*img, res[j].bbox);
            cv::rectangle(*img, r, cv::Scalar(0x27, 0xC1, 0x36), 2);
            cv::putText(*img, std::to_string((int)res[j].class_id), cv::Point(r.x, r.y - 1), cv::FONT_HERSHEY_PLAIN, 1.2, cv::Scalar(0xFF, 0xFF, 0xFF), 2);
        }
        cv::imwrite("_" + std::to_string(b) + ".jpg", *img);
    }
#endif
    return true;
}
/**
 * @description: Release resources
 * @return {*}
 */
void InferTools::Destroy(){
    cudaStreamDestroy(stream);
    CUDA_CHECK(cudaFree(img_device));
    CUDA_CHECK(cudaFreeHost(img_host));
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[outputIndex]));
    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();
}
/**
 * @description: doInference
 * @param {IExecutionContext&} context
 * @param {cudaStream_t&} stream
 * @param {void} *
 * @param {float*} output
 * @param {int} batchSize
 * @return {*}
 */
void InferTools::doInference(IExecutionContext& context, cudaStream_t& stream, void **buffers, float* output, int batchSize) {
    // infer on the batch asynchronously, and DMA output back to host
    context.enqueue(batchSize, buffers, stream, nullptr);
    CUDA_CHECK(cudaMemcpyAsync(output, buffers[1], batchSize * OUTPUT_SIZE * sizeof(float), cudaMemcpyDeviceToHost, stream));
    cudaStreamSynchronize(stream);
}

}
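
// ---------------------------------------------------------------------------
// Hypothetical usage sketch, not part of the original file: the expected call
// sequence for this singleton is Init -> Inference -> Destroy. The engine
// filename, the frame source, and the construction of CNStreamInferData via
// std::make_shared are illustrative assumptions, not APIs confirmed here.
// ---------------------------------------------------------------------------
#if 0
#include "InferTools.hpp"
#include <opencv2/opencv.hpp>
#include <cstdio>

int main(){
    auto tools = gsd::InferTools::getPtr();
    if(!tools->Init("yolov5.engine")) return -1;   // deserialize the TensorRT engine
    auto frame = std::make_shared<cv::Mat>(cv::imread("frame.jpg"));
    CNStreamInferData::Ptr result = std::make_shared<CNStreamInferData>();
    if(tools->Inference(frame, result)){           // boxes come back normalized to [0,1]
        for(const auto& obj : result->Objects){
            printf("label=%s score=%.2f\n", obj.Label.c_str(), obj.Score);
        }
    }
    tools->Destroy();                              // release CUDA buffers and TRT objects
    return 0;
}
#endif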