#include "common.hpp"

// NOTE(review): the template argument lists in this file (std::vector<...>,
// std::map<...>, the cast targets) were reconstructed from how each value is
// used below. Confirm every signature against the declarations in common.hpp,
// in particular the element type of `dets` in addYoLoLayer.

/**
 * Converts a box expressed as (center x, center y, width, height) into a
 * cv::Rect scaled by the ratio between the network input size and `img`.
 *
 * The smaller of the two ratios INPUT_W / img.cols and INPUT_H / img.rows is
 * used as the scale; along the other axis half of the unused input extent is
 * subtracted first (presumably undoing letterbox padding — confirm against the
 * preprocessing code).
 *
 * @param img   Image whose cols/rows define the target coordinate system.
 * @param bbox  Box as {center x, center y, width, height}.
 * @return Rounded rectangle (x, y, width, height).
 */
cv::Rect get_rect(cv::Mat& img, float bbox[4]) {
    float l, r, t, b;
    float r_w = Yolo::INPUT_W / (img.cols * 1.0);
    float r_h = Yolo::INPUT_H / (img.rows * 1.0);
    if (r_h > r_w) {
        // Width ratio is the limiting one: offset is applied vertically.
        const auto pad = (Yolo::INPUT_H - r_w * img.rows) / 2;
        l = bbox[0] - bbox[2] / 2.f;
        r = bbox[0] + bbox[2] / 2.f;
        t = bbox[1] - bbox[3] / 2.f - pad;
        b = bbox[1] + bbox[3] / 2.f - pad;
        l = l / r_w;
        r = r / r_w;
        t = t / r_w;
        b = b / r_w;
    } else {
        // Height ratio is the limiting one: offset is applied horizontally.
        const auto pad = (Yolo::INPUT_W - r_h * img.cols) / 2;
        l = bbox[0] - bbox[2] / 2.f - pad;
        r = bbox[0] + bbox[2] / 2.f - pad;
        t = bbox[1] - bbox[3] / 2.f;
        b = bbox[1] + bbox[3] / 2.f;
        l = l / r_h;
        r = r / r_h;
        t = t / r_h;
        b = b / r_h;
    }
    return cv::Rect(round(l), round(t), round(r - l), round(b - t));
}

/**
 * Intersection-over-union of two boxes given as
 * {center x, center y, width, height}.
 *
 * @return 0 when the boxes do not overlap or when the union area is not
 *         positive (avoids a 0/0 NaN for degenerate boxes); otherwise
 *         intersection area / union area.
 */
float iou(float lbox[4], float rbox[4]) {
    float interBox[] = {
        (std::max)(lbox[0] - lbox[2] / 2.f, rbox[0] - rbox[2] / 2.f),  // left
        (std::min)(lbox[0] + lbox[2] / 2.f, rbox[0] + rbox[2] / 2.f),  // right
        (std::max)(lbox[1] - lbox[3] / 2.f, rbox[1] - rbox[3] / 2.f),  // top
        (std::min)(lbox[1] + lbox[3] / 2.f, rbox[1] + rbox[3] / 2.f),  // bottom
    };

    if (interBox[2] > interBox[3] || interBox[0] > interBox[1])
        return 0.0f;

    float interBoxS = (interBox[1] - interBox[0]) * (interBox[3] - interBox[2]);
    const float unionS = lbox[2] * lbox[3] + rbox[2] * rbox[3] - interBoxS;
    if (unionS <= 0.0f)
        return 0.0f;  // degenerate boxes: report "no overlap" instead of NaN
    return interBoxS / unionS;
}

/** Ordering predicate: true when `a` has a higher `conf` than `b`. */
bool cmp(const Yolo::Detection& a, const Yolo::Detection& b) {
    return a.conf > b.conf;
}

/**
 * Per-class non-maximum suppression over a flat float buffer.
 *
 * `output[0]` holds the number of records; the records follow, each
 * sizeof(Yolo::Detection) / sizeof(float) floats long. Records whose float at
 * offset 4 is <= conf_thresh are skipped (presumably that slot is `conf` —
 * confirm against the Yolo::Detection layout).
 *
 * @param res          Receives the surviving detections (appended).
 * @param output       Count followed by packed detection records.
 * @param conf_thresh  Records at or below this value are discarded.
 * @param nms_thresh   A lower-confidence box of the same class is dropped
 *                     when its IoU with a kept box exceeds this value.
 */
void nms(std::vector<Yolo::Detection>& res, float *output, float conf_thresh, float nms_thresh) {
    int det_size = sizeof(Yolo::Detection) / sizeof(float);
    // Key type follows the class_id member so it cannot drift from the struct.
    using ClassKey = decltype(Yolo::Detection::class_id);
    std::map<ClassKey, std::vector<Yolo::Detection>> by_class;

    for (int i = 0; i < output[0] && i < Yolo::MAX_OUTPUT_BBOX_COUNT; i++) {
        if (output[1 + det_size * i + 4] <= conf_thresh) continue;
        Yolo::Detection det;
        memcpy(&det, &output[1 + det_size * i], det_size * sizeof(float));
        // operator[] creates the per-class vector on first use.
        by_class[det.class_id].push_back(det);
    }

    for (auto& entry : by_class) {
        //std::cout << it->second[0].class_id << " --- " << std::endl;
        auto& dets = entry.second;
        std::sort(dets.begin(), dets.end(), cmp);
        for (size_t keep = 0; keep < dets.size(); ++keep) {
            auto& item = dets[keep];
            res.push_back(item);
            for (size_t other = keep + 1; other < dets.size(); ++other) {
                if (iou(item.bbox, dets[other].bbox) > nms_thresh) {
                    // Erasing after `keep` leaves `item` valid; step back so
                    // the element shifted into this slot is examined too.
                    dets.erase(dets.begin() + other);
                    --other;
                }
            }
        }
    }
}

/**
 * Reads a text weight file into a name -> Weights map.
 *
 * Format as parsed here: a decimal blob count, then for each blob a name, a
 * decimal element count, and that many hexadecimal 32-bit words.
 *
 * Each blob's buffer is obtained with malloc and handed to the returned map;
 * nothing in this function releases it.
 *
 * @param file  Path of the weight file.
 * @return Map from blob name to Weights (type kFLOAT).
 */
std::map<std::string, Weights> loadWeights(const std::string file) {
    std::cout << "Loading weights: " << file << std::endl;
    std::map<std::string, Weights> weightMap;

    // Open weights file
    std::ifstream input(file);
    assert(input.is_open() && "Unable to load weight file. please check if the .wts file path is right!!!!!!");

    // Read number of weight blobs
    int32_t count;
    input >> count;
    assert(count > 0 && "Invalid weight map file.");

    while (count--) {
        Weights wt{ DataType::kFLOAT, nullptr, 0 };
        uint32_t size;

        // Read name and type of blob
        std::string name;
        input >> name >> std::dec >> size;
        wt.type = DataType::kFLOAT;

        // Load blob. Element size is sizeof(uint32_t); the previous
        // sizeof(val) measured the pointer and over-allocated.
        uint32_t* val = static_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
        for (uint32_t x = 0, y = size; x < y; ++x) {
            input >> std::hex >> val[x];
        }
        wt.values = val;
        wt.count = size;
        weightMap[name] = wt;
    }

    return weightMap;
}

/**
 * Adds a per-channel scale layer computed from the four arrays
 * `<lname>.weight`, `.bias`, `.running_mean` and `.running_var`:
 *
 *   scale = weight / sqrt(var + eps)
 *   shift = bias - mean * weight / sqrt(var + eps)
 *   power = 1
 *
 * The three computed arrays are malloc'ed and stored back into weightMap
 * under `<lname>.scale`, `.shift` and `.power`.
 *
 * @return The created scale layer (asserted non-null).
 */
IScaleLayer* addBatchNorm2d(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, std::string lname, float eps) {
    float *gamma = (float*)weightMap[lname + ".weight"].values;
    float *beta = (float*)weightMap[lname + ".bias"].values;
    float *mean = (float*)weightMap[lname + ".running_mean"].values;
    float *var = (float*)weightMap[lname + ".running_var"].values;
    int len = weightMap[lname + ".running_var"].count;

    float *scval = static_cast<float*>(malloc(sizeof(float) * len));
    float *shval = static_cast<float*>(malloc(sizeof(float) * len));
    float *pval = static_cast<float*>(malloc(sizeof(float) * len));
    for (int i = 0; i < len; i++) {
        scval[i] = gamma[i] / sqrt(var[i] + eps);
        shval[i] = beta[i] - mean[i] * gamma[i] / sqrt(var[i] + eps);
        pval[i] = 1.0;
    }
    Weights scale{ DataType::kFLOAT, scval, len };
    Weights shift{ DataType::kFLOAT, shval, len };
    Weights power{ DataType::kFLOAT, pval, len };

    weightMap[lname + ".scale"] = scale;
    weightMap[lname + ".shift"] = shift;
    weightMap[lname + ".power"] = power;

    IScaleLayer* scale_1 = network->addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    assert(scale_1);
    return scale_1;
}

/**
 * Convolution (no bias) -> batch norm (eps 1e-3) -> x * sigmoid(x).
 *
 * @param outch  Number of output maps.
 * @param ksize  Square kernel size; padding is ksize / 3 (integer division,
 *               e.g. 0 for ksize 1 and 1 for ksize 3).
 * @param s      Stride in both dimensions.
 * @param g      Number of groups.
 * @param lname  Weight-name prefix; reads `<lname>.conv.weight` and
 *               `<lname>.bn.*`.
 * @return The element-wise product layer.
 */
ILayer* convBlock(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int outch, int ksize, int s, int g, std::string lname) {
    Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
    int p = ksize / 3;
    IConvolutionLayer* conv1 = network->addConvolutionNd(input, outch, DimsHW{ ksize, ksize }, weightMap[lname + ".conv.weight"], emptywts);
    assert(conv1);
    conv1->setStrideNd(DimsHW{ s, s });
    conv1->setPaddingNd(DimsHW{ p, p });
    conv1->setNbGroups(g);
    IScaleLayer* bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + ".bn", 1e-3);

    // silu = x * sigmoid
    auto sig = network->addActivation(*bn1->getOutput(0), ActivationType::kSIGMOID);
    assert(sig);
    auto ew = network->addElementWise(*bn1->getOutput(0), *sig->getOutput(0), ElementWiseOperation::kPROD);
    assert(ew);
    return ew;
}

/**
 * Takes four stride-2 slices of `input` (offsets (0,0), (1,0), (0,1), (1,1)
 * in the last two dimensions), concatenates them and applies a convBlock with
 * stride 1 and one group.
 *
 * @param inch   Extent of the first dimension of each slice.
 * @param outch  Output maps of the convBlock.
 * @param ksize  Kernel size of the convBlock.
 */
ILayer* focus(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int inch, int outch, int ksize, std::string lname) {
    const Dims3 slice_size{ inch, Yolo::INPUT_H / 2, Yolo::INPUT_W / 2 };
    const Dims3 slice_stride{ 1, 2, 2 };
    ISliceLayer *s1 = network->addSlice(input, Dims3{ 0, 0, 0 }, slice_size, slice_stride);
    ISliceLayer *s2 = network->addSlice(input, Dims3{ 0, 1, 0 }, slice_size, slice_stride);
    ISliceLayer *s3 = network->addSlice(input, Dims3{ 0, 0, 1 }, slice_size, slice_stride);
    ISliceLayer *s4 = network->addSlice(input, Dims3{ 0, 1, 1 }, slice_size, slice_stride);
    ITensor* inputTensors[] = { s1->getOutput(0), s2->getOutput(0), s3->getOutput(0), s4->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 4);
    auto conv = convBlock(network, weightMap, *cat->getOutput(0), outch, ksize, 1, 1, lname + ".conv");
    return conv;
}

/**
 * Two stacked convBlocks (1x1 to int(c2 * e) maps, then 3x3 to c2 maps with
 * `g` groups). When `shortcut` is set and c1 == c2 the input is added to the
 * result element-wise.
 */
ILayer* bottleneck(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, bool shortcut, int g, float e, std::string lname) {
    auto cv1 = convBlock(network, weightMap, input, (int)((float)c2 * e), 1, 1, 1, lname + ".cv1");
    auto cv2 = convBlock(network, weightMap, *cv1->getOutput(0), c2, 3, 1, g, lname + ".cv2");
    if (shortcut && c1 == c2) {
        auto ew = network->addElementWise(input, *cv2->getOutput(0), ElementWiseOperation::kSUM);
        return ew;
    }
    return cv2;
}

/**
 * Two-branch block: one branch is convBlock `.cv1` -> `n` bottlenecks -> plain
 * 1x1 convolution `.cv3`; the other is a plain 1x1 convolution `.cv2` on the
 * input. The branches are concatenated, then batch norm (eps 1e-4), leaky
 * ReLU (alpha 0.1) and a final 1x1 convBlock `.cv4` to c2 maps.
 */
ILayer* bottleneckCSP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname) {
    Weights emptywts{ DataType::kFLOAT, nullptr, 0 };
    int c_ = (int)((float)c2 * e);
    auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");
    auto cv2 = network->addConvolutionNd(input, c_, DimsHW{ 1, 1 }, weightMap[lname + ".cv2.weight"], emptywts);
    ITensor *y1 = cv1->getOutput(0);
    for (int i = 0; i < n; i++) {
        auto b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, g, 1.0, lname + ".m." + std::to_string(i));
        y1 = b->getOutput(0);
    }
    auto cv3 = network->addConvolutionNd(*y1, c_, DimsHW{ 1, 1 }, weightMap[lname + ".cv3.weight"], emptywts);

    ITensor* inputTensors[] = { cv3->getOutput(0), cv2->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 2);

    IScaleLayer* bn = addBatchNorm2d(network, weightMap, *cat->getOutput(0), lname + ".bn", 1e-4);
    auto lr = network->addActivation(*bn->getOutput(0), ActivationType::kLEAKY_RELU);
    lr->setAlpha(0.1);

    auto cv4 = convBlock(network, weightMap, *lr->getOutput(0), c2, 1, 1, 1, lname + ".cv4");
    return cv4;
}

/**
 * Two-branch block: convBlock `.cv1` followed by `n` bottlenecks on one side,
 * convBlock `.cv2` on the other; the two outputs are concatenated and passed
 * through a 1x1 convBlock `.cv3` to c2 maps.
 */
ILayer* C3(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int n, bool shortcut, int g, float e, std::string lname) {
    int c_ = (int)((float)c2 * e);
    auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");
    auto cv2 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv2");
    ITensor *y1 = cv1->getOutput(0);
    for (int i = 0; i < n; i++) {
        auto b = bottleneck(network, weightMap, *y1, c_, c_, shortcut, g, 1.0, lname + ".m." + std::to_string(i));
        y1 = b->getOutput(0);
    }

    ITensor* inputTensors[] = { y1, cv2->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 2);

    auto cv3 = convBlock(network, weightMap, *cat->getOutput(0), c2, 1, 1, 1, lname + ".cv3");
    return cv3;
}

/**
 * 1x1 convBlock to c1 / 2 maps, then three stride-1 max pools with kernels
 * k1, k2, k3 (padding k / 2) applied in parallel to that output. The conv
 * output and the three pooled outputs are concatenated and passed through a
 * 1x1 convBlock to c2 maps.
 */
ILayer* SPP(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k1, int k2, int k3, std::string lname) {
    int c_ = c1 / 2;
    auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");

    auto pool1 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k1, k1 });
    pool1->setPaddingNd(DimsHW{ k1 / 2, k1 / 2 });
    pool1->setStrideNd(DimsHW{ 1, 1 });
    auto pool2 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k2, k2 });
    pool2->setPaddingNd(DimsHW{ k2 / 2, k2 / 2 });
    pool2->setStrideNd(DimsHW{ 1, 1 });
    auto pool3 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k3, k3 });
    pool3->setPaddingNd(DimsHW{ k3 / 2, k3 / 2 });
    pool3->setStrideNd(DimsHW{ 1, 1 });

    ITensor* inputTensors[] = { cv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), pool3->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 4);

    auto cv2 = convBlock(network, weightMap, *cat->getOutput(0), c2, 1, 1, 1, lname + ".cv2");
    return cv2;
}

/**
 * Like SPP but the three stride-1 max pools share one kernel size `k` and are
 * chained: each pool consumes the previous pool's output. The conv output and
 * all three pooled outputs are concatenated and passed through a 1x1
 * convBlock to c2 maps.
 */
ILayer* SPPF(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2, int k, std::string lname) {
    int c_ = c1 / 2;
    auto cv1 = convBlock(network, weightMap, input, c_, 1, 1, 1, lname + ".cv1");

    auto pool1 = network->addPoolingNd(*cv1->getOutput(0), PoolingType::kMAX, DimsHW{ k, k });
    pool1->setPaddingNd(DimsHW{ k / 2, k / 2 });
    pool1->setStrideNd(DimsHW{ 1, 1 });
    auto pool2 = network->addPoolingNd(*pool1->getOutput(0), PoolingType::kMAX, DimsHW{ k, k });
    pool2->setPaddingNd(DimsHW{ k / 2, k / 2 });
    pool2->setStrideNd(DimsHW{ 1, 1 });
    auto pool3 = network->addPoolingNd(*pool2->getOutput(0), PoolingType::kMAX, DimsHW{ k, k });
    pool3->setPaddingNd(DimsHW{ k / 2, k / 2 });
    pool3->setStrideNd(DimsHW{ 1, 1 });

    ITensor* inputTensors[] = { cv1->getOutput(0), pool1->getOutput(0), pool2->getOutput(0), pool3->getOutput(0) };
    auto cat = network->addConcatenation(inputTensors, 4);

    auto cv2 = convBlock(network, weightMap, *cat->getOutput(0), c2, 1, 1, 1, lname + ".cv2");
    return cv2;
}

/**
 * Splits the `<lname>.anchor_grid` weights into consecutive groups of
 * Yolo::CHECK_COUNT * 2 floats.
 *
 * @return One vector per complete group; trailing values that do not fill a
 *         group are ignored.
 */
std::vector<std::vector<float>> getAnchors(std::map<std::string, Weights>& weightMap, std::string lname) {
    std::vector<std::vector<float>> anchors;
    Weights wts = weightMap[lname + ".anchor_grid"];
    int anchor_len = Yolo::CHECK_COUNT * 2;
    for (int i = 0; i < wts.count / anchor_len; i++) {
        auto *p = (const float*)wts.values + i * anchor_len;
        std::vector<float> anchor(p, p + anchor_len);
        anchors.push_back(anchor);
    }
    return anchors;
}

/**
 * Creates the "YoloLayer_TRT" (version "1") plugin and adds it to `network`
 * with the first output of every layer in `dets` as its inputs.
 *
 * Two plugin fields are passed: "netinfo" (class count, input width, input
 * height, max output box count) and "kernels" (one Yolo::YoloKernel per
 * anchor group, with width/height = input size / scale, where scale starts at
 * 8 and doubles for each group).
 *
 * NOTE(review): the element type of `dets` is reconstructed; the body only
 * requires a pointer type exposing getOutput(int). Confirm against common.hpp.
 */
IPluginV2Layer* addYoLoLayer(INetworkDefinition *network, std::map<std::string, Weights>& weightMap, std::string lname, std::vector<IConvolutionLayer*> dets) {
    auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1");
    assert(creator && "YoloLayer_TRT plugin creator not found in the plugin registry");
    auto anchors = getAnchors(weightMap, lname);

    PluginField plugin_fields[2];
    int netinfo[4] = {Yolo::CLASS_NUM, Yolo::INPUT_W, Yolo::INPUT_H, Yolo::MAX_OUTPUT_BBOX_COUNT};
    plugin_fields[0].data = netinfo;
    plugin_fields[0].length = 4;
    plugin_fields[0].name = "netinfo";
    // Field type is kept as in the original even though netinfo holds ints.
    plugin_fields[0].type = PluginFieldType::kFLOAT32;

    int scale = 8;
    std::vector<Yolo::YoloKernel> kernels;
    for (size_t i = 0; i < anchors.size(); i++) {
        Yolo::YoloKernel kernel;
        kernel.width = Yolo::INPUT_W / scale;
        kernel.height = Yolo::INPUT_H / scale;
        memcpy(kernel.anchors, &anchors[i][0], anchors[i].size() * sizeof(float));
        kernels.push_back(kernel);
        scale *= 2;
    }
    // data() is well-defined even when the vector is empty, unlike &v[0].
    plugin_fields[1].data = kernels.data();
    plugin_fields[1].length = kernels.size();
    plugin_fields[1].name = "kernels";
    plugin_fields[1].type = PluginFieldType::kFLOAT32;

    PluginFieldCollection plugin_data;
    plugin_data.nbFields = 2;
    plugin_data.fields = plugin_fields;
    IPluginV2 *plugin_obj = creator->createPlugin("yololayer", &plugin_data);

    std::vector<ITensor*> input_tensors;
    for (auto det : dets) {
        input_tensors.push_back(det->getOutput(0));
    }
    auto yolo = network->addPluginV2(input_tensors.data(), input_tensors.size(), *plugin_obj);
    return yolo;
}