
[YOLOv5s object detection] OpenCV loads an ONNX model for inference on the GPU

2022-06-21 11:35:00 Ten year dream Lab


Inference screenshot

Inference test

Source code:

main, the main program:

#include "yolo.h"
#include <iostream>
#include<opencv2//opencv.hpp>
#include<math.h>


using namespace std;
using namespace cv;
using namespace dnn;


int main()
{
  string model_path = "yolov5sGPU.onnx"; // works on the CPU; the GPU path needs a Release build
  Yolo test;
  Net net;
  if (test.readModel(net, model_path, true)) // false: CPU, true: GPU
  {
    cout << "read net ok!" << endl;
  }
  else {
    return -1;
  }
  bool video = true;
  if (video)
  {
    // Open the video file (or a camera) with OpenCV; how you obtain the frames does not matter
    cv::VideoCapture cap = cv::VideoCapture("test2.mp4");
    // The capture width and height do not matter either: every frame is resized to a
    // fixed size before inference. That fixed size must match what your trained YOLOv5
    // model expects; here it is the netWidth/netHeight of the Yolo class, 640 by default.
    //cap.set(cv::CAP_PROP_FRAME_WIDTH, 1000);
    //cap.set(cv::CAP_PROP_FRAME_HEIGHT, 800);


    cv::Mat frame;
    // generate one random color per class
    vector<Scalar> color;
    srand(time(0));
    for (int i = 0; i < 80; i++) { // 80 COCO classes
      int b = rand() % 256;
      int g = rand() % 256;
      int r = rand() % 256;
      color.push_back(Scalar(b, g, r));
    }
    //color.push_back(Scalar(0, 0, 255));
    while (cap.isOpened())
    {
      //  Read a frame 
      cap.read(frame);


      if (frame.empty())
      {
        std::cout << "Read frame failed! or The End!" << std::endl;
        break;
      }
      vector<Output> result;
      if (test.Detect(frame, net, result)) // returns true if any object was detected
      {
        test.drawPred(frame, result, color);
      }
      else {
        cout << "Detect Failed!" << endl;// CPU Pattern   There are many unrecognizable .GPU All modes are recognizable 
      }
      //resize(frame, frame, Size(960, 540));
      cv::imshow("result", frame);
      if (cv::waitKey(1) == 27) break;
    }
    cv::destroyWindow("result");
    return 0;
  }
  else // image directory
  {
    // generate one random color per class
    vector<Scalar> color;
    srand(time(0));
    for (int i = 0; i < 80; i++) { // 80 COCO classes
      int b = rand() % 256;
      int g = rand() % 256;
      int r = rand() % 256;
      color.push_back(Scalar(b, g, r));
    }
    //color.push_back(Scalar(0, 0, 255));
    String folder_path = "./image";
    //String folder_path = "./test2";
    //String folder_path = "./test";
    std::vector<cv::String> file_names;
    cv::glob(folder_path, file_names);   //get file names




    for (int i = 0; i < file_names.size(); i++) {
      vector<Output> result;
      cv::Mat img;
      std::cout << file_names[i] << std::endl;
      img = cv::imread(file_names[i]);
      if (!img.data) {
        continue;
      }
      //resize(img, img, Size(956, 800));
      if (test.Detect(img, net, result)) // returns true if any object was detected
      {
        test.drawPred(img, result, color);
      }
      else {
        cout << "Detect Failed!" << endl;// CPU Pattern   There are many unrecognizable .GPU All modes are recognizable 
      }
      //resize(img, img, Size(717, 600));//Size(956, 800)
      //resize(img, img, Size(1434, 1200));
      namedWindow("result", cv::WINDOW_AUTOSIZE);
      imshow("result", img);
      cv::waitKey(1000); // show each result for one second
    }
    //cv::destroyAllWindows();
    cv::destroyWindow("result");
    //system("pause");
  }


  return 0;
}

yolo.h

#pragma once
#include<iostream>
#include<opencv2/opencv.hpp>


#define YOLO_P6 false // whether to use the P6 model


struct Output {
  int id;             // class id of the detection
  float confidence;   // confidence of the detection
  cv::Rect box;       // bounding box
};


class Yolo {
public:
  Yolo() {
  }
  ~Yolo() {}
  bool readModel(cv::dnn::Net& net, std::string& netPath, bool isCuda);
  bool Detect(cv::Mat& SrcImg, cv::dnn::Net& net, std::vector<Output>& output);
  void drawPred(cv::Mat& img, std::vector<Output> result, std::vector<cv::Scalar> color);


private:
#if(defined YOLO_P6 && YOLO_P6==true)
  const float netAnchors[4][6] = { { 19,27, 44,40, 38,94 },{ 96,68, 86,152, 180,137 },{ 140,301, 303,264, 238,542 },{ 436,615, 739,380, 925,792 } };


  const int netWidth = 1280;  // width of the ONNX model input
  const int netHeight = 1280; // height of the ONNX model input


  const int strideSize = 4;  // number of output strides
#else
  const float netAnchors[3][6] = { { 10,13, 16,30, 33,23 },{ 30,61, 62,45, 59,119 },{ 116,90, 156,198, 373,326 } };


  const int netWidth = 640;   // width of the ONNX model input (yolov5s.onnx uses 640)
  const int netHeight = 640;  // height of the ONNX model input


  const int strideSize = 3;   // number of output strides
#endif // YOLO_P6


  const float netStride[4] = { 8, 16.0,32,64 };


  float boxThreshold = 0.25;
  float classThreshold = 0.25;


  float nmsThreshold = 0.45;
  float nmsScoreThreshold = boxThreshold * classThreshold;


  std::vector<std::string> className = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
  "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
    "hair drier", "toothbrush" };
};
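
For the default 640×640 (non-P6) configuration above, the size of the network output that Detect() later walks through follows directly from these constants: the three strides 8, 16 and 32 give grids of 80×80, 40×40 and 20×20 cells, each cell predicts 3 anchor boxes, so there are 3 × (80×80 + 40×40 + 20×20) = 25200 candidate rows, and each row holds className.size() + 5 = 85 values (x, y, w, h, objectness, then 80 class scores). This matches the 1 × 25200 × 85 output of the standard yolov5s ONNX export and is exactly the layout the pointer pdata steps through in yolo.cpp below.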

yolo.cpp

#include"yolo.h"
using namespace std;
using namespace cv;
using namespace cv::dnn;


bool Yolo::readModel(Net& net, string& netPath, bool isCuda = false) {
  try {
    net = readNet(netPath);
  }
  catch (const std::exception&) {
    return false;
  }
  //cuda
  if (isCuda) {
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); // or DNN_TARGET_CUDA_FP16
  }
  //cpu
  else {
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
  }
  return true;
}
bool Yolo::Detect(Mat& SrcImg, Net& net, vector<Output>& output) {
  Mat blob;
  int col = SrcImg.cols;
  int row = SrcImg.rows;
  int maxLen = MAX(col, row);
  Mat netInputImg = SrcImg.clone();
  if (maxLen > 1.2 * col || maxLen > 1.2 * row) {
    Mat resizeImg = Mat::zeros(maxLen, maxLen, CV_8UC3);
    SrcImg.copyTo(resizeImg(Rect(0, 0, col, row)));
    netInputImg = resizeImg;
  }
  blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(netWidth, netHeight), cv::Scalar(0, 0, 0), true, false);
  // If everything else is configured correctly but the results are far off, try one of the two lines below instead
  //blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(netWidth, netHeight), cv::Scalar(104, 117, 123), true, false);
  //blobFromImage(netInputImg, blob, 1 / 255.0, cv::Size(netWidth, netHeight), cv::Scalar(114, 114,114), true, false);
  net.setInput(blob);
  std::vector<cv::Mat> netOutputImg;
  //vector<string> outputLayerName{"345","403", "461","output" };
  //net.forward(netOutputImg, outputLayerName[3]); // fetch only the "output" layer
  try
  { // works in a Release build
    net.forward(netOutputImg, net.getUnconnectedOutLayersNames()); // a Debug build throws: initCUDABackend CUDA backend will fallback to the CPU implementation for the layer "_input"
  }
  }
  catch (const std::exception& e)
  {
    cout << e.what();
  }


  std::vector<int> classIds;      // class id of each candidate detection
  std::vector<float> confidences; // confidence of each candidate detection
  std::vector<cv::Rect> boxes;    // bounding box of each candidate detection
  float ratio_h = (float)netInputImg.rows / netHeight;
  float ratio_w = (float)netInputImg.cols / netWidth;
  int net_width = className.size() + 5;  // width of one output row: number of classes + 5 (x, y, w, h, objectness)
  float* pdata = (float*)netOutputImg[0].data;
  for (int stride = 0; stride < strideSize; stride++) {    //stride
    int grid_x = (int)(netWidth / netStride[stride]);
    int grid_y = (int)(netHeight / netStride[stride]);
    for (int anchor = 0; anchor < 3; anchor++) {  //anchors
      const float anchor_w = netAnchors[stride][anchor * 2];
      const float anchor_h = netAnchors[stride][anchor * 2 + 1];
      for (int i = 0; i < grid_y; i++) {
        for (int j = 0; j < grid_x; j++) {
          float box_score = pdata[4]; // objectness: probability that this cell's box contains an object
          if (box_score >= boxThreshold) {
            cv::Mat scores(1, className.size(), CV_32FC1, pdata + 5);
            Point classIdPoint;
            double max_class_score;
            minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
            max_class_score = (float)max_class_score;
            if (max_class_score >= classThreshold) {
              //rect [x,y,w,h]
              float x = pdata[0];  //x
              float y = pdata[1];  //y
              float w = pdata[2];  //w
              float h = pdata[3];  //h
              int left = (x - 0.5 * w) * ratio_w;
              int top = (y - 0.5 * h) * ratio_h;
              classIds.push_back(classIdPoint.x);
              confidences.push_back(max_class_score * box_score);
              boxes.push_back(Rect(left, top, int(w * ratio_w), int(h * ratio_h)));
            }
          }
          pdata += net_width; // move to the next output row
        }
      }
    }
  }


  // non-maximum suppression (NMS): discard redundant overlapping boxes with lower confidence
  vector<int> nms_result;
  NMSBoxes(boxes, confidences, nmsScoreThreshold, nmsThreshold, nms_result);
  for (int i = 0; i < nms_result.size(); i++) {
    int idx = nms_result[i];
    Output result;
    result.id = classIds[idx];
    result.confidence = confidences[idx];
    result.box = boxes[idx];
    output.push_back(result);
  }
  return !output.empty();
}


void Yolo::drawPred(Mat& img, vector<Output> result, vector<Scalar> color) {
  for (int i = 0; i < result.size(); i++) {
    int left, top;
    left = result[i].box.x;
    top = result[i].box.y;
    int color_num = i;
    rectangle(img, result[i].box, color[result[i].id], 2, 8);
    string label = className[result[i].id] + ":" + to_string(result[i].confidence);


    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    top = max(top, labelSize.height);
    //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
    putText(img, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 1, color[result[i].id], 2);
  }
  //imshow("1", img);
  imwrite("out.bmp", img);
  //waitKey();
  //destroyAllWindows();
}
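
A note on the GPU requirement: OpenCV's DNN CUDA backend only works if the OpenCV library you link against was itself built with CUDA (and, for the DNN module, cuDNN) support; prebuilt packages usually are not. Below is a minimal, optional sketch of my own (not part of the project files above) that checks this at startup using the standard cv::cuda::getCudaEnabledDeviceCount() and cv::getBuildInformation() calls, so readModel(..., true) is only requested when the CUDA backend can actually be used.

#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core/cuda.hpp>

// Returns true if this OpenCV build can see at least one CUDA device.
// getCudaEnabledDeviceCount() returns 0 both when no GPU is present and
// when OpenCV was compiled without CUDA support.
bool cudaUsable() {
  int devices = cv::cuda::getCudaEnabledDeviceCount();
  std::cout << "CUDA-enabled devices visible to OpenCV: " << devices << std::endl;
  return devices > 0;
}

int main() {
  if (!cudaUsable()) {
    // Print the build summary and check the CUDA / cuDNN lines.
    std::cout << cv::getBuildInformation() << std::endl;
    return 1;
  }
  return 0;
}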

Conclusion:

I trained a single-class detection model. With the same model, inference on the GPU detects the target in every test case, while inference on the CPU fails very often.
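
To quantify that difference rather than eyeball it, the sketch below (again my own addition, reusing the Yolo class from above; "test.jpg" and the model path are placeholders) runs the same image through the CPU and CUDA backends and prints the number of detections and the time of one Detect() call measured with cv::TickMeter. The first GPU call also pays the CUDA initialization cost, so for real timings you would warm up and average over several runs.

#include "yolo.h"
#include <iostream>
#include <opencv2/opencv.hpp>

// Run one Detect() call with the requested backend and report count and time.
static void benchmark(const std::string& modelPath, const cv::Mat& img, bool useCuda) {
  Yolo yolo;
  cv::dnn::Net net;
  std::string path = modelPath;           // readModel takes a non-const reference
  if (!yolo.readModel(net, path, useCuda)) {
    std::cout << "readModel failed" << std::endl;
    return;
  }
  std::vector<Output> result;
  cv::Mat frame = img.clone();            // Detect takes a non-const reference
  cv::TickMeter tm;
  tm.start();
  yolo.Detect(frame, net, result);
  tm.stop();
  std::cout << (useCuda ? "GPU" : "CPU")
            << ": detections = " << result.size()
            << ", time = " << tm.getTimeMilli() << " ms" << std::endl;
}

int main() {
  cv::Mat img = cv::imread("test.jpg");   // placeholder image path
  if (img.empty()) return -1;
  std::string model = "yolov5sGPU.onnx";
  benchmark(model, img, false);           // CPU
  benchmark(model, img, true);            // GPU (first call includes CUDA init)
  return 0;
}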

References:

https://blog.csdn.net/qq_45945548/article/details/121701492 

https://github.com/doleron/yolov5-opencv-cpp-python 


The End

Original site

Copyright notice
This article was written by [Ten year dream Lab]; please include a link to the original when reposting. Thanks.
https://yzsam.com/2022/172/202206211112394062.html