OpenCV C++ and Yolo v5

OpenCV4.5.4 is able to read the yolov5.onnx model, however I am unable to make detections as the image output is a downscaled image without predictions.

My code is as it follows:

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */

/* 
 * File:   main.cpp
 * Author: pauloasmendes
 *
 * Created on 24 de Novembro de 2021, 16:32
 */

#include <cstdlib> 
#include <iostream>
#include <fstream> 
#include <opencv4/opencv2/opencv.hpp> 

using namespace std;

// YOLO
#include <stdexcept>
#include <opencv4/opencv2/dnn.hpp>
#include <opencv4/opencv2/dnn/all_layers.hpp>
constexpr float CONFIDENCE_THRESHOLD = 0;
constexpr float NMS_THRESHOLD = 0.4;
//number of classes to detect
//constexpr int NUM_CLASSES = 80;
constexpr int NUM_CLASSES = 5;// to detect only one class -> the first in the coco_names_txt file list ?!??
// colors for bounding boxes
const cv::Scalar colors[] = {
    {0, 255, 255},
    {255, 255, 0},
    {0, 255, 0},
    {255, 0, 0}
};
const auto NUM_COLORS = sizeof(colors)/sizeof(colors[0]);
//
/*
 * 
 */
int main(int argc, char** argv) {
    cout << CV_VERSION<< endl;
    cv::Mat im_1;
    
    
    im_1 = cv::imread("im_14_RGB.jpg", cv::IMREAD_COLOR);
    if(!im_1.data){
        cout << "\n\t Could not open or find the image 1" << endl;
    }
    // let's downscale the image using new  width and height
    int down_width = 640;
    int down_height = 640; 
   //resize down
   cv::resize(im_1, im_1, cv::Size(down_width, down_height), cv::INTER_LINEAR);
  

    
    // YOLO V5
    // read coco class names do ficheiro .txt
    std::vector<std::string> class_names;
    {
        std::ifstream class_file("coco_names.txt");
        if (!class_file)
        {
            std::cerr << "failed to open classes.txt\n";
            return 0;
        }

        std::string line;
        while (std::getline(class_file, line))
            class_names.push_back(line);
    }
    // Initialize the parameters para alocação de memoria for object detection using YOLOV4
    // faço load dos ficheiros de configuração do método YOLOV4
    //auto net = cv::dnn::readNetFromDarknet("custom-yolov4-detector.cfg", "custom-yolov4-detector_best.weights");
    //auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "custom-yolov4-tiny-detector_best.weights");    
    //cv::dnn::Net net = cv::dnn::readNetFromONNX("best.onnx");
    auto net = cv::dnn::readNetFromONNX("yolov5.onnx");
    
    cout << "here" << endl;
    // using GPU for image processing
    //net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    //net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    // using CPU for image processing
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    auto output_names = net.getUnconnectedOutLayersNames();
    cv::Mat blob;
    std::vector<cv::Mat> detections;
    std::vector<int> indices[NUM_CLASSES];
    std::vector<cv::Rect> boxes[NUM_CLASSES];
    std::vector<float> scores[NUM_CLASSES]; 
     
     
    
    // Creates 4-dimensional blob from image.
    cv::dnn::blobFromImage(im_1, blob, 0.00392, cv::Size(im_1.rows, im_1.cols), cv::Scalar(), true, false, CV_32F);
    net.setInput(blob);
    net.forward(detections, output_names);
    // object detection using YOLOV4
    for (auto& output : detections){
            const auto num_boxes = output.rows;
            for (int i = 0; i < num_boxes; i++){
                //calculo das 5 predições para cada bounding box: x, y, w, h , confiança
                auto x = output.at<float>(i, 0) * im_1.cols;
                auto y = output.at<float>(i, 1) * im_1.rows;
                auto width = output.at<float>(i, 2) * im_1.cols;
                auto height = output.at<float>(i, 3) * im_1.rows;
                cv::Rect rect(x - width/2, y - height/2, width, height);
                
                for (int c = 0; c < NUM_CLASSES; c++){
                    auto confidence = *output.ptr<float>(i, 5 + c);
                    if (confidence >= CONFIDENCE_THRESHOLD){
                        boxes[c].push_back(rect);
                        scores[c].push_back(confidence);
                    }
                }
            }
    }
    // Realiza a supressão não máxima das bounding boxes e das pontuações  de confiança correspondentes.
    // eliminação de bounding boxes repetidas que identificam o mesmo objecto.
    for (int c = 0; c < NUM_CLASSES; c++)
            cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]);
        
    // identificação dos objectos e correspondentes pontuações de confiança através de bounding boxes. 
    for (int c= 0; c < NUM_CLASSES; c++){
            for (size_t i = 0; i < indices[c].size(); ++i){
                const auto color = colors[c % NUM_COLORS];

                auto idx = indices[c][i];
                const auto& rect = boxes[c][idx];
                cv::rectangle(im_1, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3);
                
                // coloco a identificação da classe do objeto contido na bounding box - pedestre ou garrafa por ex.
                std::ostringstream label_ss;
                label_ss << class_names[c] << ": " << std::fixed << std::setprecision(2) << scores[c][idx];
                auto label = label_ss.str();
                
                int baseline;
                auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
                // defino o rectangulo que define o objeto detectado
                cv::rectangle(im_1, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED);
                // coloco a identificação da classe do objecto detectado.
                cv::putText(im_1, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0));
            }
    }
    cv::namedWindow("YOLOV5 detection", cv::WINDOW_NORMAL);
    cv::imshow("YOLOV5 detection", im_1);
    cv::waitKey(0);
    cv::imwrite("YOLOV5_res.jpg", im_1);
    
    
    
    return 0;
}


1 Like