OpenCV not built correctly for CUDA_ARCH

Hi everyone!

I am unable to run a simple test program, using OpenCV 4.5.3 built with OpenVINO and CUDA support.

Here it is my test program:

#include <iostream>
#include <filesystem>
#include <chrono>
#include <thread>
#include <fstream>
#include <vector>

#include "opencv2/core.hpp"
#include "opencv2/dnn.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"

int main()
{
    unsigned int num_inferences = 1;
    std::string imagefilename = "C:/Lixo/TestImages/1.jpeg";
    bool grayscale = true;

    // models input path and input layer name

    std::string modelFilename = "C:/Lixo/mymodel.onnx";

    cv::String inputLayerName = "/input_layer2";

    cv::Size inputSize = cv::Size(72,72);

    // Read image and resize
    cv::Mat grayImg, img, resized;
    img = cv::imread(imagefilename);
    cv::cvtColor(img, grayImg, cv::COLOR_BGR2GRAY);
    cv::resize(grayImg, resized, metricsInputSizes[5]);

    cv::dnn::Net net = cv::dnn::readNetFromONNX(modelFilename);

    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);

    std::vector<cv::Mat> imgBatch = { resized };
    bool swapRBChannels = false;
    cv::Mat blob = cv::dnn::blobFromImages(imgBatch, 1.0, cv::Size(), cv::Scalar(), swapRBChannels, false, CV_8U); //CV_32F);

    net.setInput(blob, inputLayerName);

    std::vector<cv::String> unconnectedOutLayerNames = net.getUnconnectedOutLayersNames();
    std::vector<cv::Mat> outputs;
    outputs.clear();

    auto time1 = std::chrono::high_resolution_clock::now();
    try
    {
        net.forward(outputs, unconnectedOutLayerNames);
    }   
    catch (std::exception& ex)
    {
        std::cout << ex.what() << std::endl;
    }

This is the error that I get:

OpenCV(4.5.3-openvino) C:\Users\cesar.gouveia\Projects\OpenCV-Package\opencv\modules\dnn\src\dnn.cpp:2385: error: (-216:No CUDA support) OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration. in function 'cv::dnn::dnn4_v20210608::Net::Impl::initCUDABackend'

The current GPU that I’m using to run this script is:

NVIDIA GeForce MX450

Which has CUDA compute capability 7.5 (NVIDIA GeForce MX450 25W Specs | TechPowerUp GPU Database)

I went to check back on the cmake variables and this is the output produced while cmaking:

--   NVIDIA CUDA:                   YES (ver 10.1, CUFFT CUBLAS FAST_MATH)
--     NVIDIA GPU arch:             30 35 37 50 52 60 61 70 75
--     NVIDIA PTX archs:

So it seems that it was built with CUDA and with support for 75, so my question is why opencv dnn gives this kind of error while inferring? Do you have any ideas?

Thanks in advance!
César.

That is weird. If I were you I would walk through bool isDeviceCompatible() to see where it is failing.

If you haven’t built Debug then, to avoid waiting for that to build, you can copy the function code into a test and walk through that version instead to see why it’s failing.


// Mirrors the check the dnn CUDA backend performs: the active device is
// usable only if the library carries matching PTX, or a prebuilt binary for
// the same major compute capability with an equal-or-lower minor revision.
bool isDeviceCompatible()
{
    const int device = cv::cuda::getDevice();
    if (device < 0)
        return false;

    // Query the device's compute capability (major.minor).
    int ccMajor = 0;
    int ccMinor = 0;
    cudaDeviceGetAttribute(&ccMajor, cudaDevAttrComputeCapabilityMajor, device);
    cudaDeviceGetAttribute(&ccMinor, cudaDevAttrComputeCapabilityMinor, device);

    // PTX built for an equal-or-lower capability can be JIT-compiled here.
    if (cv::cuda::TargetArchs::hasEqualOrLessPtx(ccMajor, ccMinor))
        return true;

    // Otherwise, accept any binary for this major version whose minor
    // revision does not exceed the device's.
    for (int rev = ccMinor; rev >= 0; --rev)
    {
        if (cv::cuda::TargetArchs::hasBin(ccMajor, rev))
            return true;
    }

    return false;
}