Hello, hopefully someone can help me out here. I am struggling to get detections using a YOLOv9 ONNX model with the Java OpenCV DNN module. I have tried pretty much everything, including compiling different OpenCV versions. The model had high precision in training, but it is unable to detect anything, even with a very low confidence threshold, when running inside OpenCV. There are no errors, simply no detections.
The model was exported to ONNX format using the export.py script from the official repository, with opset=12 and default options.
Here is a link to my model weights:
Here is the image I am trying to run detections on.
If someone could take a look at this code, it would be greatly appreciated:
package com.***.opencv_dnn;
import java.net.URISyntaxException;
import org.opencv.core.Core;
import org.opencv.core.Core.MinMaxLocResult;
import org.opencv.core.Mat;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.Dnn;
import org.opencv.dnn.Image2BlobParams;
import org.opencv.dnn.Net;
import org.opencv.imgcodecs.Imgcodecs;
/**
 * Command-line entry point that loads an ONNX detection model through the
 * OpenCV DNN module, runs it on a single test image and prints the name of
 * the best-scoring class for every candidate whose score exceeds
 * {@link #CONF_THRESHOLD}.
 */
public class App {
    private static final String MODEL_PATH = "src/main/resources/yolov9-3d-shapes-100-images-100-epochs.onnx";
    private static final int TARGET_IMG_HEIGHT = 640;
    private static final int TARGET_IMG_WIDTH = 640;
    private static final float SCALE_FACTOR = 1f / 255f;
    private static final int NUM_CLASSES = 45;
    private static final float CONF_THRESHOLD = 0.1f;
    private static final String[] CLASS_NAMES = new String[] { "2_number", "3_number", "4_number", "5_number",
            "6_number", "7_number", "8_number", "9_number", "a_lower", "a_upper", "b_upper", "c_upper", "cone", "cube",
            "cylinder", "d20", "d_upper", "e_lower", "e_upper", "f_upper", "g_lower", "g_upper", "h_lower", "h_upper",
            "k_upper", "l_upper", "m_lower", "m_upper", "n_lower", "p_upper", "q_upper", "r_lower", "r_upper",
            "s_upper", "sphere", "t_lower", "t_upper", "u_upper", "v_upper", "w_upper", "wheel", "x_lower", "y_lower",
            "y_upper", "z_upper" };

    /**
     * Runs the model on the bundled test image and prints one class name per
     * candidate that passes the confidence threshold.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        loadOpenCV();
        Net model = loadModel();
        Mat image = getPreprocessedImage("src/test/resources/shapes2.jpeg");
        model.setInput(image);

        // Flatten the network output to (4 + NUM_CLASSES) rows, one candidate per column.
        // NOTE(review): assumes the first output tensor holds exactly (4 + NUM_CLASSES)
        // values per candidate - confirm against the exported model.
        Mat output = model.forward().reshape(0, NUM_CLASSES + 4);

        for (int i = 0; i < output.cols(); i++) {
            Mat col = output.col(i);
            // Rows 0-3 are skipped (presumably the box coordinates); the rest are class scores.
            Mat confidences = col.rowRange(4, NUM_CLASSES + 4);
            MinMaxLocResult mm = Core.minMaxLoc(confidences);
            if ((float) mm.maxVal > CONF_THRESHOLD) {
                // 'confidences' is an N x 1 column vector, so the index of the best class
                // is the ROW of the maximum (maxLoc.y); maxLoc.x is always 0 here.
                System.out.println(getClassName((int) mm.maxLoc.y));
            }
        }
    }

    /**
     * Loads the OpenCV native library that is shipped as a classpath resource.
     *
     * @throws IllegalStateException if the native library is not on the classpath
     * @throws RuntimeException      if the resource URL cannot be converted to a URI
     */
    private static void loadOpenCV() {
        java.net.URL resource = App.class.getClassLoader().getResource("opencv_4100.so");
        if (resource == null) {
            throw new IllegalStateException("Native library opencv_4100.so not found on the classpath");
        }
        try {
            // System.load needs a plain file-system path, so the URI scheme is stripped.
            String filename = resource.toURI().toString().replace("file:", "");
            System.load(filename);
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the ONNX network from {@link #MODEL_PATH}.
     *
     * @return the loaded network
     */
    private static Net loadModel() {
        return Dnn.readNetFromONNX(MODEL_PATH);
    }

    /**
     * Converts an image into an input blob: resized to the target size,
     * scaled by {@link #SCALE_FACTOR} and with the R and B channels swapped.
     *
     * @param image the image to convert
     * @return the blob to feed to the network
     */
    private static Mat preprocess(Mat image) {
        Image2BlobParams params = new Image2BlobParams();
        // Scalar.all(...) applies the factor to EVERY channel. new Scalar(v) would
        // build (v, 0, 0, 0), which multiplies the remaining colour channels by zero.
        params.set_scalefactor(Scalar.all(SCALE_FACTOR));
        params.set_size(new Size(TARGET_IMG_WIDTH, TARGET_IMG_HEIGHT));
        params.set_swapRB(true);
        return Dnn.blobFromImageWithParams(image, params);
    }

    /**
     * Reads a colour image from disk and converts it into a network input blob.
     *
     * @param path file-system path of the image
     * @return the preprocessed blob
     * @throws IllegalArgumentException if the image cannot be read
     */
    private static Mat getPreprocessedImage(String path) {
        Mat image = Imgcodecs.imread(path, Imgcodecs.IMREAD_COLOR);
        // imread signals failure with an empty Mat rather than an exception.
        if (image.empty()) {
            throw new IllegalArgumentException("Could not read image: " + path);
        }
        return preprocess(image);
    }

    /**
     * Maps a class index to its human-readable label.
     *
     * @param loc zero-based class index
     * @return the label stored in {@link #CLASS_NAMES} at that index
     */
    private static String getClassName(int loc) {
        return CLASS_NAMES[loc];
    }
}