OpenCV DNN 4.10 detects objects, but 4.9.0 does not

Hello,

I am using the OpenCV 4.10.0 DNN module with a YOLO model to detect objects in an image. This works just fine, but when I switch to OpenCV 4.9.0, no objects are detected. Is there some bug in OpenCV 4.9.0’s DNN module? How can the difference in behavior be explained?

Here’s the code:

package com.sadcaptcha.opencv_dnn;

import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import org.opencv.core.Core;
import org.opencv.core.Core.MinMaxLocResult;
import org.opencv.core.Mat;
import org.opencv.core.MatOfFloat;
import org.opencv.core.MatOfInt;
import org.opencv.core.MatOfRect2d;
import org.opencv.core.Point;
import org.opencv.core.Rect;
import org.opencv.core.Rect2d;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.Dnn;
import org.opencv.dnn.Net;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import org.opencv.utils.Converters;

/**
 * Special kudos to suddh123 on Github for writing this code:
 * https://github.com/suddh123/YOLO-object-detection-in-java/blob/code/yolo.java
 */
public class App {

	private static final String OPENCV_RESOURCE_NAME = "opencv_4100.so";
	private static final String MODEL_PATH = "src/main/resources/yolov9-3d-shapes-100-images-100-epochs.onnx";
	private static final Size TARGET_IMAGE_SIZE = new Size(640, 640);
	private static final float SCALE_FACTOR = 1f / 255f;
	private static final int NUM_CLASSES = 45;
	private static final float CONF_THRESHOLD = 0.5f;
	private static final float NMS_THRESHOLD = 0.5f;

	private static final String[] CLASS_NAMES = new String[] { "2_number", "3_number", "4_number", "5_number",
			"6_number", "7_number", "8_number", "9_number", "a_lower", "a_upper", "b_upper", "c_upper", "cone", "cube",
			"cylinder", "d20", "d_upper", "e_lower", "e_upper", "f_upper", "g_lower", "g_upper", "h_lower", "h_upper",
			"k_upper", "l_upper", "m_lower", "m_upper", "n_lower", "p_upper", "q_upper", "r_lower", "r_upper",
			"s_upper", "sphere", "t_lower", "t_upper", "u_upper", "v_upper", "w_upper", "wheel", "x_lower", "y_lower",
			"y_upper", "z_upper" };

	public static void main(String[] args) {
		loadOpenCVFromResources();
		Net model = loadModel();
		Mat image = loadImage("src/test/resources/shapes0.jpeg");
		Mat blob = preprocess(image);
		PreNmsModelResult result = getBoxPredictions(model, blob);
		List<BoxPrediction> preds = applyNonMaxSuppression(result);
		Size originalImageSize = image.size();
		for (BoxPrediction pred : preds) {
			pred = pred.scale(originalImageSize);
			Imgproc.rectangle(image, pred.getBox(), new Scalar(0, 0, 255), 2);
			Imgproc.circle(image, pred.getCenter(), 2, new Scalar(0, 0, 255), 3);
			Imgproc.putText(image, pred.getClassName(), pred.getCenter(),Imgproc.FONT_HERSHEY_SIMPLEX , 1, new Scalar(0, 0, 0), 2);
		}
		Imgcodecs.imwrite("res.jpg", image);
	}

	/**
	 * Run the model on a preprocessed blob and collect every detection whose best
	 * class score exceeds CONF_THRESHOLD. Non-max suppression is applied
	 * separately in applyNonMaxSuppression().
	 */
	private static PreNmsModelResult getBoxPredictions(Net model, Mat blob) {
		// YOLOv9 output shape is batch_size x (num_classes + 4) x 8400, where the
		// +4 in the second dimension is the center_x, center_y, width, height of
		// the detection box. The batch size is 1, so we drop that dimension by
		// reshaping to NUM_CLASSES + 4 (= 49) rows and 8400 columns.
		// Each of the 8400 columns is one box anchor: rows 0-3 hold x, y, w, h and
		// the remaining rows hold the per-class probabilities for that anchor.
		// For each column we take the maximum class probability; if it exceeds
		// CONF_THRESHOLD we keep the box, its class id, and its confidence.
		model.setInput(blob);
		Mat output = model.forward().reshape(0, NUM_CLASSES + 4);
		Mat confidences;
		float confidence;
		Mat column;
		MinMaxLocResult mm;
		Rect2d box;
		double width;
		double height;
		double centerX;
		double centerY;
		double left;
		double top;

		PreNmsModelResult result = new PreNmsModelResult();
		for (int i = 0; i < output.cols(); i++) {
			column = output.col(i);
			confidences = column.rowRange(4, NUM_CLASSES + 4);
			mm = Core.minMaxLoc(confidences);
			confidence = (float) mm.maxVal;

			if (confidence > CONF_THRESHOLD) {
				centerX = column.get(0, 0)[0];
				centerY = column.get(1, 0)[0];
				width = column.get(2, 0)[0];
				height = column.get(3, 0)[0];
				left = centerX - width / 2;
				top = centerY - height / 2;
				box = new Rect2d(left, top, width, height);
				result.addClassId((int) mm.maxLoc.y);
				result.addConfidence(confidence);
				result.addBox(box);
			}
		}
		return result;
	}

	/**
	 * Apply OpenCV's NMSBoxes to the thresholded detections and convert the
	 * surviving boxes (still in 640x640 blob coordinates) into BoxPrediction objects.
	 */
	private static List<BoxPrediction> applyNonMaxSuppression(PreNmsModelResult input) {
		MatOfFloat confs = new MatOfFloat(Converters.vector_float_to_Mat(input.getConfidences()));
		Rect2d[] boxesArray = input.getBoxes().toArray(new Rect2d[0]);
		MatOfRect2d boxesMat = new MatOfRect2d(boxesArray);
		MatOfInt indices = new MatOfInt();
		Dnn.NMSBoxes(boxesMat, confs, CONF_THRESHOLD, NMS_THRESHOLD, indices);
		int[] ind = indices.toArray();
		List<BoxPrediction> preds = new ArrayList<>();
		for (int i = 0; i < ind.length; i++) {
			Rect box = new Rect((int) boxesArray[ind[i]].x, (int) boxesArray[ind[i]].y, (int) boxesArray[ind[i]].width,
					(int) boxesArray[ind[i]].height);
			preds.add(new BoxPrediction.Builder()
					.withBox(box)
					.withCenter(new Point((box.width / 2) + box.x, (box.height / 2) + box.y))
					.withClassName(getClassName(input.getClassIds().get(ind[i])))
					.withClassIndex(input.getClassIds().get(ind[i]))
					.withImageSize(TARGET_IMAGE_SIZE)
					.build());
		}
		return preds;
	}

	/**
	 * Load the OpenCV native library that is bundled as a classpath resource.
	 */
	private static void loadOpenCVFromResources() {
		try {
			String filename = App.class.getClassLoader().getResource(OPENCV_RESOURCE_NAME).toURI().toString()
					.replace("file:", "");
			System.load(filename);
		} catch (URISyntaxException e) {
			throw new RuntimeException(e);
		}
	}

	private static Net loadModel() {
		return Dnn.readNetFromONNX(MODEL_PATH);
	}

	private static Mat preprocess(Mat image) {
		return Dnn.blobFromImage(image, SCALE_FACTOR, TARGET_IMAGE_SIZE, new Scalar(0, 0, 0), true, false);
	}

	private static Mat loadImage(String path) {
		return Imgcodecs.imread(path, Imgcodecs.IMREAD_COLOR);
	}

	private static String getClassName(int loc) {
		return CLASS_NAMES[loc];
	}

}

Thanks

Perhaps previously unsupported layer types are now handled in 4.10, or a buggy implementation got fixed. I don't know.

Browse the changelog/release notes for OpenCV 4.10 to learn about any relevant changes.

Does the program generate any console output? Such issues might be revealed there.
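
For example, a minimal smoke test along these lines might help surface hidden errors. This is just a sketch: it reuses the model path and 640x640 input size from the posted code, and the DnnSmokeTest class name and the black dummy frame are made up for illustration. It prints which OpenCV build is actually loaded and wraps forward() in a try/catch so that any layer or shape error shows up on the console instead of silently producing zero detections:

import org.opencv.core.Core;
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.Dnn;
import org.opencv.dnn.Net;

public class DnnSmokeTest {

	public static void main(String[] args) {
		// Load the native library; alternatively use System.load(...) as in App.
		System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

		// Confirm which OpenCV build is actually linked at runtime.
		System.out.println("OpenCV version: " + Core.VERSION);

		// Same model path as in the posted code.
		Net net = Dnn.readNetFromONNX("src/main/resources/yolov9-3d-shapes-100-images-100-epochs.onnx");
		System.out.println("Net loaded, empty = " + net.empty());

		// Push a black 640x640 frame through the network so any layer or shape
		// error surfaces as an exception instead of just yielding zero detections.
		Mat dummy = new Mat(new Size(640, 640), CvType.CV_8UC3, new Scalar(0, 0, 0));
		Mat blob = Dnn.blobFromImage(dummy, 1 / 255.0, new Size(640, 640), new Scalar(0, 0, 0), true, false);
		net.setInput(blob);
		try {
			Mat out = net.forward();
			System.out.println("forward() ok, output elements: " + out.total());
		} catch (Exception e) {
			System.err.println("forward() failed: " + e);
		}
	}
}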

Perhaps contact suddh123, the author credited in the code, for support with the code they wrote.