Hello,
I am using OpenCV 4.10.0 DNN with YOLO to detect objects in an image. This works fine, but when I switch to OpenCV 4.9.0, no objects are detected. Is there a bug in OpenCV 4.9.0’s DNN module? How can the difference in behavior be explained?
Here’s the code:
package com.sadcaptcha.opencv_dnn;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.opencv.core.Core;
import org.opencv.core.Core.MinMaxLocResult;
import org.opencv.core.Mat;
import org.opencv.core.MatOfFloat;
import org.opencv.core.MatOfInt;
import org.opencv.core.MatOfRect2d;
import org.opencv.core.Point;
import org.opencv.core.Rect;
import org.opencv.core.Rect2d;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.Dnn;
import org.opencv.dnn.Net;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import org.opencv.utils.Converters;
/**
 * Runs a YOLOv9 ONNX model through the OpenCV DNN module on a single image and
 * writes an annotated copy of that image to {@code res.jpg}.
 *
 * <p>Special kudos to suddh123 on Github for writing this code:
 * https://github.com/suddh123/YOLO-object-detection-in-java/blob/code/yolo.java
 */
public class App {
    private static final String OPENCV_RESOURCE_NAME = "opencv_4100.so";
    private static final String MODEL_PATH = "src/main/resources/yolov9-3d-shapes-100-images-100-epochs.onnx";
    private static final String INPUT_IMAGE_PATH = "src/test/resources/shapes0.jpeg";
    private static final String OUTPUT_IMAGE_PATH = "res.jpg";
    private static final Size TARGET_IMAGE_SIZE = new Size(640, 640);
    private static final float SCALE_FACTOR = 1f / 255f;
    private static final float CONF_THRESHOLD = 0.5f;
    private static final float NMS_THRESHOLD = 0.5f;
    /** Number of leading output rows holding the box geometry (center x, center y, width, height). */
    private static final int BOX_ROWS = 4;
    private static final String[] CLASS_NAMES = new String[] { "2_number", "3_number", "4_number", "5_number",
            "6_number", "7_number", "8_number", "9_number", "a_lower", "a_upper", "b_upper", "c_upper", "cone", "cube",
            "cylinder", "d20", "d_upper", "e_lower", "e_upper", "f_upper", "g_lower", "g_upper", "h_lower", "h_upper",
            "k_upper", "l_upper", "m_lower", "m_upper", "n_lower", "p_upper", "q_upper", "r_lower", "r_upper",
            "s_upper", "sphere", "t_lower", "t_upper", "u_upper", "v_upper", "w_upper", "wheel", "x_lower", "y_lower",
            "y_upper", "z_upper" };
    /** Derived from {@link #CLASS_NAMES} so the class count and the label table cannot drift apart. */
    private static final int NUM_CLASSES = CLASS_NAMES.length;

    /**
     * Loads the native library, the model and the test image, runs detection, draws
     * every surviving prediction onto the image and writes the result to disk.
     *
     * @param args unused
     * @throws IllegalStateException if the annotated image cannot be written
     */
    public static void main(String[] args) {
        loadOpenCVFromResources();
        Net model = loadModel();
        Mat image = loadImage(INPUT_IMAGE_PATH);
        Mat blob = preprocess(image);
        PreNmsModelResult result = getBoxPredictions(model, blob);
        List<BoxPrediction> preds = applyNonMaxSuppression(result);

        Size originalImageSize = image.size();
        Scalar red = new Scalar(0, 0, 255);
        Scalar black = new Scalar(0, 0, 0);
        for (BoxPrediction pred : preds) {
            // Predictions are expressed in model-input coordinates; map them back onto the source image.
            BoxPrediction scaled = pred.scale(originalImageSize);
            Imgproc.rectangle(image, scaled.getBox(), red, 2);
            Imgproc.circle(image, scaled.getCenter(), 2, red, 3);
            Imgproc.putText(image, scaled.getClassName(), scaled.getCenter(), Imgproc.FONT_HERSHEY_SIMPLEX, 1,
                    black, 2);
        }

        // imwrite reports failure through its return value, so it must be checked explicitly.
        if (!Imgcodecs.imwrite(OUTPUT_IMAGE_PATH, image)) {
            throw new IllegalStateException("Failed to write annotated image to " + OUTPUT_IMAGE_PATH);
        }
    }

    /**
     * Runs the model on a preprocessed blob and collects every candidate box whose best
     * class score exceeds {@link #CONF_THRESHOLD}.
     *
     * <p>No non-max suppression happens here; overlapping candidates are all returned and
     * are expected to be filtered afterwards by {@link #applyNonMaxSuppression}.
     *
     * <p>The raw network output is reshaped to {@code BOX_ROWS + NUM_CLASSES} rows, which
     * drops the batch dimension (batch size is 1). Each column then describes one candidate
     * box: rows 0-3 are center x, center y, width and height, and each remaining row is the
     * score of one class.
     *
     * @param model the loaded network
     * @param blob  the input blob produced by {@link #preprocess}
     * @return the boxes, confidences and class ids of all candidates above the threshold
     */
    private static PreNmsModelResult getBoxPredictions(Net model, Mat blob) {
        model.setInput(blob);
        Mat output = model.forward().reshape(0, BOX_ROWS + NUM_CLASSES);

        PreNmsModelResult result = new PreNmsModelResult();
        int candidateCount = output.cols();
        for (int i = 0; i < candidateCount; i++) {
            Mat column = output.col(i);
            Mat classScores = column.rowRange(BOX_ROWS, BOX_ROWS + NUM_CLASSES);
            MinMaxLocResult best = Core.minMaxLoc(classScores);
            float confidence = (float) best.maxVal;
            if (confidence > CONF_THRESHOLD) {
                double centerX = column.get(0, 0)[0];
                double centerY = column.get(1, 0)[0];
                double width = column.get(2, 0)[0];
                double height = column.get(3, 0)[0];
                // The model reports the box center; Rect2d wants the top-left corner.
                double left = centerX - width / 2;
                double top = centerY - height / 2;

                // classScores is a single column, so the row of the maximum is the class id.
                result.addClassId((int) best.maxLoc.y);
                result.addConfidence(confidence);
                result.addBox(new Rect2d(left, top, width, height));
            }
        }
        return result;
    }

    /**
     * Filters overlapping candidates with OpenCV's non-max suppression and converts the
     * survivors into {@link BoxPrediction} objects in model-input coordinates.
     *
     * @param input the candidates collected by {@link #getBoxPredictions}
     * @return the surviving predictions; empty (never {@code null}) when there are no candidates
     */
    private static List<BoxPrediction> applyNonMaxSuppression(PreNmsModelResult input) {
        List<BoxPrediction> preds = new ArrayList<>();
        Rect2d[] boxesArray = input.getBoxes().toArray(new Rect2d[0]);
        if (boxesArray.length == 0) {
            // Nothing was detected. Return early rather than hand empty Mats to the native
            // NMS call and the MatOfInt#toArray conversion, which may reject an empty Mat.
            return preds;
        }

        MatOfFloat confs = new MatOfFloat(Converters.vector_float_to_Mat(input.getConfidences()));
        MatOfRect2d boxesMat = new MatOfRect2d(boxesArray);
        MatOfInt indices = new MatOfInt();
        Dnn.NMSBoxes(boxesMat, confs, CONF_THRESHOLD, NMS_THRESHOLD, indices);
        if (indices.empty()) {
            return preds;
        }

        for (int keptIndex : indices.toArray()) {
            Rect2d candidate = boxesArray[keptIndex];
            Rect box = new Rect((int) candidate.x, (int) candidate.y, (int) candidate.width,
                    (int) candidate.height);
            int classId = input.getClassIds().get(keptIndex);
            preds.add(new BoxPrediction.Builder()
                    .withBox(box)
                    .withCenter(new Point((box.width / 2) + box.x, (box.height / 2) + box.y))
                    .withClassName(getClassName(classId))
                    .withClassIndex(classId)
                    .withImageSize(TARGET_IMAGE_SIZE)
                    .build());
        }
        return preds;
    }

    /**
     * Loads the OpenCV native library from a classpath resource.
     *
     * <p>The resource URL is converted with {@link Paths#get(java.net.URI)} so that
     * percent-escaped characters (for example spaces in a directory name) are decoded
     * before the path is handed to {@link System#load(String)}.
     *
     * @throws IllegalStateException if the resource is not on the classpath
     * @throws RuntimeException      if the resource URL is not a valid URI
     */
    private static void loadOpenCVFromResources() {
        URL resource = App.class.getClassLoader().getResource(OPENCV_RESOURCE_NAME);
        if (resource == null) {
            throw new IllegalStateException(
                    "OpenCV native library not found on the classpath: " + OPENCV_RESOURCE_NAME);
        }
        try {
            System.load(Paths.get(resource.toURI()).toAbsolutePath().toString());
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the ONNX network from {@link #MODEL_PATH}.
     *
     * @return the loaded network
     */
    private static Net loadModel() {
        return Dnn.readNetFromONNX(MODEL_PATH);
    }

    /**
     * Converts an image into the network's input blob: pixel values scaled by
     * {@link #SCALE_FACTOR}, resized to {@link #TARGET_IMAGE_SIZE}, red and blue
     * channels swapped, no mean subtraction and no cropping.
     *
     * @param image the source image
     * @return the input blob
     */
    private static Mat preprocess(Mat image) {
        return Dnn.blobFromImage(image, SCALE_FACTOR, TARGET_IMAGE_SIZE, new Scalar(0, 0, 0), true, false);
    }

    /**
     * Reads an image from disk as a 3-channel color image.
     *
     * @param path file system path of the image
     * @return the decoded image
     * @throws IllegalArgumentException if the file is missing or cannot be decoded
     */
    private static Mat loadImage(String path) {
        Mat image = Imgcodecs.imread(path, Imgcodecs.IMREAD_COLOR);
        // imread does not throw on failure; it returns an empty Mat instead.
        if (image.empty()) {
            throw new IllegalArgumentException("Could not read image: " + path);
        }
        return image;
    }

    /**
     * Looks up the label for a class id.
     *
     * @param loc index into {@link #CLASS_NAMES}
     * @return the class label
     */
    private static String getClassName(int loc) {
        return CLASS_NAMES[loc];
    }
}
Thanks