I am attempting to detect a face in an NV12 image that contains only a single face. I couldn’t attach the NV12 data file here, so I have attached the corresponding PNG file instead.
Normally it works fine and returns a single face rect in the correct location. Occasionally, however, the face detector returns a face rect with huge or non-finite values, and sometimes it returns 2 faces. Here is an excerpt of the log I get:
Face 0, top-left coordinates: (-5.17639e+26, -inf), box width: 0, box height: inf, score: 1.00
Face 1, top-left coordinates: (137.535, 44.9726), box width: 31.8052, box height: 41.9043, score: 1.00
OpenCV was built at:
86fa0308fc (HEAD -> 4.x, origin/HEAD, origin/4.x) Merge pull request #23139 from AleksandrPanov:add_py_charuco_sample
using Visual Studio 2019 with the following command
cmake '-GVisual Studio 16 2019' -D BUILD_SHARED_LIBS=OFF -D BUILD_WITH_STATIC_CRT=OFF -D 'CMAKE_CXX_FLAGS_RELEASE= /MD ' -D 'CMAKE_CXX_FLAGS_DEBUG= /MDd ' -D WITH_IPP=ON -D WITH_MKL=ON -DBUILD_PERF_TESTS:BOOL=OFF -DBUILD_TESTS:BOOL=OFF -DBUILD_DOCS:BOOL=OFF -DWITH_CUDA:BOOL=OFF -DBUILD_EXAMPLES:BOOL=OFF -DINSTALL_CREATE_DISTRIB=ON -DOPENCV_EXTRA_MODULES_PATH=/c/lib2/opencv_contrib/modules -DCMAKE_INSTALL_PREFIX=/c/lib2/install/opencv /c/lib2/opencv
Is there anything wrong with my code?
Here is the C++ code I used to reproduce this:
#include <cmath>
#include <cstddef>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/objdetect/objdetect.hpp>
#include "gtest/gtest.h"
using namespace std;
namespace fs = std::filesystem;
// Edge length, in pixels, passed to the detector at creation time and used as
// the target width when frames are scaled down before detection.
// Declared constexpr: nothing in this file writes to it, and a mutable global
// could be changed by accident between test iterations.
constexpr int YU_NET_INPUT_SIZE = 320;
/// Builds a YuNet face detector from the ONNX model located at
/// opencv-zoo/models/face_detection_yunet/face_detection_yunet_2022mar.onnx
/// (relative to the current working directory).
///
/// The detector is created with a square YU_NET_INPUT_SIZE input size.
///
/// @return the detector instance produced by cv::FaceDetectorYN::create.
cv::Ptr<cv::FaceDetectorYN> createFaceDetector()
{
    // Detections scoring below this value are discarded.
    const float kScoreThreshold = 0.9f;
    // Boxes whose IoU reaches this value are suppressed by NMS.
    const float kNmsThreshold = 0.3f;
    // Number of best-scoring boxes kept before NMS runs.
    const int kTopK = 5000;

    const fs::path onnxModel =
        fs::path("opencv-zoo") / "models" / "face_detection_yunet" / "face_detection_yunet_2022mar.onnx";
    const cv::Size networkInput(YU_NET_INPUT_SIZE, YU_NET_INPUT_SIZE);

    // The empty string is the (unused) config path.
    return cv::FaceDetectorYN::create(
        onnxModel.string(), "", networkInput, kScoreThreshold, kNmsThreshold, kTopK);
}
/// Reads the complete contents of "1920x720.nv12" (looked up in the current
/// working directory) as raw bytes.
///
/// @return every byte of the file in order; an empty vector when the file
///         cannot be opened or contains no data.
std::vector<unsigned char> loadNv12Data()
{
    std::ifstream nv12File("1920x720.nv12", std::ios::binary);

    // A default-constructed istreambuf_iterator is the end-of-stream sentinel.
    const std::istreambuf_iterator<char> firstByte(nv12File);
    const std::istreambuf_iterator<char> endOfStream;

    std::vector<unsigned char> bytes;
    bytes.assign(firstByte, endOfStream);
    return bytes;
}
bool test()
{
auto faceDetectorYN = createFaceDetector();
auto dataVector = loadNv12Data();
int frameWidth = 1920;
int frameHeight = 720;
// find the face(s)
cv::Mat picNV12 = cv::Mat(frameHeight * 3 / 2, frameWidth, CV_8UC1, dataVector.data());
cv::Mat picBgr;
cv::cvtColor(picNV12, picBgr, cv::COLOR_YUV2BGR_NV12);
// Scale factor used to resize input video frames
// optimal size for YuNet is 320 x 320, scale the image
float scale = YU_NET_INPUT_SIZE / (float)frameWidth;
int imageWidth = int(picBgr.cols * scale);
int imageHeight = int(picBgr.rows * scale);
cv::Mat smallImg;
cv::resize(picBgr, smallImg, cv::Size(imageWidth, imageHeight));
// Set input size before inference
faceDetectorYN->setInputSize(smallImg.size());
cv::Mat faces;
faceDetectorYN->detect(smallImg, faces);
bool badScore = false;
for (int i = 0; i < faces.rows; i++)
{
float score = faces.at<float>(i, 4);
badScore = score < 0 ? true : badScore;
cout << "Face " << i
<< ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
<< "box width: " << faces.at<float>(i, 2) << ", box height: " << faces.at<float>(i, 3) << ", "
<< "score: " << cv::format("%.2f", faces.at<float>(i, 14))
<< endl;
}
return badScore;
}
// Repeats the detection round-trip 100 times and stops at the first run for
// which test() reports a bad result; the failing iteration index is included
// in the assertion message.
TEST(deleteme, test)
{
    constexpr int kRuns = 100;
    int i = 0;
    while (i < kRuns)
    {
        const bool badScore = test();
        ASSERT_FALSE(badScore) << "Failed i = " << i << endl;
        ++i;
    }
}