I am trying to implement face detection using OpenCV and a Vitis-AI DPU model. However, I am getting an error on this line of code:
cv2.rectangle(frame, (left,top), (right,bottom), (0,255,0), 2)
cv2.error: OpenCV(4.5.2) : error: (-5:Bad argument) in function 'rectangle'
> Overload resolution failed:
* Can't parse 'pt1'. Sequence item with index 0 has a wrong type
* Can't parse 'pt1'. Sequence item with index 0 has a wrong type
* Can't parse 'rec'. Expected sequence length 4, got 2
* Can't parse 'rec'. Expected sequence length 4, got 2
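From the error text it looks like cv2.rectangle is rejecting the type of the pt1/pt2 coordinates rather than their values: the Python bindings only accept plain int coordinates for points. My suspicion (only a guess at this stage) is that left, top, right and bottom come back from process() as numpy float32 values, since the faces array is created with dtype=np.float32 and the int() tuple is assigned back into that same array. The standalone snippet below is just an illustration of that suspicion; the image and the coordinate values are made up for the test:

import numpy as np
import cv2

canvas = np.zeros((100, 100, 3), dtype=np.uint8)    # dummy image, only for this test
pt1 = (np.float32(10), np.float32(10))              # numpy floats instead of Python ints
pt2 = (np.float32(50), np.float32(50))
cv2.rectangle(canvas, pt1, pt2, (0, 255, 0), 2)     # I expect the same "Can't parse 'pt1'" failure here
cv2.rectangle(canvas, (10, 10), (50, 50), (0, 255, 0), 2)  # plain int coordinates work fine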
Here is the code that I am working on:
import numpy as np
import cv2
import xir   # used below for xir.Graph.deserialize()
def nms_boxes(boxes, scores, nms_threshold):
    """ IoU-based non-maxima suppression: keep the highest-scoring boxes and
        drop any box whose overlap with a kept box exceeds nms_threshold. """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w1 = np.maximum(0.0, xx2 - xx1 + 1)
        h1 = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w1 * h1
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= nms_threshold)[0]  # boxes below the overlap threshold survive
        order = order[inds + 1]
    return keep
def softmax_2(data):
    """ Row-wise softmax over the two class scores (non-face, face). """
    data_exp = np.zeros(data.shape)
    data_sum = np.zeros(data.shape)
    result = np.zeros(data.shape)
    data_exp = np.exp(data)
    data_sum[:, 0] = np.sum(data_exp, axis=1)
    data_sum[:, 1] = data_sum[:, 0]
    result = data_exp / data_sum
    return result
class FaceDetect():
    def __init__(self, dpu, detThreshold=0.55, nmsThreshold=0.35):
        self.dpu = dpu
        self.detThreshold = detThreshold
        self.nmsThreshold = nmsThreshold
        # tensor dimensions below are populated elsewhere (start() is called
        # further down, but its definition is not included in this snippet)
        self.inputChannels = []
        self.inputHeight = []
        self.inputWidth = []
        self.inputShape = []
        self.output0Channels = []
        self.output0Height = []
        self.output0Width = []
        self.output0Size = []
        self.output0Shape = []
        self.output1Size = []
        self.output1Shape = []

    def config(self, detThreshold, nmsThreshold):
        self.detThreshold = detThreshold
        self.nmsThreshold = nmsThreshold
    def process(self, img):
        dpu = self.dpu
        inputHeight = self.inputHeight
        inputWidth = self.inputWidth
        inputShape = self.inputShape
        output0Height = self.output0Height
        output0Width = self.output0Width
        output0Size = self.output0Size
        output0Shape = self.output0Shape
        output1Size = self.output1Size
        output1Shape = self.output1Shape

        imgHeight = img.shape[0]
        imgWidth = img.shape[1]
        scale_h = imgHeight / inputHeight
        scale_w = imgWidth / inputWidth

        """ Image pre-processing """
        # normalize
        img = img - 128.0
        # resize
        img = cv2.resize(img, (inputWidth, inputHeight))

        """ Prepare input/output buffers """
        #print("[INFO] process - prep input buffer ")
        inputData = []
        inputData.append(np.empty((inputShape), dtype=np.float32, order='C'))
        inputImage = inputData[0]
        inputImage[0,...] = img

        #print("[INFO] process - prep output buffer ")
        outputData = []
        outputData.append(np.empty((output0Shape), dtype=np.float32, order='C'))
        outputData.append(np.empty((output1Shape), dtype=np.float32, order='C'))

        """ Execute model on DPU """
        job_id = dpu.execute_async(inputData, outputData)
        dpu.wait(job_id)

        """ Retrieve output results """
        OutputData0 = outputData[0].reshape(1, output0Size)
        bboxes = np.reshape(OutputData0, (-1, 4))
        outputData1 = outputData[1].reshape(1, output1Size)
        scores = np.reshape(outputData1, (-1, 2))

        """ Get original face boxes """
        gy = np.arange(0, output0Height)
        gx = np.arange(0, output0Width)
        [x, y] = np.meshgrid(gx, gy)
        x = x.ravel() * 4
        y = y.ravel() * 4
        bboxes[:, 0] = bboxes[:, 0] + x
        bboxes[:, 1] = bboxes[:, 1] + y
        bboxes[:, 2] = bboxes[:, 2] + x
        bboxes[:, 3] = bboxes[:, 3] + y

        """ Run softmax """
        softmax = softmax_2(scores)

        """ Only keep faces for which prob is above detection threshold """
        prob = softmax[:, 1]
        keep_idx = prob.ravel() > self.detThreshold
        bboxes = bboxes[keep_idx, :]
        bboxes = np.array(bboxes, dtype=np.float32)
        prob = prob[keep_idx]

        """ Perform Non-Maxima Suppression """
        face_indices = []
        if len(bboxes) > 0:
            face_indices = nms_boxes(bboxes, prob, self.nmsThreshold)

        faces = bboxes[face_indices]

        # extract bounding box for each face
        for i, face in enumerate(faces):
            xmin = max(face[0] * scale_w, 0)
            ymin = max(face[1] * scale_h, 0)
            xmax = min(face[2] * scale_w, imgWidth)
            ymax = min(face[3] * scale_h, imgHeight)
            faces[i] = (int(xmin), int(ymin), int(xmax), int(ymax))

        return faces
# Initialize face detector
densebox_xmodel = "densebox_640_360.xmodel"
densebox_graph = xir.Graph.deserialize(densebox_xmodel)
dpu_face_detector = FaceDetect(densebox_dpu,detThreshold,nmsThreshold)
dpu_face_detector.start()
# Initialize the camera input
print("[INFO] starting camera input ...")
cam = cv2.VideoCapture(0)
cam.set(cv2.CAP_PROP_FRAME_WIDTH,640)
cam.set(cv2.CAP_PROP_FRAME_HEIGHT,480)
if not cam.isOpened():
    print("[ERROR] Failed to open camera ", 0)
    exit()

# loop over the frames from the video stream
while True:
    # Capture image from camera
    ret, frame = cam.read()

    # Vitis-AI/DPU based face detector
    faces = dpu_face_detector.process(frame)

    # loop over the faces
    for i, (left, top, right, bottom) in enumerate(faces):
        # draw a bounding box surrounding the object so we can
        # visualize it
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)

    # Display the processed image
    cv2.imshow("Face Detection", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# Stop the face detector
dpu_face_detector.stop()
del densebox_dpu

# Cleanup
cam.release()
cv2.destroyAllWindows()
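For completeness, this is the debug check I am planning to drop in right before the failing call, to confirm what types process() is actually handing back (left, top, right and bottom are the loop variables from the code above):

        # debug only: inspect the coordinate types right before drawing
        print("[DEBUG] coord types:", type(left), type(top), type(right), type(bottom))
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)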