Can someone point me in the right direction to extract humans from an image?
I want to output just a png with players or humans.
As you can see in this image:
I input a JPG and the humans are detected. However, when I export them to a PNG, the humans are missing.
Here’s my script:
import cv2 as cv
import argparse
import sys
import numpy as np
import os.path
from datetime import datetime
# --- Detection parameters -------------------------------------------------
confThreshold = 0.3  # minimum class score for a detection to be kept
nmsThreshold = 0.2  # threshold handed to non-maximum suppression
inpWidth = 320  # width of the network's input image
inpHeight = 320  # height of the network's input image
yourpath = "./images"  # directory tree walked for input files

# --- Command line ---------------------------------------------------------
# NOTE(review): `args` is parsed here but not read anywhere in the visible
# script; inputs come from `yourpath` instead.
parser = argparse.ArgumentParser(description='Object Detection using YOLO in OPENCV')
parser.add_argument('--image', help='Path to image file.')
parser.add_argument('--video', help='Path to video file.')
args = parser.parse_args()

# --- Class labels ---------------------------------------------------------
# The classes file holds one object name per line.
classesFile = "human.txt"
classes = None
with open(classesFile, 'rt') as label_file:
    classes = label_file.read().rstrip('\n').split('\n')

# --- Network --------------------------------------------------------------
# Darknet configuration and weight files used to build the network.
modelConfiguration = "human.cfg"
modelWeights = "human.weights"
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
def getOutputsNames(net):
    """Return the names of the network's output layers.

    Output layers are the layers with unconnected outputs, as reported by
    ``net.getUnconnectedOutLayers()``. The reported indices are 1-based, so
    each is shifted by one before indexing ``net.getLayerNames()``.

    Args:
        net: object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv.dnn`` network).

    Returns:
        list: layer names, in the order the indices were reported.
    """
    layer_names = net.getLayerNames()
    # Depending on the OpenCV release the indices arrive either as an N x 1
    # array or as a flat array; flattening accepts both layouts.
    out_ids = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
    return [layer_names[int(layer_id) - 1] for layer_id in out_ids]
# Outline one detection on the module-level `frame`.
def drawPred(classId, conf, left, top, right, bottom):
    """Draw a rectangle for one detection and build its text label.

    The rectangle is drawn on the global ``frame``. The label
    ("<class>:<confidence>" when ``classes`` is non-empty, otherwise just the
    confidence with two decimals) is only measured here; the call that would
    render it is currently disabled.

    Args:
        classId: index into the global ``classes`` list.
        conf: confidence value formatted into the label.
        left, top, right, bottom: rectangle corners passed to ``cv.rectangle``.
    """
    top_left = (left, top)
    bottom_right = (right, bottom)
    cv.rectangle(frame, top_left, bottom_right, (255, 178, 50), 2)

    # Build "<class>:<confidence>" when class names are available.
    caption = '%.2f' % conf
    if classes:
        assert (classId < len(classes))
        caption = '%s:%s' % (classes[classId], caption)

    # Measure the caption so `top` is at least the text height.
    text_size, _baseline = cv.getTextSize(caption, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, text_size[1])
    # Label rendering is disabled:
    # cv.putText(frame, caption, (left, top + 30), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
# NOTE: removal of low-confidence boxes via non-maxima suppression happens in postprocess() below.
def color_filter(img, r, g, b):
    """Binarise the first three channels of ``img`` against per-channel limits.

    Channel 0 is compared with ``b``, channel 1 with ``g`` and channel 2 with
    ``r``. A value below its limit becomes 0, any other value becomes 255.

    Args:
        img: array indexed as ``img[:, :, channel]`` with at least 3 channels.
        r, g, b: thresholds for channels 2, 1 and 0 respectively.

    Returns:
        numpy.ndarray: ``uint8`` array with the same shape as ``img``; any
        channel beyond the third stays 0.
    """
    thresholds = (b, g, r)
    binary = np.zeros(img.shape, dtype=np.uint8)
    for channel, limit in enumerate(thresholds):
        below_limit = img[:, :, channel] < limit
        binary[:, :, channel] = np.where(below_limit, 0, 255)
    return binary.astype(np.uint8)
def postprocess(frame, outs):
    """Extract detected humans from ``frame`` and save them as a BGRA PNG.

    Steps:
      1. Keep detections whose best class score exceeds ``confThreshold``.
      2. Prune overlapping boxes with ``cv.dnn.NMSBoxes``.
      3. Copy every surviving class-0 box narrower than 500 px from an
         untouched copy of the frame onto a black canvas.
      4. Turn pixels inside the green HSV range white, then make every pure
         white pixel fully transparent.
      5. Write the result to ``image_bgra.png`` and show it in a window.

    Args:
        frame: image array indexed as (row, column, channel).
        outs: iterable of network outputs. Each row is read as
            ``[cx, cy, w, h, <unused>, class scores...]`` where the first four
            values are scaled by the frame width/height.
    """
    # Crops are taken from a pristine copy so drawn rectangles never leak
    # into the extracted image.
    original_frame = frame.copy()
    frameHeight, frameWidth = frame.shape[:2]
    extraction_image = np.zeros([frameHeight, frameWidth, 3], dtype=np.uint8)

    # Collect every detection whose best class score clears the threshold.
    # The three lists must stay index-aligned: NMSBoxes needs one score per
    # box, and the class id is looked up by the index NMS returns.
    classIds = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = int(np.argmax(scores))
            confidence = float(scores[classId])
            if confidence <= confThreshold:
                continue
            center_x = int(detection[0] * frameWidth)
            center_y = int(detection[1] * frameHeight)
            width = int(detection[2] * frameWidth)
            height = int(detection[3] * frameHeight)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            classIds.append(classId)
            confidences.append(confidence)
            boxes.append([left, top, width, height])

    # Non-maximum suppression removes redundant overlapping boxes with lower
    # confidences. Skip the call entirely when nothing was detected.
    if boxes:
        indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    else:
        indices = []

    # The indices may come back as an N x 1 array, a flat array or an empty
    # tuple depending on the OpenCV release; flattening accepts all of them.
    for i in np.asarray(indices, dtype=int).reshape(-1):
        i = int(i)
        left, top, width, height = boxes[i]
        if classIds[i] == 0 and width < 500:
            # Clamp to the frame: a negative start index would wrap around in
            # numpy slicing and yield an empty (i.e. missing) crop.
            x0 = max(left, 0)
            y0 = max(top, 0)
            x1 = min(left + width, frameWidth)
            y1 = min(top + height, frameHeight)
            if x1 > x0 and y1 > y0:
                extraction_image[y0:y1, x0:x1] = original_frame[y0:y1, x0:x1]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

    # Mask out green pixels (HSV range below).
    low_green = np.array([25, 52, 72])
    high_green = np.array([102, 255, 255])
    imgHSV = cv.cvtColor(extraction_image, cv.COLOR_BGR2HSV)
    mask = cv.inRange(imgHSV, low_green, high_green)
    # Invert so that 255 marks the pixels to keep.
    mask = 255 - mask
    res = cv.bitwise_and(extraction_image, extraction_image, mask=mask)
    res[mask == 0] = (255, 255, 255)

    # Append an alpha channel (BGRA) and make every pure-white pixel
    # transparent.
    # NOTE(review): the black canvas outside the copied boxes is not in the
    # green range, so it stays opaque black - confirm that is intended.
    h, w, c = res.shape
    alpha = np.full((h, w, 1), 255, dtype=np.uint8)
    image_bgra = np.concatenate([res, alpha], axis=-1)
    white = np.all(res == [255, 255, 255], axis=-1)
    image_bgra[white, -1] = 0

    cv.imwrite('image_bgra.png', image_bgra)
    cv.imshow("extraction", image_bgra)
# Process inputs: run detection and extraction on every file under `yourpath`.
for root, dirs, files in os.walk(yourpath, topdown=False):
    for name in files:
        filename = os.path.join(root, name)
        frame = cv.imread(filename)
        if frame is None:
            # cv.imread returns None for files it cannot decode; skip them
            # instead of crashing in blobFromImage.
            continue
        # Create a 4D blob from the frame (scaled to [0, 1], resized to the
        # network input size, R and B channels swapped).
        blob = cv.dnn.blobFromImage(frame, 1 / 255, (inpWidth, inpHeight), [0, 0, 0], 1, crop=False)
        # Set the input to the network; forward() has nothing to run on
        # without this call.
        net.setInput(blob)
        # Run the forward pass to get the output of the output layers.
        outs = net.forward(getOutputsNames(net))
        # Filter detections and write the extraction image.
        postprocess(frame, outs)
        cv.imshow("human_extraction", frame)
        # imshow only renders once the GUI event loop runs; wait for a key
        # press before moving on to the next file.
        cv.waitKey(0)
cv.destroyAllWindows()
I’m wondering whether my mask settings are correct, because the humans are being detected fine.
Any help would be greatly appreciated.