I get an image from the camera with requests, decode it with OpenCV, and then follow the same process as in the learnopencv tutorial mentioned above. Just to clarify: the "for net in range(len(self.net)):" loop exists because I run the same image through several models, one class per model.

Ok, here is the code:
# imports used by this method (they live at module level in the actual class file)
import cv2
import numpy as np
import requests
from requests.auth import HTTPDigestAuth

def processa(self, url, username, password, index, sectorXY):
    # Fetch one snapshot from the camera and decode it.
    imagem = requests.get(url, auth=HTTPDigestAuth(username, password))
    frame = cv2.imdecode(np.frombuffer(imagem.content, np.uint8), cv2.IMREAD_UNCHANGED)
    tamanho = frame.shape
    altura = frame.shape[0]
    comprimento = frame.shape[1]
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (1280, 1280), swapRB=True, crop=False)
    objetos_capturados_frame = []
    smoke_detections = False
    for net in range(len(self.net)):
        print(net)
        if str(net) not in self.filtros[index]:
            continue
        layer_names = self.net[net].getLayerNames()
        outputlayers = [layer_names[i - 1] for i in self.net[net].getUnconnectedOutLayers()]
        self.net[net].setInput(blob)
        outputs = self.net[net].forward(outputlayers)
        class_ids = []
        confidences = []  # confidence score for each detection kept
        caixas = []
        rows = outputs[0].shape[1]
        # Scale factors from the 1280x1280 network input back to the original frame,
        # which here is assumed to be 3840x2160.
        x_factor = 3840 / 1280
        y_factor = 2160 / 1280
        for r in range(rows):
            row = outputs[0][0][r]
            confidence = row[4]
            # Discard low-confidence detections and continue.
            if confidence >= (self.confidence / 100):
                classes_scores = row[5:]
                if classes_scores[0] > 0.5:  # single class per model, so only one class score
                    cx, cy, w, h = row[0], row[1], row[2], row[3]
                    left = int((cx - w / 2) * x_factor)
                    top = int((cy - h / 2) * y_factor)
                    width = int(w * x_factor)
                    height = int(h * y_factor)
                    caixas.append([left, top, width, height])
                    confidences.append(float(confidence))
                    class_ids.append(net)
        indexes = cv2.dnn.NMSBoxes(caixas, confidences, 0.5, 0.4)
        if len(indexes) > 0 and net == 0:
            smoke_detections = True
        for i in indexes:
            objeto_no_frame = {}
            objetos_capturados_frame_aux = []
            # i = i[0]  # only needed on older OpenCV versions that return nested indices
            caixa = caixas[i]
            x = caixa[0]
            y = caixa[1]
            w = caixa[2]
            h = caixa[3]
            objeto_no_frame["object_id"] = int(class_ids[i])
            objeto_no_frame["confianca"] = round(confidences[i], 2)
            objeto_no_frame["topLeft"] = [x, y]
            objeto_no_frame["bottomRight"] = [w, h]  # note: stores width/height, not a corner
            objetos_capturados_frame.append(objeto_no_frame)
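
Just to make the output format explicit, each entry appended to objetos_capturados_frame is a dict like the one below. The numbers are only illustrative, and the cv2.rectangle call is just an example of how such an entry could be drawn back onto the frame (keeping in mind that "bottomRight" currently holds width/height rather than an actual corner):

import cv2
import numpy as np

# Illustration only: shape of one entry appended to objetos_capturados_frame
# (the values are made up, not real detections).
exemplo = {
    "object_id": 0,            # index of the model (net) that produced the detection
    "confianca": 0.87,         # rounded confidence
    "topLeft": [120, 240],     # top-left corner, already scaled back to the original frame
    "bottomRight": [80, 60],   # currently width/height, not an actual bottom-right corner
}

# Drawing it back onto a frame would look something like this:
frame = np.zeros((2160, 3840, 3), dtype=np.uint8)  # dummy frame with the camera resolution
x, y = exemplo["topLeft"]
w, h = exemplo["bottomRight"]
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)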