I have been trying to find an efficient way to detect common objects in a video.
Could someone please suggest whether this can be done with OpenCV's built-in modules, or with some other, more trusted library?
This is my code as of now. However, the cvlib library does not seem to be a good option.
import cv2
import cvlib
from gtts import gTTS
from playsound import playsound
# Distinct object labels seen so far, kept in first-seen order; the
# detection loop further down appends each new label exactly once.
labels = []
def speech(text, language='en',
           output_path='/Users/advikajain/Documents/OpenCv work/sounds/output.mp3'):
    """Print *text*, synthesize it to an MP3 with gTTS, and play it back.

    Args:
        text: The sentence to speak. It is always printed; if it is empty
            or whitespace-only, nothing is synthesized or played.
        language: Language code handed to gTTS as ``lang``.
        output_path: Where the generated MP3 is written before playback.
            The file is overwritten on every call.

    Returns:
        None.
    """
    print(text)
    # gTTS refuses empty input, so bail out before touching the network
    # or the filesystem when there is nothing to say.
    if not text or not text.strip():
        return None
    output = gTTS(text=text, lang=language, slow=False)
    # Save and play from the same path so the two can never drift apart.
    output.save(output_path)
    playsound(output_path)
    return None
VIDEO_PATH = '/Users/advikajain/Documents/OpenCv work/videos/test2.mp4'


def collect_labels(video_path, seen):
    """Run object detection over a video and record each distinct label.

    Every frame is passed to ``cvlib.detect_common_objects``, the annotated
    frame is shown in a window titled "advika", and any label not already
    in *seen* is appended to it. Processing stops when the video runs out
    of frames or the user presses the space bar.

    Args:
        video_path: Path of the video file to open.
        seen: List that receives newly seen labels (mutated in place).

    Returns:
        The same *seen* list, for convenience.
    """
    video = cv2.VideoCapture(video_path)
    try:
        while video.isOpened():
            ret, frame = video.read()
            # read() reports failure once the stream is exhausted; without
            # this check a None frame would be handed to the detector.
            if not ret:
                break
            bbox, label, conf = cvlib.detect_common_objects(frame)
            output = cvlib.object_detection.draw_bbox(frame, bbox, label, conf)
            cv2.imshow("advika", output)
            for item in label:
                if item not in seen:
                    seen.append(item)
            # Space bar aborts early; the mask keeps only the low byte of
            # the key code before comparing.
            if (cv2.waitKey(1) & 0xff) == ord(' '):
                break
    finally:
        # Always free the capture handle and close the preview window,
        # even if detection raised part-way through.
        video.release()
        cv2.destroyAllWindows()
    return seen


def build_sentence(found):
    """Turn a list of labels into the sentence that will be spoken.

    The first label produces "I found a <label>,and," and every later one
    produces "a <label>,". Fragments are joined with single spaces so the
    result is plain text rather than the repr of a Python list.

    Args:
        found: Iterable of label strings, in the order to announce them.

    Returns:
        The joined sentence, or an empty string when *found* is empty.
    """
    fragments = [
        f"I found a {name},and," if index == 0 else f"a {name},"
        for index, name in enumerate(found)
    ]
    return " ".join(fragments)


def main():
    """Detect objects in the configured video and announce what was found."""
    collect_labels(VIDEO_PATH, labels)
    # Nothing detected means nothing to announce; skip the TTS call rather
    # than sending it an empty string.
    if labels:
        speech(build_sentence(labels))


if __name__ == "__main__":
    main()