I'm trying to use OpenCV to detect people in a video and count whether those people cross a horizontal line

I’m brand new to Python and machine learning, and my understanding of the topic is still superficial.

I’ve done a few courses to understand how to treat images, and better tailor them to recognize what’s needed.

However, when trying to combine the two concepts — detecting people and then verifying whether those people crossed a line in order to count them — I got stuck.

Is it possible using only OpenCV?

import cv2

# Load the cascades
# NOTE: body_cascade is loaded but not used below; only the upper-body
# cascade is run on each frame.
body_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_fullbody.xml')
upperbody_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_upperbody.xml')

cap = cv2.VideoCapture('subway.mp4')

# Initialize a counter for people crossing the line
crossing_count = 0
line_position = 500  # adjust this value based on your video frame size (y position of the line)

# Keep track of the ids of tracks that have already been counted
crossed_ids = set()

# Minimal nearest-neighbour tracker.
# The index of a detection inside one frame's result list is NOT a stable
# identity (it restarts at 0 every frame), so detections are associated with
# tracks that persist across frames instead.
tracks = {}                   # track id -> {'pos': (cx, bottom), 'missed': frames without a match}
next_track_id = 0
max_match_dist_sq = 80 ** 2   # max squared pixel jump between frames; tune for your video
max_missed_frames = 10        # keep a track alive this long without a detection; tune as needed

while True:
    ret, img = cap.read()
    if not ret:
        break

    img = cv2.resize(img, (800, 600))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Detect upper bodies
    upperbodies = upperbody_cascade.detectMultiScale(gray, 1.2, 4)

    # Draw a line across the frame
    cv2.line(img, (0, line_position), (800, line_position), (255, 0, 0), 2)

    # Tracks that existed before this frame and have not been matched yet.
    unmatched_ids = set(tracks)

    for (x, y, w, h) in upperbodies:
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Reference point of the detection: bottom-centre of the rectangle.
        # Converted to plain ints so the arithmetic below cannot overflow a
        # fixed-width array integer type.
        cx = int(x) + int(w) // 2
        bottom = int(y) + int(h)

        # Greedy association: closest still-unmatched track within range.
        best_id = None
        best_dist_sq = max_match_dist_sq
        for track_id in unmatched_ids:
            px, py = tracks[track_id]['pos']
            dist_sq = (cx - px) ** 2 + (bottom - py) ** 2
            if dist_sq <= best_dist_sq:
                best_id, best_dist_sq = track_id, dist_sq

        if best_id is None:
            # Nothing close enough: start a new track. A crossing cannot be
            # decided from a single observation, so nothing is counted yet.
            tracks[next_track_id] = {'pos': (cx, bottom), 'missed': 0}
            next_track_id += 1
            continue

        unmatched_ids.discard(best_id)
        prev_bottom = tracks[best_id]['pos'][1]
        tracks[best_id] = {'pos': (cx, bottom), 'missed': 0}

        # Count only a real transition: the bottom edge was above the line in
        # the previous observation and is on/below it now. Each track is
        # counted at most once.
        if prev_bottom < line_position <= bottom and best_id not in crossed_ids:
            crossing_count += 1
            crossed_ids.add(best_id)

    # Age tracks that got no detection this frame; drop them once stale so a
    # few missed detections do not break a track, but dead tracks do not linger.
    for track_id in unmatched_ids:
        tracks[track_id]['missed'] += 1
        if tracks[track_id]['missed'] > max_missed_frames:
            del tracks[track_id]
            crossed_ids.discard(track_id)

    # Display the count on the frame
    cv2.putText(img, f'Crossing Count: {crossing_count}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

    cv2.imshow('img', img)

    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

two and a half steps:

  • detection
  • tracking
  • hysteresis on that line

detection: haar cascades are terrible for everything.

tracking: that’s why haar cascades are terrible. you are not gonna get a reliable detection in every frame. constructing tracks from detections requires associating detections in time, usually by nearest spatial distance (greedy or otherwise, it’s a graph matching). without a stable detection in every frame, you’ll have to look through several past frames’ detections. since you are supposed to keep “state” for your tracks anyway, you’d look in your tracks for such possible associations. with unstable detections, you’d need to keep tracks even if you DON’T see a detection right now, but wait a little before you end that specific track.

line and hysteresis: turn the line into two parallel lines with some separation. label a track as “above” if it goes above the top line, and “below” if it goes below the bottom line. in between the lines, no state change.

1 Like

So, you mean, for example, using contours with this approach of the parallel lines?

something like this?

import numpy as np
import cv2

def center(x, y, w, h):
    """Return the centre point ``(cx, cy)`` of a bounding box.

    ``x`` and ``y`` are the box origin, ``w`` and ``h`` its width and
    height. Half of each dimension is truncated to an int before being
    added to the origin.
    """
    half_w, half_h = int(w / 2), int(h / 2)
    return x + half_w, y + half_h

def rescaleFrame(frame, scale=0.75):
    """Return ``frame`` resized by the factor ``scale``.

    The target width and height are ``frame.shape[1]`` and
    ``frame.shape[0]`` multiplied by ``scale`` and truncated to ints.
    Resizing uses ``cv2.INTER_AREA`` interpolation.
    """
    src_height, src_width = frame.shape[0], frame.shape[1]
    new_size = (int(src_width * scale), int(src_height * scale))
    return cv2.resize(frame, new_size, interpolation=cv2.INTER_AREA)

cap = cv2.VideoCapture('subway.mp4')

fgbg = cv2.createBackgroundSubtractorMOG2()

# Structuring element used by every morphology step. It never changes, so it
# is built once instead of on every frame.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

# detects[i] collects the centroids of the i-th large contour of each frame
# for as long as that centroid stays inside the band around the line.
# NOTE(review): "i" is the contour's position in the current frame's contour
# list, which is not guaranteed to refer to the same object from frame to
# frame -- confirm this is acceptable for your footage.
detects = []

posL = 740    # y position of the counting line (in the rescaled frame)
offset = 20   # half-height of the band around the line

xy1 = (20, posL)
xy2 = (1800, posL)


total = 0

up = 0
down = 0

while True:
    ret, frame = cap.read()
    # Stop at end of stream / read failure; otherwise frame is None and
    # rescaleFrame() would fail on frame.shape.
    if not ret:
        break

    frame_resized = rescaleFrame(frame)

    gray = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY)
    cv2.imshow("gray", gray)

    fgmask = fgbg.apply(gray)
    cv2.imshow("fgmask", fgmask)

    _, th = cv2.threshold(fgmask, 200, 255, cv2.THRESH_BINARY)
    cv2.imshow("th", th)

    opening = cv2.morphologyEx(th, cv2.MORPH_OPEN, kernel, iterations = 2)
    cv2.imshow("opening", opening)

    dilation = cv2.dilate(opening,kernel,iterations = 4)
    cv2.imshow("dilation", dilation)

    # NOTE(review): "closing" is only displayed; contours below are taken
    # from "dilation". Confirm that is intended.
    closing = cv2.morphologyEx(dilation, cv2.MORPH_CLOSE, kernel, iterations = 8)
    cv2.imshow("closing", closing)

    # Counting line plus the upper and lower edges of the band around it.
    cv2.line(frame_resized,xy1,xy2,(255,0,0),3)
    cv2.line(frame_resized,(xy1[0],posL-offset),(xy2[0],posL-offset),(255,255,0),2)
    cv2.line(frame_resized,(xy1[0],posL+offset),(xy2[0],posL+offset),(255,255,0),2)

    contours, _ = cv2.findContours(dilation,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
    i = 0  # running index over the contours that pass the area filter
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if int(area) <= 3000:
            continue  # ignore small blobs

        (x,y,w,h) = cv2.boundingRect(cnt)
        centro = center(x, y, w, h)

        cv2.putText(frame_resized, str(i), (x+5, y+15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255),2)
        cv2.circle(frame_resized, centro, 4, (0, 0,255), -1)
        cv2.rectangle(frame_resized,(x,y),(x+w,y+h),(0,255,0),2)

        if len(detects) <= i:
            detects.append([])

        # Record the centroid only while it is inside the band; leaving the
        # band resets the history of this slot.
        if posL-offset < centro[1] < posL+offset:
            detects[i].append(centro)
        else:
            detects[i].clear()
        i += 1

    # No large contour in this frame (this also covers "no contours at all").
    if i == 0:
        detects.clear()

    for detect in detects:
        if not detect:
            continue

        # Compare where the history entered the band with where it is now.
        # The previous code indexed detect[c-1] with c == 0, which wraps
        # around to the last element and compares newest-vs-oldest with the
        # roles swapped, so its c == 0 check reported the opposite direction
        # from its c > 0 checks. It also cleared the list while iterating it.
        start_y = detect[0][1]
        latest_y = detect[-1][1]

        # Image y grows downwards: ending below the line after starting on or
        # above it is a downward move. Swap the counters if the camera
        # orientation makes the opposite true for your scene.
        if start_y <= posL < latest_y:
            detect.clear()
            down+=1
            total+=1
            cv2.line(frame_resized,xy1,xy2,(0,0,255),5)
        elif start_y >= posL > latest_y:
            detect.clear()
            up+=1
            total+=1
            cv2.line(frame_resized,xy1,xy2,(0,255,0),5)
        else:
            # No crossing yet: draw the trail of consecutive centroids.
            for c in range(1, len(detect)):
                cv2.line(frame_resized,detect[c-1],detect[c],(0,0,255),1)

    cv2.putText(frame_resized, "TOTAL: "+str(total), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255),2)
    cv2.putText(frame_resized, "SUBINDO: "+str(up), (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),2)
    cv2.putText(frame_resized, "DESCENDO: "+str(down), (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255),2)

    cv2.imshow("frame_resized", frame_resized)

    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

that’s a lot of code. I’m gonna say no to “contours”. hysteresis has nothing to do with those.