Hello,
I am now using python opencv in docker to analyse video files. There are about 24 files of 1 hour on which i do movement analysis with nvidia optical flow.
However it takes about 40 minutes to analyze one day (24 hours, so 24 files of 1 hour).
I would like to speed this up. I now do the processing in parallel, and my CPU is at 100% — I think because of the reading of the video files.
The video files are 30fps but i am comparing frame 1 with frame 31 by skipping 30 frames.
I think that the bottleneck is the reading of the files by the cpu, is this correct?
How could I speed this up? I tried using hardware acceleration with OpenCV's FFMPEG backend for reading the video files, but it was not faster. Would CUDA VideoCapture be faster? That is not available in Python, is it?
As you can see in my code, I tried experimenting with cv2.cuda functions; however, it was not faster — maybe because of downloading and uploading between CPU and GPU when using CUDA functions while still using cv2.VideoCapture?
The actual optical flow analysis using NVIDIA optical flow is really fast! So I think my code could run much faster if the reading of frames can be sped up. I currently do not even resize the frames, as the actual optical flow analysis is so fast that it does not matter.
Or is the best way to just get a faster cpu?
I would love to get some input — I am not very experienced and would love to learn! I have already tried to optimize as much as possible, but for now I am stuck and on the verge of buying a faster CPU. If anybody could help me, and tell me whether it would be better to use my NVIDIA GPU (an RTX 3080), that would be greatly appreciated!
My cpu now is a 3600XT, but i want to switch to Intel i7-14700K
Kind regards,
Jari
relevant code:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
import concurrent.futures
from datetime import datetime, timedelta
import pandas as pd
from tqdm import tqdm
import threading # Import threading for the lock
from collections import defaultdict
import pathlib
import sys
# --- Analysis configuration ---
frame_counter_value = 30    # sample one frame out of every 30 (~1 sample/second at 30 fps)
movement_threshold = 800.0  # flow-magnitude threshold above which a pixel counts as "moving"
resample_frequency = '5T'   # pandas resample alias (5 minutes)
                            # NOTE(review): the 'T' alias is deprecated in newer pandas;
                            # '5min' is the forward-compatible spelling — confirm before upgrading.

# GPU device id passed to the NVIDIA optical-flow engine
device_id = 0

# Directory containing the video files (first CLI argument, default: current directory)
video_dir_path = sys.argv[1] if len(sys.argv) > 1 else '.'

# --- Logging of already-processed files ---
log_file_path = 'processed_files.log'
# Ensure the log file exists.  touch(exist_ok=True) replaces the original
# is_file() check followed by open(..., 'w'): one call, no empty-file dance.
pathlib.Path(log_file_path).touch(exist_ok=True)
# Read the log into a set for O(1) "already processed?" lookups.
with open(log_file_path, 'r') as f:
    processed_files = {line.strip() for line in f}

# Shared tqdm progress bar; created later, updated from worker threads,
# so every update must hold pbar_lock.
pbar = None
pbar_lock = threading.Lock()
def calculate_movement(video_file):
    """Measure movement over time in a single video file.

    Samples one frame every ``frame_counter_value`` frames and runs NVIDIA
    Optical Flow 2.0 between consecutive sampled frames.  For each pair, the
    percentage of flow vectors whose magnitude exceeds ``movement_threshold``
    is recorded.

    Parameters
    ----------
    video_file : str
        Path of the video file to analyse.

    Returns
    -------
    tuple[list[float], float]
        (per-sample movement percentages, FPS as reported by OpenCV).
    """
    global pbar  # shared tqdm progress bar, updated under pbar_lock

    cap = cv2.VideoCapture(video_file)
    fps = cap.get(cv2.CAP_PROP_FPS)
    movement_percentages = []

    # Read the first frame to seed the flow reference.
    ret, frame = cap.read()
    if not ret:
        cap.release()  # fix: original leaked the capture on an unreadable file
        return [], fps

    prvs = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame_size = (prvs.shape[1], prvs.shape[0])

    # NVIDIA hardware optical flow; the heavy lifting runs on the GPU.
    optical_flow = cv2.cuda_NvidiaOpticalFlow_2_0.create(
        frame_size,
        cv2.cuda.NvidiaOpticalFlow_2_0_NV_OF_PERF_LEVEL_SLOW,
        cv2.cuda.NvidiaOpticalFlow_2_0_NV_OF_OUTPUT_VECTOR_GRID_SIZE_4,
        cv2.cuda.NvidiaOpticalFlow_2_0_NV_OF_HINT_VECTOR_GRID_SIZE_4,
        False,
        False,
        False,
        device_id,
    )

    while True:
        # Fix (performance): skip frames with sequential grab() calls instead
        # of cap.set(cv2.CAP_PROP_POS_FRAMES, ...).  A positional seek every
        # iteration makes the demuxer hunt for a keyframe and re-decode up to
        # it — likely the CPU bottleneck.  grab() advances the decoder
        # without the retrieve/convert step, so only the sampled frame is
        # fully materialised.
        for _ in range(frame_counter_value - 1):
            cap.grab()
        ret, frame = cap.read()
        if not ret:
            break

        # `nxt` instead of `next`: the original shadowed the builtin.
        nxt = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        flow = optical_flow.calc(prvs, nxt, None)
        # calc() returns a tuple; element 0 is the flow field.
        # NOTE(review): NVOF 2.0 emits fixed-point flow vectors, so these
        # magnitudes are in raw (scaled) units rather than pixels — which is
        # presumably why movement_threshold is as large as 800.  Confirm
        # whether convertToFloat()/upsampling scaling is wanted instead.
        flow = flow[0].astype(np.float32)

        # Fraction of flow vectors whose magnitude exceeds the threshold.
        mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        movement_percentages.append(np.sum(mag > movement_threshold) / mag.size * 100)

        # Fix (correctness): advance the reference frame every iteration.
        # The original only refreshed `prvs` every `frame_counter_value`
        # loop passes, so flow was computed against an increasingly stale
        # frame — up to ~900 frames apart instead of the intended 30.
        prvs = nxt

        # tqdm updates from multiple worker threads must be serialized.
        with pbar_lock:
            pbar.update(1)

    cap.release()
    return movement_percentages, fps
# Later in the code, the per-file work is fanned out over worker threads:
with concurrent.futures.ThreadPoolExecutor() as executor:
    # One future per video file; calculate_movement does the heavy lifting.
    futures = {
        executor.submit(calculate_movement, os.path.join(video_dir_path, name))
        for name in video_files
    }