Hi folks, I am trying to build a little project here, but I am struggling with the panorama generation: turning a simple video feed of a planar surface into a single panorama image.
My approach is to process the frames in batches, then match and stitch every pair of consecutive frames and recursively repeat until I am left with one final image.
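To illustrate the intended merge order on a batch of four frames (a sketch only; the actual implementation is sort_stitch below):

stitch(f0, f1) -> p01
stitch(f2, f3) -> p23
stitch(p01, p23) -> final panorama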
For feature matching I use XFeat + LighterGlue. As you can see, the matching seems good enough between consecutive frames:
inlier ratio: 0.9881605544325729
inlier ratio: 0.9608355091383812
inlier ratio: 0.9502032520325203
The problem I am facing is the final panorama image. It looks like this:
Here is my code:
import numpy as np
import torch
import cv2
import matplotlib.pyplot as plt
xfeat = torch.hub.load('verlab/accelerated_features', 'XFeat', pretrained=True, top_k=4096)
def load_video(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames
def warp_corners_and_draw_matches(ref_points, dst_points, img1, img2):
    # Calculate the homography matrix
    H, mask = cv2.findHomography(ref_points, dst_points, cv2.USAC_MAGSAC, 3.5, maxIters=1_000, confidence=0.999)
    mask = mask.flatten()
    print('inlier ratio: ', np.sum(mask) / len(mask))

    # Get corners of the first image (img1)
    h, w = img1.shape[:2]
    corners_img1 = np.array([[0, 0], [w-1, 0], [w-1, h-1], [0, h-1]], dtype=np.float32).reshape(-1, 1, 2)

    # Warp corners into the second image's (img2) space
    warped_corners = cv2.perspectiveTransform(corners_img1, H)

    # Draw the warped corners in img2 (i-1 wraps around, so the polygon is closed)
    img2_with_corners = img2.copy()
    for i in range(len(warped_corners)):
        start_point = tuple(warped_corners[i-1][0].astype(int))
        end_point = tuple(warped_corners[i][0].astype(int))
        cv2.line(img2_with_corners, start_point, end_point, (0, 255, 0), 4)

    # Prepare keypoints and matches for the drawMatches function
    keypoints1 = [cv2.KeyPoint(p[0], p[1], 5) for p in ref_points]
    keypoints2 = [cv2.KeyPoint(p[0], p[1], 5) for p in dst_points]
    matches = [cv2.DMatch(i, i, 0) for i in range(len(mask)) if mask[i]]

    # Draw inlier matches
    img_matches = cv2.drawMatches(img1, keypoints1, img2_with_corners, keypoints2, matches, None,
                                  matchColor=(0, 255, 0), flags=2)
    return img_matches
def stitch_pair(frame1, frame2):
    # Detect and describe keypoints in both frames
    output0 = xfeat.detectAndCompute(frame1, top_k=4096)[0]
    output1 = xfeat.detectAndCompute(frame2, top_k=4096)[0]

    # Update with image resolution (required)
    output0.update({'image_size': (frame1.shape[1], frame1.shape[0])})
    output1.update({'image_size': (frame2.shape[1], frame2.shape[0])})

    mkpts_0, mkpts_1 = xfeat.match_lighterglue(output0, output1)

    # Visualize the inlier matches and the warped corners
    canvas = warp_corners_and_draw_matches(mkpts_0, mkpts_1, frame1, frame2)
    plt.figure(figsize=(12, 12))
    plt.imshow(canvas[..., ::-1])
    plt.show()

    # Compute the homography matrix (mkpts_0 in frame1 are mapped onto mkpts_1 in frame2)
    H, status = cv2.findHomography(mkpts_0, mkpts_1, cv2.RANSAC, 5.0)

    # Get dimensions of the images
    h1, w1 = frame1.shape[:2]
    h2, w2 = frame2.shape[:2]

    # Get the corners of the second image and warp them with the homography matrix
    corners2 = np.array([[0, 0], [0, h2], [w2, h2], [w2, 0]], dtype='float32').reshape(-1, 1, 2)
    corners2_transformed = cv2.perspectiveTransform(corners2, H)

    # Get the corners of the first image
    corners1 = np.array([[0, 0], [0, h1], [w1, h1], [w1, 0]], dtype='float32').reshape(-1, 1, 2)

    # Find the bounding box of the combined corners
    all_corners = np.concatenate((corners1, corners2_transformed), axis=0)
    [x_min, y_min] = np.int32(all_corners.min(axis=0).ravel() - 0.5)
    [x_max, y_max] = np.int32(all_corners.max(axis=0).ravel() + 0.5)

    # Translation that shifts everything into the positive quadrant of the canvas
    translation_dist = [-x_min, -y_min]
    translation_matrix = np.array([[1, 0, translation_dist[0]],
                                   [0, 1, translation_dist[1]],
                                   [0, 0, 1]])

    # Warp the second image onto the new canvas
    result_img = cv2.warpPerspective(frame2, translation_matrix @ H,
                                     (x_max - x_min, y_max - y_min), flags=cv2.INTER_LINEAR)

    # Place the first image on the new canvas (overwriting the overlap region)
    result_img[translation_dist[1]:h1 + translation_dist[1],
               translation_dist[0]:w1 + translation_dist[0]] = frame1

    # Plot the stitched pair
    plt.figure(figsize=(12, 12))
    plt.imshow(result_img[..., ::-1])
    plt.title('Stitched Image')
    plt.show()

    return result_img
def sort_stitch(frames):
    if len(frames) == 1:
        return frames
    if len(frames) == 2:
        return [stitch_pair(frames[0], frames[1])]
    mid = len(frames) // 2
    left = sort_stitch(frames[:mid])
    right = sort_stitch(frames[mid:])
    # Merge the two halves: the last element of each list is that half's panorama
    result = [stitch_pair(left[-1], right[-1])]
    return left + right + result
video_path = "video_file_path"
frames = load_video(video_path)
print(f"Total frame count: {len(frames)}")
batch_size = 32
# FOR TESTING
# Process only the first batch
current_batch = frames[:batch_size]
print(f"Current batch length: {len(current_batch)}")
result = sort_stitch(current_batch)
print("\nFinal result:", result[-1])
plt.figure(figsize=(12, 8))
plt.imshow(cv2.cvtColor(result[-1], cv2.COLOR_BGR2RGB))
plt.title('Final Panorama')
cv2.imwrite('final_panorama.jpg', result[-1])
plt.axis('off')
plt.show()
At the moment it is just a simple script, without any checks for match quality, dropping of bad frames, and so on. There will be time for those improvements, I guess; right now I have bigger problems.
Is there something fundamentally wrong with my approach? Am I putting the wrong image on top? Any ideas?
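One thing I keep second-guessing is the homography direction: cv2.findHomography(mkpts_0, mkpts_1, ...) maps frame1 coordinates into frame2 coordinates, yet in stitch_pair I warp frame2 with that H. Here is a minimal overlay check I have in mind (a sketch only; check_homography_direction is my own hypothetical helper): if the blend lines up, H is the right warp for frame1, and frame2 would need np.linalg.inv(H) instead.

def check_homography_direction(frame1, frame2, H):
    # If H maps frame1 coordinates into frame2 coordinates, warping frame1
    # by H should line up with frame2; if the overlay is misaligned, the
    # inverse homography (or swapped findHomography arguments) is needed.
    h2, w2 = frame2.shape[:2]
    warped1 = cv2.warpPerspective(frame1, H, (w2, h2))
    overlay = cv2.addWeighted(frame2, 0.5, warped1, 0.5, 0)
    plt.figure(figsize=(12, 12))
    plt.imshow(overlay[..., ::-1])
    plt.title('frame2 (50%) + frame1 warped by H (50%)')
    plt.show()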