Panorama stitching issue

Hi folks, I am trying to build a little project here, but I am struggling with the panorama generation.

The goal: turn a simple video feed of a planar surface into a panorama image.

My approach is to process frames in batches, then match and stitch every pair of consecutive frames, and recursively repeat until I get one final image.

For feature matching I use XFeat + LighterGlue. As you can see, the matching seems good enough between frames:

inlier ratio:  0.9881605544325729
inlier ratio:  0.9608355091383812
inlier ratio:  0.9502032520325203

The problem I am facing is the final panorama image. It looks like this:

Here is my code:

import numpy as np
import torch
import cv2
import matplotlib.pyplot as plt

xfeat = torch.hub.load('verlab/accelerated_features', 'XFeat', pretrained=True, top_k=4096)

def load_video(video_path):
    # Read every frame of the video into an in-memory list
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    return frames

def warp_corners_and_draw_matches(ref_points, dst_points, img1, img2):
    # Calculate the Homography matrix
    H, mask = cv2.findHomography(ref_points, dst_points, cv2.USAC_MAGSAC, 3.5, maxIters=1_000, confidence=0.999)
    mask = mask.flatten()

    print('inlier ratio: ', np.sum(mask)/len(mask))

    # Get corners of the first image (image1)
    h, w = img1.shape[:2]
    corners_img1 = np.array([[0, 0], [w-1, 0], [w-1, h-1], [0, h-1]], dtype=np.float32).reshape(-1, 1, 2)

    # Warp corners to the second image (image2) space
    warped_corners = cv2.perspectiveTransform(corners_img1, H)

    # Draw the warped corners in image2
    img2_with_corners = img2.copy()
    for i in range(len(warped_corners)):
        start_point = tuple(warped_corners[i-1][0].astype(int))
        end_point = tuple(warped_corners[i][0].astype(int))
        cv2.line(img2_with_corners, start_point, end_point, (0, 255, 0), 4) 

    # Prepare keypoints and matches for drawMatches function
    keypoints1 = [cv2.KeyPoint(p[0], p[1], 5) for p in ref_points]
    keypoints2 = [cv2.KeyPoint(p[0], p[1], 5) for p in dst_points]
    matches = [cv2.DMatch(i,i,0) for i in range(len(mask)) if mask[i]]

    # Draw inlier matches
    img_matches = cv2.drawMatches(img1, keypoints1, img2_with_corners, keypoints2, matches, None,
                                  matchColor=(0, 255, 0), flags=2)

    return img_matches

def stitch_pair(frame1, frame2):
    # Detect XFeat keypoints and descriptors in both frames
    output0 = xfeat.detectAndCompute(frame1, top_k=4096)[0]
    output1 = xfeat.detectAndCompute(frame2, top_k=4096)[0]

    # Update with image resolution (required)
    output0.update({'image_size': (frame1.shape[1], frame1.shape[0])})
    output1.update({'image_size': (frame2.shape[1], frame2.shape[0])})

    mkpts_0, mkpts_1 = xfeat.match_lighterglue(output0, output1)

    canvas = warp_corners_and_draw_matches(mkpts_0, mkpts_1, frame1, frame2)
    plt.figure(figsize=(12,12))
    plt.imshow(canvas[..., ::-1])
    plt.show()

    # Compute homography matrix
    H, status = cv2.findHomography(mkpts_0, mkpts_1, cv2.RANSAC, 5.0)
    
    # Get dimensions of the images
    h1, w1 = frame1.shape[:2]
    h2, w2 = frame2.shape[:2]
    
    # Get the corners of the second image
    corners2 = np.array([[0, 0], [0, h2], [w2, h2], [w2, 0]], dtype='float32').reshape(-1, 1, 2)

    # Warp the corners of the second image using the homography matrix
    corners2_transformed = cv2.perspectiveTransform(corners2, H)

    # Get the corners of the first image
    corners1 = np.array([[0, 0], [0, h1], [w1, h1], [w1, 0]], dtype='float32').reshape(-1, 1, 2)

    # Combine all corners
    all_corners = np.concatenate((corners1, corners2_transformed), axis=0)

    # Find the bounding box of the combined corners
    [x_min, y_min] = np.int32(all_corners.min(axis=0).ravel() - 0.5)
    [x_max, y_max] = np.int32(all_corners.max(axis=0).ravel() + 0.5)

    # Calculate the translation needed
    translation_dist = [-x_min, -y_min]

    # Create the translation matrix
    translation_matrix = np.array([[1, 0, translation_dist[0]], [0, 1, translation_dist[1]], [0, 0, 1]])

    # Warp the second image to the new canvas
    result_img = cv2.warpPerspective(frame2, translation_matrix @ H, (x_max - x_min, y_max - y_min), flags=cv2.INTER_LINEAR)

    # Place the first image on the new canvas
    result_img[translation_dist[1]:h1 + translation_dist[1], translation_dist[0]:w1 + translation_dist[0]] = frame1

    # Plot the final stitched image
    plt.figure(figsize=(12, 12))
    plt.imshow(result_img[..., ::-1])
    plt.title('Stitched Image')
    plt.show()
    
    return result_img

def sort_stitch(frames):
    if len(frames) == 1:
        return frames
    if len(frames) == 2:
        return [stitch_pair(frames[0], frames[1])]

    mid = len(frames) // 2
    left = sort_stitch(frames[:mid])
    right = sort_stitch(frames[mid:])

    # Merge the results of left and right
    result = [stitch_pair(left[-1], right[-1])]
    return left + right + result



video_path = "video_file_path"

frames = load_video(video_path)
print(f"Total frame count: {len(frames)}")

batch_size = 32

# FOR TESTING

# Process only the first batch
current_batch = frames[:batch_size]
print(f"Current batch length: {len(current_batch)}")


result = sort_stitch(current_batch)

print("\nFinal result:", result[-1])

plt.figure(figsize=(12, 8))
plt.imshow(cv2.cvtColor(result[-1], cv2.COLOR_BGR2RGB))
plt.title('Final Panorama')
cv2.imwrite('final_panorama.jpg', result[-1])
plt.axis('off')
plt.show()

At the moment it is just a simple script, without any checks for match quality, dropping bad frames and so on, but there is time for those improvements I guess; right now I have bigger problems :slight_smile:
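
For later, the match-quality check I have in mind is just a small gate reusing the RANSAC inlier mask. A rough sketch (match_is_good is a name I made up, and the min_inliers / min_ratio thresholds are numbers I invented):

import cv2

def match_is_good(mkpts_0, mkpts_1, min_inliers=50, min_ratio=0.5):
    # Made-up thresholds: reject a pair when too few matches survive
    # RANSAC or the inlier ratio is poor.
    if len(mkpts_0) < min_inliers:
        return False
    H, mask = cv2.findHomography(mkpts_0, mkpts_1, cv2.RANSAC, 5.0)
    if H is None:
        return False
    inliers = int(mask.sum())
    return inliers >= min_inliers and inliers / len(mask) >= min_ratio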

Is there something fundamentally wrong with my approach? Am I putting the wrong image on top? Any ideas?

Which tutorials are you following?

I am not following a tutorial.

For matching features I looked at the XFeat notebooks:

OK, so: take one fixed picture (the first frame?) and one picture continuously from the camera/video. Just two pictures, not many.

Then watch how your code behaves.

My bet is on the homography going the wrong way.
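
Something like this is what I mean, as a rough, untested sketch (it assumes you already have the two images and the matched points mkpts_0 / mkpts_1 from your pipeline). cv2.findHomography(src, dst) fits dst ≈ H @ src, so H maps img1's coordinates into img2's:

import numpy as np
import cv2

def check_homography_direction(img1, img2, mkpts_0, mkpts_1):
    # findHomography(src, dst) fits dst ~ H @ src, so this H maps
    # img1 coordinates into img2 coordinates.
    H, mask = cv2.findHomography(mkpts_0, mkpts_1, cv2.RANSAC, 5.0)

    # Warping img1 with H should land it on top of img2's content...
    h2, w2 = img2.shape[:2]
    cv2.imwrite('check_1_onto_2.jpg', cv2.warpPerspective(img1, H, (w2, h2)))

    # ...while warping img2 into img1's frame needs the inverse.
    h1, w1 = img1.shape[:2]
    cv2.imwrite('check_2_onto_1.jpg',
                cv2.warpPerspective(img2, np.linalg.inv(H), (w1, h1)))

If the two output images come out swapped from what you expect, the homography is going the wrong way.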

The problem is in my recursive logic.
I am trying a few things now…

I've been pulling my hair out for a couple of hours now… I fixed a problem with the recursion, but the output is not much different than before.

What I am trying to achieve here is just a “floor panorama” that works in any direction (no crazy rotations or anything… just a slow walk). Maybe my current approach of stitching every two consecutive images until I get the final stitch is not appropriate… but I did also try the straightforward method of keeping the first image as the “panorama” and then stitching every next frame on top of it.

The problem I faced with that approach is the dynamic scaling of the canvas, because the incoming frame could land a bit up, a bit to the right, etc. So far I can’t figure out how to do it :frowning:
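
The kind of thing I imagine is needed looks roughly like this; just a sketch of the idea, not working code I have (add_frame and H_acc are names I made up, where H_acc maps the incoming frame's coordinates into the current panorama's coordinates):

import numpy as np
import cv2

def add_frame(pano, frame, H_acc):
    h, w = frame.shape[:2]
    corners = np.array([[0, 0], [w, 0], [w, h], [0, h]],
                       dtype=np.float32).reshape(-1, 1, 2)
    warped = cv2.perspectiveTransform(corners, H_acc)

    # The new frame may land above/left of the canvas (negative coords),
    # so shift everything by a translation and enlarge the canvas.
    x_min = min(0.0, np.floor(warped[..., 0].min()))
    y_min = min(0.0, np.floor(warped[..., 1].min()))
    x_max = max(float(pano.shape[1]), np.ceil(warped[..., 0].max()))
    y_max = max(float(pano.shape[0]), np.ceil(warped[..., 1].max()))

    T = np.array([[1, 0, -x_min], [0, 1, -y_min], [0, 0, 1]])
    size = (int(x_max - x_min), int(y_max - y_min))

    canvas = cv2.warpPerspective(frame, T @ H_acc, size)

    # Paste the old panorama at its shifted position (naive overwrite,
    # no blending), keeping its non-black pixels.
    y0, x0 = int(-y_min), int(-x_min)
    roi = canvas[y0:y0 + pano.shape[0], x0:x0 + pano.shape[1]]
    nonblack = pano.sum(axis=2) > 0
    roi[nonblack] = pano[nonblack]

    # Return the canvas plus the updated map (T folded in), so the next
    # frame's pairwise homography can be chained onto it.
    return canvas, T @ H_acc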