Panorama stitching using OpenCV in Python

Given 18 square cube-map face images (aspect ratio 1:1, resolution 1000×1000 each) with a FOV of 90°, I’m trying to convert them into an equirectangular panorama (aspect ratio 2:1, resolution 4000x2000). The images overlap a lot. My first approach uses the Stitcher class and the following Python code.

import cv2 as cv
import glob
import sys

# Collect all PNG images in the working directory. Sorting makes the
# input order deterministic across platforms (glob order is arbitrary).
images = []
for file_path in sorted(glob.glob('*.png')):
    image = cv.imread(file_path, cv.IMREAD_COLOR)
    # imread returns None on failure instead of raising; passing None to
    # Stitcher.stitch would fail later with an unrelated error.
    if image is None:
        print('WARNING: could not read {0}, skipping.'.format(file_path))
        continue
    images.append(image)

if not images:
    print('ERROR: no readable input images found.')
    sys.exit(1)

stitcher = cv.Stitcher.create(cv.Stitcher_PANORAMA)
status, pano = stitcher.stitch(images)

if status != cv.Stitcher_OK:
    print('ERROR {0}: The images could not be stitched.'.format(status))
    sys.exit(1)

cv.imwrite('panorama.png', pano)

This approach will either fail with error code 3 (ERR_CAMERA_PARAMS_ADJUST_FAIL) or it will produce artifacts and only half of the panorama will be stitched. Unfortunately, I have not been able to find out under which conditions the approach will fail completely and when it will at least produce a result.

I have already tried to change the order of the images, but this only results in different artifacts or the stitching fails completely with error code 3 (ERR_CAMERA_PARAMS_ADJUST_FAIL). Then I have tried to adjust the warper which does not seem to be possible in Python. Finally, I have tried to stitch the images manually, but the cv::detail::FeaturesFinder is not available in Python. The discontinued Microsoft Image Composite Editor can be used to successfully stitch the panorama without artifacts, but the stitching process cannot be automated (using Python).

Another approach would be to stitch only six cubemaps together, since each pixel can be uniquely assigned to a cubemap.

The only problem is that the lighting is different in each cubemap. I tried using the MultiBandBlender class to blend the edges, but I can’t get it to work properly. The function Panorama.load loads the image using imread and Panorama.cubes_to_panorama_gpu converts the six images to the panorama and returns the masks for each cubemap.

# Load the six cube faces; Panorama.load is a project helper wrapping
# cv.imread (defined elsewhere in the project).
images = []
images.append(Panorama.load('N.png'))
images.append(Panorama.load('S.png'))
images.append(Panorama.load('E.png'))
images.append(Panorama.load('W.png'))
images.append(Panorama.load('U.png'))
images.append(Panorama.load('D.png'))

# Project the faces into one equirectangular image; `faces` labels each
# panorama pixel with the index of the cubemap it came from.
pano, faces = Panorama.cubes_to_panorama_gpu(images)

blender = cv.detail.MultiBandBlender(try_gpu=1, num_bands=5)
# The blend region covers the whole panorama (x, y, width, height).
blender.prepare((0, 0, pano.shape[1], pano.shape[0]))

for face, image in enumerate(images):

    # Binary mask selecting the panorama pixels belonging to this face.
    # NOTE(review): `np` (NumPy) must be imported elsewhere in the file.
    mask = np.zeros(faces.shape, dtype=np.uint8)
    mask[faces == face] = 255

    # NOTE(review): the full panorama is fed for every face and `image`
    # from the loop is unused. MultiBandBlender.feed is also documented
    # to take a 16-bit signed image (CV_16SC3) — confirm `pano`'s dtype.
    blender.feed(pano, mask, (0, 0))

result, _ = blender.blend(None, None)

# Panorama.save is a project helper (presumably wrapping cv.imwrite —
# defined elsewhere).
Panorama.save('panorama.png', result)

Related questions: (links omitted here)

I finally solved the problem by using the known orientation of the camera to project each cubemap onto the sphere and blending between the resulting images. For the blending, I simply used the distance to the edge of each cubemap as the weight.

def calculate_R(orientation, up=cp.array([0, -1, 0])):
    """Build a 3x3 rotation matrix whose rows form an orthonormal basis
    for a camera looking along *orientation*.

    `up` is the world up direction. The two degenerate cases where the
    view direction is (anti-)parallel to `up` would make the cross
    products below vanish, so they return hand-picked bases instead.
    """
    forward = orientation / cp.linalg.norm(orientation)

    if cp.allclose(forward, up):
        return cp.array([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
    if cp.allclose(forward, -up):
        return cp.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]])

    # Right axis: perpendicular to both the up vector and the view
    # direction.
    right = cp.cross(up, forward)
    right = right / cp.linalg.norm(right)

    # Third axis completes the right-handed basis.
    down = cp.cross(forward, right)
    down = down / cp.linalg.norm(down)

    return cp.array([right, down, forward])

def rotate_xyz(R, x, y, z):
    """Apply the rotation matrix R to the coordinate grids x, y, z.

    The three grids (any common shape) are flattened into a 3xN matrix,
    rotated as one matrix product, and reshaped back to the grid shape.
    """
    grid_shape = x.shape
    flat = cp.stack((x.ravel(), y.ravel(), z.ravel()))
    rotated = (R @ flat).reshape(3, *grid_shape)

    return rotated[0], rotated[1], rotated[2]

def uv_pano(orientations, cubemap_shape, pano_shape):
    """For each camera orientation, compute which panorama pixels fall on
    that camera's cube face and the source coordinates inside that face.

    Returns (masks, u_cubes, v_cubes): masks[i] is 1.0 where orientation
    i covers the panorama pixel; u_cubes[i]/v_cubes[i] hold the integer
    pixel coordinates in cubemap i (only valid where masks[i] == 1 —
    other entries come from cp.empty and are uninitialised).
    """
    u_pano, v_pano = cp.meshgrid(cp.arange(pano_shape[0]), cp.arange(pano_shape[1]), indexing='ij')

    masks   = cp.zeros((len(orientations), pano_shape[0], pano_shape[1]), dtype=cp.float32)
    u_cubes = cp.empty((len(orientations), pano_shape[0], pano_shape[1]), dtype=cp.float32)
    v_cubes = cp.empty((len(orientations), pano_shape[0], pano_shape[1]), dtype=cp.float32)

    # The spherical ray direction of each panorama pixel is independent
    # of the orientation, so compute it once (the original recomputed
    # phi/theta/x/y/z on every loop iteration).
    phi = (u_pano / pano_shape[0] * 1 + 0.5) * cp.pi
    theta = (v_pano / pano_shape[1] * 2 + 0.5) * cp.pi

    x = cp.cos(phi) * cp.cos(theta)
    y = cp.sin(phi)
    z = cp.cos(phi) * cp.sin(theta)

    for i in range(len(orientations)):

        # Rotate the ray directions into camera i's frame; the face is
        # hit by rays whose dominant component is z with z < 0.
        R = calculate_R(orientations[i])
        xr, yr, zr = rotate_xyz(R, x, y, z)

        condition = (abs(zr) > abs(xr)) & (abs(zr) > abs(yr)) & (zr < 0)
        masks[i][condition] = 1

        # Perspective-divide onto the face plane, then map [-1, 1] to
        # pixel coordinates.
        a =  yr
        b = -xr
        max_abs_coord = abs(zr)

        u_cubes[i][condition] = ((a[condition] / max_abs_coord[condition]) + 1.0) * 0.5 * (cubemap_shape[0] - 1)
        v_cubes[i][condition] = ((b[condition] / max_abs_coord[condition]) + 1.0) * 0.5 * (cubemap_shape[1] - 1)

    # Cast once at the end. The original cast inside the loop, copying
    # both full-size arrays every iteration and relying on later float
    # assignments being truncated into the int arrays.
    u_cubes = u_cubes.astype(int)
    v_cubes = v_cubes.astype(int)

    return masks, u_cubes, v_cubes

def transform(cubemaps):
    """Project the six cube faces onto an equirectangular panorama and
    blend them using distance-to-face-edge weights.

    NOTE(review): reads the module-level names `orientations`, `ndimage`
    (presumably cupyx.scipy.ndimage) and `uv_pano` — confirm they are
    defined in this module.
    """
    cubemap_shape = cubemaps[0].shape
    # Equirectangular output is 2:1 (width = 4 faces, height = 2 faces),
    # with the same channel count as the input faces.
    if len(cubemap_shape) > 2:
        pano_shape = (int(cubemap_shape[0] * 2), int(cubemap_shape[1] * 4), cubemap_shape[2])
    else:
        pano_shape = (int(cubemap_shape[0] * 2), int(cubemap_shape[1] * 4))
    pano = cp.zeros(pano_shape, dtype=cp.float32)

    masks, u_cubes, v_cubes = uv_pano(orientations, cubemap_shape, pano_shape)

    # Turn each binary coverage mask into a blend weight: distance to the
    # face edge, normalised to [0, 1] (guard against all-zero masks).
    for i in range(len(cubemaps)):
        masks[i] = ndimage.distance_transform_edt(masks[i])
        if cp.max(masks[i]) > 0:
            masks[i] = masks[i] / cp.max(masks[i])

    # Weighted accumulation of every face into the panorama.
    # NOTE(review): the 3-channel stacking below assumes colour faces;
    # the grayscale pano_shape branch above would not broadcast here —
    # confirm inputs are 3-channel.
    for i in range(len(cubemaps)):
        covered = masks[i] > 0  # evaluated once; the original computed it five times per iteration
        weight = cp.stack([masks[i][covered]] * 3, axis=1)
        pano[covered] += cubemaps[i][u_cubes[i][covered], v_cubes[i][covered]] * weight

    # Normalise by the total weight per pixel, avoiding division by zero
    # where no face contributed.
    total = cp.sum(masks, axis=0)  # summed once; the original summed three times
    weight_sum = cp.stack([total] * 3, axis=2)
    weight_sum = cp.where(weight_sum <= 0, 1, weight_sum)
    pano = pano / weight_sum

    pano = cp.clip(pano, 0, 255).astype(cp.uint8)

    return pano