I finally solved the problem by using the known orientation of each camera to project its cubemap onto the sphere and then blending the resulting images. For the blending, I simply used each pixel's distance to the edge of its cubemap as the weight.

import cupy as cp
from cupyx.scipy import ndimage  # distance_transform_edt lives here in recent CuPy releases


def calculate_R(orientation, up=cp.array([0, -1, 0])):
    """Rotation matrix mapping world rays into the frame of a camera looking along `orientation`."""
    orientation = orientation / cp.linalg.norm(orientation)
    # Handle the degenerate cases where the orientation is (anti)parallel to `up`.
    if cp.allclose(orientation, up):
        return cp.array([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
    if cp.allclose(orientation, -up):
        return cp.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
    x = cp.cross(up, orientation)
    x = x / cp.linalg.norm(x)
    y = cp.cross(orientation, x)
    y = y / cp.linalg.norm(y)
    return cp.array([x, y, orientation])

def rotate_xyz(R, x, y, z):
    """Rotate the per-pixel direction grids x, y, z by the rotation matrix R."""
    xyz = cp.stack((x.ravel(), y.ravel(), z.ravel()))
    x, y, z = cp.matmul(R, xyz).reshape(3, *x.shape)
    return x, y, z

def uv_pano(orientations, cubemap_shape, pano_shape):
    """For each camera, compute which panorama pixels it covers (mask) and the
    cubemap pixel (u, v) that each covered panorama pixel should sample."""
    u_pano, v_pano = cp.meshgrid(cp.arange(pano_shape[0]), cp.arange(pano_shape[1]), indexing='ij')
    masks = cp.zeros((len(orientations), pano_shape[0], pano_shape[1]), dtype=cp.float32)
    # zeros rather than empty, so pixels outside the mask still cast cleanly to int below
    u_cubes = cp.zeros((len(orientations), pano_shape[0], pano_shape[1]), dtype=cp.float32)
    v_cubes = cp.zeros((len(orientations), pano_shape[0], pano_shape[1]), dtype=cp.float32)
    for i in range(len(orientations)):
        # Spherical angles of every panorama pixel, and the unit ray it looks along.
        phi = (u_pano / pano_shape[0] + 0.5) * cp.pi
        theta = (v_pano / pano_shape[1] * 2 + 0.5) * cp.pi
        x = cp.cos(phi) * cp.cos(theta)
        y = cp.sin(phi)
        z = cp.cos(phi) * cp.sin(theta)
        # Rotate the rays into camera i's frame.
        R = calculate_R(orientations[i])
        x, y, z = rotate_xyz(R, x, y, z)
        # A ray hits this camera's face when -z is the dominant axis.
        condition = (abs(z) > abs(x)) & (abs(z) > abs(y)) & (z < 0)
        masks[i][condition] = 1
        # Perspective-divide onto the face plane and scale to pixel coordinates.
        a = y
        b = -x
        max_abs_coord = abs(z)
        u_cubes[i][condition] = ((a[condition] / max_abs_coord[condition]) + 1.0) * 0.5 * (cubemap_shape[0] - 1)
        v_cubes[i][condition] = ((b[condition] / max_abs_coord[condition]) + 1.0) * 0.5 * (cubemap_shape[1] - 1)
    u_cubes = u_cubes.astype(int)
    v_cubes = v_cubes.astype(int)
    return masks, u_cubes, v_cubes

def transform(cubemaps, orientations):
    """Project every cubemap into one equirectangular panorama and blend the
    overlaps using distance-to-edge weights."""
    cubemap_shape = cubemaps[0].shape
    if len(cubemap_shape) > 2:
        pano_shape = (int(cubemap_shape[0] * 2), int(cubemap_shape[1] * 4), cubemap_shape[2])
    else:
        pano_shape = (int(cubemap_shape[0] * 2), int(cubemap_shape[1] * 4))
    pano = cp.zeros(pano_shape, dtype=cp.float32)
    masks, u_cubes, v_cubes = uv_pano(orientations, cubemap_shape, pano_shape)
    # Turn the binary masks into blending weights: distance to the face edge, normalized to [0, 1].
    for i in range(len(cubemaps)):
        masks[i] = ndimage.distance_transform_edt(masks[i])
        if cp.max(masks[i]) > 0:
            masks[i] = masks[i] / cp.max(masks[i])
    # Accumulate the weighted contribution of each cubemap (assumes 3-channel images).
    for i in range(len(cubemaps)):
        covered = masks[i] > 0
        weight = cp.stack([masks[i][covered]] * 3, axis=1)
        pano[covered] += cubemaps[i][u_cubes[i][covered], v_cubes[i][covered]] * weight
    # Normalize by the total weight per pixel, avoiding division by zero where nothing projects.
    weight_sum = cp.stack([cp.sum(masks, axis=0)] * 3, axis=2)
    weight_sum = cp.where(weight_sum <= 0, 1, weight_sum)
    pano = pano / weight_sum
    pano = cp.clip(pano, 0, 255).astype(cp.uint8)
    return pano
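
For reference, calling it looks roughly like the sketch below. This is only an illustration: the face filenames, the six axis-aligned orientations, and the use of OpenCV for I/O are my assumptions here, so substitute whatever your capture rig actually provides.

    import cv2
    import cupy as cp

    # Hypothetical inputs: six face images and the direction each camera was looking.
    # Replace the filenames and orientations with your own data.
    orientations = [
        cp.array([0, 0, -1]), cp.array([0, 0, 1]),
        cp.array([-1, 0, 0]), cp.array([1, 0, 0]),
        cp.array([0, -1, 0]), cp.array([0, 1, 0]),
    ]
    cubemaps = [cp.asarray(cv2.imread(f"face_{i}.png"), dtype=cp.float32)
                for i in range(len(orientations))]

    pano = transform(cubemaps, orientations)
    cv2.imwrite("panorama.png", cp.asnumpy(pano))  # copy the result back to the CPU to save it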