Is it possible to use Solvepnp to only optimize for the camera translation vector while having a fixed rvec ?
You cannot fix the rotation and only refine for the translation with SOLVEPNP_ITERATIVE
.
But you can for sure code it yourself (or modify the OpenCV code):
- 16. Gauss–Newton method
- Pose Estimation for Augmented Reality: A Hands-On Survey
- since the Jacobian when optimizing for the full 3D pose is:
- you can also optimise for just the translation part
1 Like
Some references:
- Pose Estimation for Augmented Reality: A Hands-On Survey, Eric Marchand, Hideaki Uchiyama, Fabien Spindler
- Visual servo control, Part I: Basic approaches, François Chaumette, S. Hutchinson
- An Invitation to 3-D Vision - From Images to Models, Yi Ma, Stefano Soatto, J. Kosecka, S. Sastry
- A tutorial on SE(3) transformation parameterizations and on-manifold optimization, Jose-Luis Blanco
As an exercise of style, I wanted to implement myself a pose refinement method, and estimating only the translation part (known rotation), or only the rotation or both.
Not sure if what I did is fully correct, but at least it was a good exercise.
If it can be useful to someone, I am happy.
#! python3
# -*- coding: utf-8 -*-
from enum import Enum
import numpy as np
from numpy.linalg import pinv
from pathlib import Path
import argparse
import cv2 as cv
class RefinementType(Enum):
TRANSLATION = 1
ROTATION = 2
BOTH = 3
def project(pt3d, K):
pt2d = K @ pt3d
u = pt2d[0] / pt2d[2]
v = pt2d[1] / pt2d[2]
pt2d = np.empty((2,1))
pt2d[0] = u
pt2d[1] = v
return pt2d
def calcJacobian(pt3d_objs, cTo_est, refinement_type):
trans_idx_start = 0
if refinement_type == RefinementType.TRANSLATION or refinement_type == RefinementType.ROTATION:
J_cols = 3
else:
J_cols = 6
trans_idx_start = 3
J = np.empty((2*pt3d_objs.shape[0], J_cols))
for i in range(pt3d_objs.shape[0]):
pt3d = pt3d_objs[i,:]
pt3d_in_cam = transform(pt3d, cTo_est)
Zi = pt3d_in_cam[2]
xi = pt3d_in_cam[0] / Zi
yi = pt3d_in_cam[1] / Zi
if refinement_type == RefinementType.ROTATION or refinement_type == RefinementType.BOTH:
J[2*i, 0] = xi * yi
J[2*i, 1] = -(1 + xi**2)
J[2*i, 2] = yi
J[2*i + 1, 0] = 1 + yi**2
J[2*i + 1, 1] = -xi * yi
J[2*i + 1, 2] = -xi
if refinement_type == RefinementType.TRANSLATION or refinement_type == RefinementType.BOTH:
J[2*i, trans_idx_start + 0] = -1 / Zi
J[2*i, trans_idx_start + 1] = 0
J[2*i, trans_idx_start + 2] = xi / Zi
J[2*i + 1, trans_idx_start + 0] = 0
J[2*i + 1, trans_idx_start + 1] = -1 / Zi
J[2*i + 1, trans_idx_start + 2] = yi / Zi
return J
def computeResiduals(pt2d_imgs_true, pt2d_imgs_est):
residuals = (pt2d_imgs_true.ravel() - pt2d_imgs_est.ravel())
return residuals
def transform(pt3d_obj, cTo):
pt3d_obj_cam = pt3d_obj @ cTo[0:3, 0:3] + cTo[0:3, 3]
return pt3d_obj_cam
def compute2d(pt3d_objs, cTo, K):
pt3d_objs_in_cam = np.empty((4,3))
for i in range(pt3d_objs.shape[0]):
pt3d = pt3d_objs[i,:]
pt3d_in_cam = transform(pt3d, cTo)
pt3d_objs_in_cam[i,:] = pt3d_in_cam
pt2d_imgs = np.empty((4,2))
for i in range(pt3d_objs_in_cam.shape[0]):
pt3d = pt3d_objs_in_cam[i,:]
pt2d = project(pt3d, K)
pt2d_imgs[i,:] = pt2d.ravel()
return pt2d_imgs
def main():
parser = argparse.ArgumentParser(description='Pose refinement testing.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--niters', type=int, default=10,
help='Number of iterations for the minimisation loop.')
parser.add_argument('--theta-perturb', type=float, default=5.4583,
help='Perturbation value for theta (rotation part) in degree.')
parser.add_argument('--u-perturb', nargs='+', type=float, default=[0.03, -0.02, 0.044],
help='Perturbation value for u (rotation part).')
parser.add_argument('--translation-perturb', nargs='+', type=float, default=[0.2, 0.3, -0.4],
help='Perturbation value for translation.')
parser.add_argument('--refinement', default='translation', choices=['translation', 'rotation', 'both'],
help='Type of refinement (translation, rotation, both), ex: translation will refineme only the translation part.')
args = parser.parse_args()
niters = args.niters
print(f"niters: {niters}")
theta_perturb = args.theta_perturb
print(f"theta perturbation: {theta_perturb}")
u_perturb = args.u_perturb
print(f"u perturbation: {u_perturb}")
translation_perturb = args.translation_perturb
print(f"translation perturbation: {translation_perturb}")
if args.refinement == 'rotation':
refinement_type = RefinementType.ROTATION
elif args.refinement == 'both':
refinement_type = RefinementType.BOTH
else:
refinement_type = RefinementType.TRANSLATION
print(f"refinement type: {refinement_type}")
K = np.eye(3)
print(f"K={K}")
pt3d_objs = np.empty((4,3))
pt3d_objs[0,0] = 2.5
pt3d_objs[0,1] = 3.4
pt3d_objs[0,2] = 10
pt3d_objs[1,0] = -2.5
pt3d_objs[1,1] = 2.75
pt3d_objs[1,2] = 8.4
pt3d_objs[2,0] = 1.4
pt3d_objs[2,1] = 4.5
pt3d_objs[2,2] = 3.7
pt3d_objs[3,0] = -0.75
pt3d_objs[3,1] = 3.5
pt3d_objs[3,2] = 2.75
theta_true = 24.5
u_true = np.array([1, 2, 3])
u_norm_true = u_true / np.linalg.norm(u_true)
thetau_true = np.deg2rad(theta_true) * u_norm_true
cRo_true, _ = cv.Rodrigues(thetau_true)
cTo_true = np.eye(4)
cTo_true[0:3, 0:3] = cRo_true
cTo_true[0, 3] = -1.45
cTo_true[1, 3] = 3.75
cTo_true[2, 3] = 4.12
print(f"thetau_true: {thetau_true}")
print(f"cTo_true:\n{cTo_true}")
cTo_est = np.eye(4)
if refinement_type == RefinementType.ROTATION or refinement_type == RefinementType.BOTH:
theta_est = theta_true + theta_perturb
u_est = u_true + u_perturb
u_norm_est = u_est / np.linalg.norm(u_est)
thetau_est = np.deg2rad(theta_est) * u_norm_est
cRo_est, _ = cv.Rodrigues(thetau_est)
cTo_est[0:3, 0:3] = cRo_est
cTo_est[0:3,3] = cTo_true[0:3,3]
if refinement_type == RefinementType.TRANSLATION or refinement_type == RefinementType.BOTH:
cTo_est[0:3, 0:3] = cTo_true[0:3, 0:3]
cTo_est[0:3,3] = cTo_true[0:3,3] + translation_perturb
print(f"cTo_est:\n{cTo_est}")
pt2d_imgs_true = compute2d(pt3d_objs, cTo_true, K)
# print(f"pt2d_imgs_true:\n{pt2d_imgs_true}")
pt2d_imgs_est = compute2d(pt3d_objs, cTo_est, K)
# print(f"pt2d_imgs_est:\n{pt2d_imgs_est}")
err = computeResiduals(pt2d_imgs_true, pt2d_imgs_est)
print(f"initial error: {np.linalg.norm(err)}")
for nb in range(niters):
print(f"\nIter: {nb}")
J = calcJacobian(pt3d_objs, cTo_est, refinement_type)
pt2d_imgs_est = compute2d(pt3d_objs, cTo_est, K)
# print(f"pt2d_imgs_est:\n{pt2d_imgs_est}")
err = computeResiduals(pt2d_imgs_true, pt2d_imgs_est)
err_norm = np.linalg.norm(err)
print(f"err_norm: {err_norm}")
dq = -np.linalg.pinv(J) @ err
print(f"dq: {dq}")
if refinement_type == RefinementType.ROTATION or refinement_type == RefinementType.BOTH:
rotation_vector_, _ = cv.Rodrigues(cTo_est[0:3, 0:3])
rotation_vector = rotation_vector_.T
rotation_vector -= dq[:3]
R, _ = cv.Rodrigues(rotation_vector)
cTo_est[0:3, 0:3] = R
if refinement_type == RefinementType.TRANSLATION or refinement_type == RefinementType.BOTH:
cTo_est[0:3,3] += dq if refinement_type == RefinementType.TRANSLATION else dq[3:]
print(f"cTo_est:\n{cTo_est}")
if err_norm < 1e-4:
print("err_norm < 1e-4, break")
break
print(f"\ncTo_true:\n{cTo_true}")
print(f"cTo_est:\n{cTo_est}")
if __name__ == '__main__':
main()
1 Like
On a similar topic: Feed known translation vector to SolvePnP
1 Like