Hi everyone,
I need help determining the position and rotation of an ArUco marker relative to another QR marker using stereo vision and OpenCV.
Brief introduction
I’m working on a 6DOF robot and planning to implement a position compensation mechanism using computer vision and ArUco markers. Due to the robot’s black structure, I used inverted QR codes and then invert the camera image to make marker detection easier.
Problem
I need to determine the rotation of a marker placed on the first arm relative to a base marker (mounted at the bottom). For this purpose, I read and store the rotation matrix of the base marker.
Question 1
If I multiply the rotation matrix of the marker on the first arm by the transposed rotation matrix of the base marker, will I obtain the angular difference between them?
Also, should this multiplication be done as left-multiplication or right-multiplication?
Assuming the answer to Question 1 is yes, I attempted to extract the rotation of the arm marker from its rotation matrix. In my case, the first rotation occurs at the robot base (around the Y-axis), followed by a rotation along the marker’s rotated axis (in my case, the Z-axis).
To obtain the roll and pitch angles, I multiplied the rotation matrices around Y and Z and then derived formulas for the angles of interest. This approach gives results closest to what I expect. However, rotation around the Y-axis also affects the rotation around the Z-axis, as shown in the attached images. The discrepancy is around 5–10 degrees, which seems too large to be attributed to noise.
I also tried using r.as_euler('xzy') and r.as_euler('yzx'), but these give significantly worse results and return rotations for all three axes. Note that rotation around the QR code’s local X-axis is not physically possible in my setup due to the robot’s mechanical constraints.
Question 2
What am I doing wrong that causes rotation around the Y-axis to influence rotation around the Z-axis?
Below is a snippet of the code I am using.
I would greatly appreciate any help. Thanks in advance!
import cv2
import numpy as np
import math
import time
from scipy.spatial.transform import Rotation as Rot
import config
marker_length = config.detected_marker_size_mm / 1000
camera_source = 2
base_marker_id = 54 # base marker ID
def move_to_base(home_tvec, tvec):
return tvec - home_tvec
def rotate_to_base(home_R, R):
return home_R.T @ R
camera_matrix = np.load(f"stored_calibrations/{config.current_camera}/camera_matrix.npy")
dist_coeffs = np.load(f"stored_calibrations/{config.current_camera}/dist_coeffs.npy")
aruco_dict = cv2.aruco.getPredefinedDictionary(config.ARUCO_DICT)
detector = cv2.aruco.ArucoDetector(aruco_dict)
cap = cv2.VideoCapture(camera_source)
ret, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
corners, ids, _ = detector.detectMarkers(cv2.bitwise_not(gray))
base_i = np.where(ids.flatten() == base_marker_id)[0]
rvecs, tvecs, _ = cv2.aruco.estimatePoseSingleMarkers(
corners,
marker_length,
camera_matrix,
dist_coeffs,
)
home_rvec = rvecs[base_i[0]]
home_tvec = tvecs[base_i[0]]
home_R, _ = cv2.Rodrigues(home_rvec)
while True:
ret, frame = cap.read()
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
inv = cv2.bitwise_not(gray)
cv2.imshow("inv", inv)
corners, ids, _ = detector.detectMarkers(inv)
if ids is not None:
cv2.aruco.drawDetectedMarkers(frame, corners, ids)
rvecs, tvecs, _ = cv2.aruco.estimatePoseSingleMarkers(
corners,
marker_length,
camera_matrix,
dist_coeffs,
)
for i in range(len(ids)):
global_rvec = rvecs[i]
global_tvec = tvecs[i]
qr_id = ids[i][0]
global_R, _ = cv2.Rodrigues(global_rvec)
R = rotate_to_base(home_R, global_R)
rvec, _ = cv2.Rodrigues(global_R)
tvec = move_to_base(home_tvec, global_tvec)
cv2.drawFrameAxes(
frame,
camera_matrix,
dist_coeffs,
rvec,
global_tvec,
0.03,
)
# I assume marcer can only rotate around Y and then around Z axes
theta_y_deg = np.degrees(np.arctan2(R[0,2], R[2,2]))
theta_z_deg = np.degrees(np.arctan2(R[1,0], R[1,1]))
# I am not shure witch approach is correct
r = Rot.from_matrix(R)
angles = r.as_euler('xzy', degrees=True) # extrinsic
angles2 = r.as_euler('yzx', degrees=True) # intrinsic
lines = [
f"ID {ids[i][0]}",
f"X={tvec[0][0]:.3f} Y={tvec[0][1]:.3f} Z={tvec[0][2]:.3f} m",
f"rot angles X {angles[0]:.1f}, rot Y {angles[1]:.1f}, rot Z {angles[2]:.1f} deg",
f"rot angles2 X {angles2[0]:.1f}, rot Y {angles2[1]:.1f}, rot Z {angles2[2]:.1f} deg",
f"rot manual Y {theta_y_deg:.1f}, rot Z {theta_z_deg:.1f} deg"
]
print( lines[0] + " | " + lines[1] + " | " + lines[2] + " | " + lines[3] )
corner = corners[i][0][0].astype(int)
for j, line in enumerate(lines):
cv2.putText(
frame,
line,
(corner[0], max(corner[1] - 10 - j * 20, 0)),
cv2.FONT_HERSHEY_SIMPLEX,
0.5,
(0, 0, 255),
1,
cv2.LINE_AA,
)
scale = 1.0
resized = cv2.resize(frame, None, fx=scale, fy=scale)
cv2.imshow("Live pose estimation", resized)
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cap.release()
cv2.destroyAllWindows()
