Reviewing My Approach for Relative Transformation Between ArUco Markers in OpenCV

I am working on estimating the relative transformation between ArUco markers using OpenCV. My setup consists of multiple markers placed in a scene, and I compute each marker's rotation matrix and translation vector relative to a reference marker. However, the results I get are not consistent with the physical layout of the markers.

My Questions

  1. Is my formula for computing relative transformation correct in this context?
  2. Am I misinterpreting the way rotation and translation are derived from cv2.solvePnPRansac()?
  3. Should I consider a different method to estimate relative marker poses?
  4. Are there any recommended debugging steps to verify my implementation?

Here is my full script:
import cv2
import numpy as np
import glob
import os

# ======================== CONFIGURATION ======================== #

# Define the path to the directory containing the images
image_directory = r'C:\Users\AR07180\Documents\ZED\Images\left'
output_directory = r'C:\Users\AR07180\Documents\ZED\Phase1-Relative-transformation\pose_estimation_ref4-v2'
os.makedirs(output_directory, exist_ok=True)

# Define output file for saving relative transformations
relative_pose_output = os.path.join(output_directory, 'relative_transformations_ref4-v2.txt')

# Define the ArUco marker dictionary
aruco_dict_type = cv2.aruco.DICT_6X6_250  # Adjust based on your markers
aruco_dict = cv2.aruco.getPredefinedDictionary(aruco_dict_type)

# Initialize detector parameters
parameters = cv2.aruco.DetectorParameters()
detector = cv2.aruco.ArucoDetector(aruco_dict, parameters)

# Define the real-world marker size in meters
marker_length = 0.16  # Adjust based on your actual marker size

# Camera intrinsic parameters 
fx, fy, cx, cy = 1065.91, 1066.58, 1101.56, 616.181
camera_matrix = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
dist_coeffs = np.array([-0.0724717, 0.0593418, 0.000271638, -0.000149516, -0.0257159], dtype=np.float32).reshape(-1, 1)

# Define the 3D coordinates of the marker corners in the marker's coordinate system
marker_corners_3d = np.array([
    [-marker_length / 2, marker_length / 2, 0],
    [marker_length / 2, marker_length / 2, 0],
    [marker_length / 2, -marker_length / 2, 0],
    [-marker_length / 2, -marker_length / 2, 0]
], dtype=np.float32)
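
# Note: this corner order (top-left, top-right, bottom-right, bottom-left) matches
# the order in which detectMarkers() returns each marker's image corners.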

# Define reference marker ID
reference_id = 4

# Store marker transformations
marker_poses = {}

# Define reprojection error threshold
reprojection_error_threshold = 1.0  # Adjust based on your tolerance

# ======================== IMAGE PROCESSING ======================== #

# Get all image files in the directory
image_files = glob.glob(os.path.join(image_directory, '*.png'))

if not image_files:
    print(f"[ERROR] No images found in directory: {image_directory}")
    exit()
else:
    print(f"[INFO] Found {len(image_files)} images in directory: {image_directory}")

# Process each image
for image_file in image_files:
    print(f"\n[INFO] Processing image: {os.path.basename(image_file)}")

    # Read the image
    image = cv2.imread(image_file)

    if image is None:
        print(f"[ERROR] Failed to load image: {image_file}")
        continue

    # Apply distortion correction to the image
    undistorted_image = cv2.undistort(image, camera_matrix, dist_coeffs)
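
    # Note: detection below runs on this undistorted image, yet dist_coeffs is
    # passed again to solvePnPRansac and drawFrameAxes, so the distortion model
    # is applied twice. Detect on the original image, or pass zero distortion there.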

    # Convert the image to grayscale
    gray = cv2.cvtColor(undistorted_image, cv2.COLOR_BGR2GRAY)

    # Detect ArUco markers
    corners, ids, rejected = detector.detectMarkers(gray)

    if ids is not None:
        print(f"[INFO] Detected {len(ids)} marker(s) in {os.path.basename(image_file)}: {ids.flatten()}")

        # Iterate over each detected marker
        for i, marker_id in enumerate(ids.flatten()):
            print(f"[DEBUG] Processing Marker ID: {marker_id}")

            # Get the 2D image points of the marker corners
            marker_corners_2d = corners[i].reshape(-1, 2)

            # Estimate the pose of the marker using solvePnPRansac
            success, rvec, tvec, inliers = cv2.solvePnPRansac(
                marker_corners_3d,
                marker_corners_2d,
                camera_matrix,
                dist_coeffs
            )
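            # Note: with exactly 4 points, RANSAC has no redundancy to reject
            # outliers; for square markers with this corner layout, cv2.solvePnP
            # with the cv2.SOLVEPNP_IPPE_SQUARE flag is a common alternative.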

            if success:
                # Convert rotation vector to rotation matrix
                R, _ = cv2.Rodrigues(rvec)
                t = tvec.flatten()

                # Store transformation matrix
                marker_poses[marker_id] = (R, t)
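                # Note: this keeps only the most recent pose per marker ID, so the
                # relative transforms below may combine poses from different images,
                # which is only valid if the camera never moves.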

                print(f"[INFO] Pose estimated for Marker {marker_id}")
                print(f"[DEBUG] Rotation Matrix:\n{R}")
                print(f"[DEBUG] Translation Vector:\n{t}")

                # Draw detected markers
                cv2.aruco.drawDetectedMarkers(undistorted_image, corners, ids)
                cv2.drawFrameAxes(undistorted_image, camera_matrix, dist_coeffs, rvec, tvec, 0.1)

            else:
                print(f"[WARNING] Pose estimation failed for Marker {marker_id}")

    else:
        print(f"[WARNING] No markers detected in {os.path.basename(image_file)}.")

    # Display the processed image
    cv2.imshow('Marker Pose Estimation', undistorted_image)
    cv2.waitKey(500)

cv2.destroyAllWindows()

# ======================== COMPUTE RELATIVE TRANSFORMATIONS ======================== #

if reference_id not in marker_poses:
    print(f"[ERROR] Reference marker ID {reference_id} not found in detected markers. Cannot compute relative transformations.")
else:
    print("\n[INFO] Computing relative transformations to Reference Marker ID:", reference_id)
    
    # Get reference marker transformation
    R1, t1 = marker_poses[reference_id]
    
    # Compute relative transformations
    relative_transformations = {}

    for marker_id, (R2, t2) in marker_poses.items():
        if marker_id == reference_id:
            continue  # Skip the reference marker itself

        # Compute relative rotation
        R_rel = R2 @ R1.T

        # Compute relative translation
        t_rel = t2.T - R_rel @ t1.T

        # Store the relative transformation
        relative_transformations[marker_id] = (R_rel, t_rel)

        print(f"[INFO] Relative transformation computed: Marker {reference_id} → Marker {marker_id}")
        print(f"[DEBUG] Relative Rotation Matrix:\n{R_rel}")
        print(f"[DEBUG] Relative Translation Vector:\n{t_rel}")

    # Save the relative transformations to a file
    with open(relative_pose_output, 'w') as f:
        for marker_id, (R_rel, t_rel) in relative_transformations.items():
            f.write(f"Relative transformation from Marker {reference_id} to Marker {marker_id}:\n")
            f.write("Relative Rotation Matrix:\n")
            f.write(np.array2string(R_rel, formatter={'float_kind': lambda x: f'{x:12.8f}'}))
            f.write("\n")
            f.write("Relative Translation Vector:\n")
            f.write(np.array2string(t_rel, formatter={'float_kind': lambda x: f'{x:12.8f}'}))
            f.write("\n\n")

    print(f"[INFO] Relative transformations saved in {relative_pose_output}")

Answer

calculate 4x4 matrices from rvec (via cv2.Rodrigues) and tvec:

def mat_from_rtvec(rvec, tvec):
    # Build a 4x4 homogeneous transform from an OpenCV rvec/tvec pair
    T = np.eye(4)
    R, _ = cv2.Rodrigues(rvec)              # rotation vector -> 3x3 rotation matrix
    T[0:3, 0:3] = R
    T[0:3, 3] = np.asarray(tvec).flatten()  # tvec may be (3,1); flatten to fit
    return T

then you can multiply and invert those 4x4 matrices.
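
for example, a rigid 4x4 transform can be inverted with np.linalg.inv, or with the closed form below (a minimal sketch; invert_rt is just an illustrative helper name):

import numpy as np

def invert_rt(T):
    # closed-form inverse of a rigid transform: inv([R t; 0 1]) = [R.T, -R.T @ t; 0 1]
    T_inv = np.eye(4)
    R, t = T[0:3, 0:3], T[0:3, 3]
    T_inv[0:3, 0:3] = R.T
    T_inv[0:3, 3] = -R.T @ t
    return T_inv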

give them names encoding each frame involved in the transformation. that way, they line up like dominoes.

an aruco transform encodes the marker’s pose in the camera frame, or equivalently, a transform from marker space to camera space: T_cam_marker.

if you had T_cam_refmarker and T_cam_marker2 from detecting those markers, then T_refmarker_marker2 = T_refmarker_cam @ T_cam_marker2, where T_refmarker_cam = np.linalg.inv(T_cam_refmarker).
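
applied to the script above, the relative-pose block would then look roughly like this (a sketch assuming the mat_from_rtvec helper above and the rvec/tvec pairs that solvePnPRansac returned for the reference marker and a second marker; the variable names are illustrative):

# pose of each marker in the camera frame, named frame-to-frame
T_cam_refmarker = mat_from_rtvec(rvec_ref, tvec_ref)
T_cam_marker2 = mat_from_rtvec(rvec_2, tvec_2)

# chain the transforms: adjacent frame names cancel like dominoes
T_refmarker_marker2 = np.linalg.inv(T_cam_refmarker) @ T_cam_marker2

R_rel = T_refmarker_marker2[0:3, 0:3]  # rotation of marker2 in the refmarker frame
t_rel = T_refmarker_marker2[0:3, 3]    # translation of marker2, in the refmarker frame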

Thank you for your time and advice.

I made the changes, but the result is still not aligned with reality.