Eye-in-hand calibration

Dear community,

I am currently working on calibrating my Intel RealSense D435 camera, which is mounted on the tool flange of a 6 DOF robot arm, for a vision-based Pick and Place application. I have written code that captures images of a chessboard at various user-defined positions and estimates the rotation matrix (R_cam2gripper) and translation vector (T_cam2gripper) that relate the camera frame to the robot's gripper frame. The relevant code snippet is included below.
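
For context on what the code is supposed to compute: since the chessboard stays fixed in the robot's base frame, the chain base_T_gripper_i * X * cam_T_target_i should be identical for every pose i, where X = gripper_T_cam is the unknown transform built from R_cam2gripper and T_cam2gripper. Taking two poses i and j then gives the classic hand-eye relation

    A * X = X * B,   with   A = inv(base_T_gripper_j) * base_T_gripper_i
                     and    B = cam_T_target_j * inv(cam_T_target_i),

which is, as far as I understand, what cv2.calibrateHandEye solves.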

I have two specific questions:

  1. Am I performing the calculations correctly in my code?

  2. After obtaining R_cam2gripper and T_cam2gripper, what are the next steps to detect objects in the robot’s base frame and perform the Pick and Place operation? (I have sketched my current guess below the code.)

Here’s the relevant portion of my code:

import time
import pyrealsense2 as rs
import cv2
import numpy as np
import robot
import os
from math import sin, cos
os.environ["QT_QPA_PLATFORM"] = "xcb"

def initialize_camera():
    """
    Initialize the Intel RealSense camera.

    Returns:
        pipeline (rs.pipeline): The configured RealSense pipeline.
    """
    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    pipeline.start(config)
    return pipeline

def initialize_robot(ip_address="192.168.3.10"):
    """
    Initialize the UR robot.

    Args:
        ip_address (str): The IP address of the UR robot.

    Returns:
        my_robot (robot.Robot): The initialized robot object.
    """
    my_robot = robot.Robot(ip_address)
    return my_robot

def capture_calibration_images(pipeline, robot, chessboard_size, square_size, num_images=10):
    """
    Capture images from the camera and detect chessboard corners.

    Args:
        pipeline (rs.pipeline): The RealSense pipeline.
        robot (robot.Robot): The UR robot object.
        chessboard_size (tuple): The number of inner corners per chessboard row and column.
        square_size (float): The size of one square on the chessboard in meters.
        num_images (int): The number of images to capture for calibration.

    Returns:
        objpoints (list): The 3D points in real world space.
        imgpoints (list): The 2D points in image plane.
        robot_poses (list): The robot poses corresponding to each successful image capture.
    """
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
    objp = np.zeros((np.prod(chessboard_size), 3), np.float32)
    objp[:, :2] = np.mgrid[0:chessboard_size[0], 0:chessboard_size[1]].T.reshape(-1, 2)
    objp *= square_size

    objpoints = []  # 3d points in real world space
    imgpoints = []  # 2d points in image plane
    robot_poses = []

    for i in range(num_images):
        input("Move the robot to a new position and press Enter...")
        time.sleep(1)  # give the robot a moment to settle before sampling
        robot_pose = robot.get_pose()

        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())

        gray = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
        ret, corners = cv2.findChessboardCorners(gray, chessboard_size, None)

        if ret:
            objpoints.append(objp)
            corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
            imgpoints.append(corners2)
            # Append the pose only on successful detection so robot_poses stays
            # in sync with objpoints/imgpoints (and hence with rvecs/tvecs).
            robot_poses.append(robot_pose)
            cv2.drawChessboardCorners(color_image, chessboard_size, corners2, ret)
            cv2.imshow('Calibration Image', color_image)
            cv2.waitKey(500)
        else:
            print(f"Failed to detect chessboard at position {i+1}")

    cv2.destroyAllWindows()
    return objpoints, imgpoints, robot_poses

def calibrate_camera(objpoints, imgpoints, image_shape):
    """
    Calibrate the camera given object points and image points.

    Args:
        objpoints (list): The 3D points in real world space.
        imgpoints (list): The 2D points in image plane.
        image_shape (tuple): The (width, height) of the calibration images.

    Returns:
        camera_matrix (numpy.ndarray): The camera intrinsic matrix.
        dist_coeffs (numpy.ndarray): The distortion coefficients.
        rvecs (list): Per-view rotation vectors of the board in the camera frame (target-to-camera).
        tvecs (list): Per-view translation vectors of the board in the camera frame (target-to-camera).
    """
    ret, camera_matrix, dist_coeffs, rvecs, tvecs = cv2.calibrateCamera(
        objpoints, imgpoints, image_shape, None, None
    )
    return camera_matrix, dist_coeffs, rvecs, tvecs

def ur_pose_to_opencv(pose):
    """
    Convert a UR pose to the OpenCV format.

    Args:
        pose (list): The pose from the UR robot as [x, y, z, rx, ry, rz], where (rx, ry, rz) is an axis-angle rotation vector.

    Returns:
        rotation (numpy.ndarray): The rotation matrix.
        translation (numpy.ndarray): The translation vector.
    """
    translation = np.array([pose[0], pose[1], pose[2]])

    # Extract the rotation vector (rx, ry, rz)
    rx, ry, rz = pose[3:6]

    # Convert the rotation vector to a rotation matrix using Rodrigues' formula
    def rodrigues_rot_matrix(rx, ry, rz):
        theta = np.linalg.norm([rx, ry, rz])
        if theta < 1e-6:
            return np.eye(3)
        r = np.array([rx, ry, rz]) / theta
        c = cos(theta)
        s = sin(theta)
        C = 1 - c
        x, y, z = r
        R = np.array([
            [c + x * x * C, x * y * C - z * s, x * z * C + y * s],
            [y * x * C + z * s, c + y * y * C, y * z * C - x * s],
            [z * x * C - y * s, z * y * C + x * s, c + z * z * C]
        ])
        return R

    rotation = rodrigues_rot_matrix(rx, ry, rz)

    return rotation, translation

def perform_hand_eye_calibration(robot_poses, rvecs, tvecs):
    """
    Perform hand-eye calibration.

    Args:
        robot_poses (list): The robot TCP poses in the base frame (gripper-to-base).
        rvecs (list): Per-view rotation vectors of the board (target-to-camera).
        tvecs (list): Per-view translation vectors of the board (target-to-camera).

    Returns:
        R_cam2gripper (numpy.ndarray): 3x3 rotation from the camera frame to the gripper frame.
        T_cam2gripper (numpy.ndarray): 3x1 translation from the camera frame to the gripper frame.
    """
    robot_rotations = []
    robot_translations = []

    for pose in robot_poses:
        rot, trans = ur_pose_to_opencv(pose)
        robot_rotations.append(rot)
        robot_translations.append(trans)

    # cv2.calibrateHandEye expects gripper-to-base motions and target-to-camera
    # observations; it accepts 3x3 rotation matrices or 3x1 rotation vectors.
    R_cam2gripper, T_cam2gripper = cv2.calibrateHandEye(
        robot_rotations, robot_translations, rvecs, tvecs, method=cv2.CALIB_HAND_EYE_TSAI
    )

    return R_cam2gripper, T_cam2gripper

def main():
    # Initialize the camera and robot
    pipeline = initialize_camera()
    robot = initialize_robot()

    # Chessboard parameters
    chessboard_size = (8, 6)
    square_size = 0.025  # Size of a square in meters

    # Capture calibration images
    objpoints, imgpoints, robot_poses = capture_calibration_images(
        pipeline, robot, chessboard_size, square_size, num_images=10
    )
    
    # Stop the camera pipeline
    pipeline.stop()

    # Calibrate the camera; image size is (width, height) and must match the stream
    camera_matrix, dist_coeffs, rvecs, tvecs = calibrate_camera(
        objpoints, imgpoints, (640, 480)
    )

    # Perform hand-eye calibration
    R_cam2gripper, T_cam2gripper = perform_hand_eye_calibration(
        robot_poses, rvecs, tvecs
    )

    print(f"R_cam2gripper:\n{R_cam2gripper}")
    # 3x3 rotation matrix from camera to gripper.
    
    print(f"T_cam2gripper:\n{T_cam2gripper}")
    # 3x1 translation vector from camera to gripper.

    print(f"Camera Matrix:\n{camera_matrix}")
    # Camera intrinsic matrix

    print(f"Distortion Coefficients:\n{dist_coeffs}")
    # Distortion coefficients

if __name__ == "__main__":
    main()
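
For question 2, here is a minimal sketch of what I think the next steps look like, so you can tell me if my understanding is wrong. It assumes get_pose() returns the TCP pose in the base frame as [x, y, z, rx, ry, rz], and it uses a hypothetical detect_object_in_camera() that would return the object's 3D position in the camera frame (for the D435 I expect this would come from the depth stream, e.g. rs.rs2_deproject_pixel_to_point on a detected pixel):

def pose_to_homogeneous(rotation, translation):
    """Stack a 3x3 rotation and a 3-vector translation into a 4x4 homogeneous matrix."""
    T = np.eye(4)
    T[:3, :3] = rotation
    T[:3, 3] = np.asarray(translation).flatten()
    return T

def object_in_base_frame(robot_pose, R_cam2gripper, T_cam2gripper, point_in_camera):
    """
    Chain the transforms: base_T_obj = base_T_gripper @ gripper_T_cam @ cam_T_obj.

    robot_pose: current TCP pose [x, y, z, rx, ry, rz] (gripper in base frame)
    point_in_camera: 3D point of the object in the camera frame, in meters
    """
    R_g2b, t_g2b = ur_pose_to_opencv(robot_pose)          # gripper -> base
    base_T_gripper = pose_to_homogeneous(R_g2b, t_g2b)
    gripper_T_cam = pose_to_homogeneous(R_cam2gripper, T_cam2gripper)

    p_cam = np.append(np.asarray(point_in_camera), 1.0)   # homogeneous coordinates
    p_base = base_T_gripper @ gripper_T_cam @ p_cam
    return p_base[:3]

# Intended usage (detect_object_in_camera is hypothetical, not part of my code yet):
# p_cam = detect_object_in_camera(pipeline, camera_matrix, dist_coeffs)
# p_base = object_in_base_frame(robot.get_pose(), R_cam2gripper, T_cam2gripper, p_cam)
# ...then command the robot to a pre-grasp pose above p_base and close the gripper.

Does this transform chain look correct?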