[Q] Translate a set of coordinates into the same translated image space?

weight_theta · August 9, 2022, 10:18am

Sure, please find the entire code snipped below. You wont be able to see a boundig box in the second image, because the translation offsets them out of the image or makes them so large that they are outside the image plane.

def random_translation(image_path, bounding_box, factor = 20):
    colors = [[255, 0, 0], [0, 255, 0], [0, 0, 255],
              [255, 255, 0], [255, 127, 127], [255, 165, 0],
              [255, 105, 180], [64, 224, 208], [134, 1, 175],
              [127, 127, 127], [116, 86, 74], [0, 0, 0],
              [128, 0, 128], [0, 128, 128], [128, 0, 0],
              [0, 255, 255], [128, 128, 128], [255, 0, 255],
              [0, 0, 128], [255, 105, 180], [128, 128, 0]]
    
    class_names = ["aeroplane","bicycle","bird","boat","bottle","bus","car",
        "cat","chair","cow","diningtable","dog","horse","motorbike","person",
        "pottedplant","sheep","sofa","train","tvmonitor"]
    
    image = cv.imread(image_path)
    height, width = image.shape[:2]
    
    # 0. Translation Variables
    translate_upper_bound = float(height / 100 * factor)
    translate_lower_bound = float(height / 100 * factor) * -1
    
    # uniform vals to translate into x coord t_x and y coord t_y
    t_x = np.random.uniform(low=translate_lower_bound, high=translate_upper_bound)
    t_y = np.random.uniform(low=translate_lower_bound, high=translate_upper_bound)
    
    # Translation matrix T
    T = np.float32([[1, 0, t_x], [0, 1, t_y]])
    img_translation = cv.warpAffine(image, T, (width, height)) 
    
    # 1. Original Image and Bounding Box
    height, width, _  = image.shape
    
    class_pred = int(bounding_box[0])
    bounding_box = bounding_box[1:]
    assert len(bounding_box) == 4, "Bounding box prediction exceed x,y,w,h."
    # extract x midpoint, y midpoint, w width and h height
    x = bounding_box[0] 
    y = bounding_box[1]
    w = bounding_box[2]
    h = bounding_box[3]
    l = int((x - w / 2) * width) 
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)
    
    if l < 0:
        l = 0
    if r > width - 1:
        r = width - 1
    if t < 0:
        t = 0
    if b > height - 1:
        b = height - 1

    image = cv.rectangle(image, (l, t), (r, b), colors[class_pred], 2)
    (width, height), _ = cv.getTextSize(class_names[class_pred], cv.FONT_HERSHEY_SIMPLEX, 0.6, 2)
         
    image = cv.rectangle(image, (l, t + 20), (l + width, t), colors[class_pred], -1)
    image = cv.putText(image, class_names[class_pred], (l, t + 15),
                      cv.FONT_HERSHEY_SIMPLEX, 0.6, [255, 255, 255], 2)
    
    #2. Translated Image and Bounding Box

    height, width, _  = img_translation.shape
    x = bounding_box[0] 
    y = bounding_box[1]
    w = bounding_box[2] 
    h = bounding_box[3] 
    
    # this works but is incorrect, as it doesnt transform the annotated x,y labels
    # it draws the box where it would be and then shifts the entire bounding box
    #l = int((x - w / 2) * width + t_x)
    #r = int((x + w / 2) * width + t_x)
    #t = int((y - h / 2) * height + t_y)
    #b = int((y + h / 2) * height + t_y)
    l = int((x - w / 2) * width)
    r = int((x + w / 2) * width)
    t = int((y - h / 2) * height)
    b = int((y + h / 2) * height)
    if l < 0:
        l = 0
    if r > width - 1:
        r = width - 1
    if t < 0:
        t = 0
    if b > height - 1:
        b = height - 1

    img_translation = cv.rectangle(img_translation, (l, t), (r, b), colors[class_pred], 2)
    (width, height), _ = cv.getTextSize(class_names[class_pred], cv.FONT_HERSHEY_SIMPLEX, 0.6, 2)
         
    img_translation = cv.rectangle(img_translation, (l, t + 20), (l + width, t), colors[class_pred], -1)
    img_translation = cv.putText(img_translation, class_names[class_pred], (l, t + 15),
                      cv.FONT_HERSHEY_SIMPLEX, 0.6, [255, 255, 255], 2)
    
    # 3. Plot results
    plt.subplot(1,2,1)
    plt.imshow(image)
    plt.subplot(1,2,2)
    plt.imshow(img_translation)

bbox = [11, 0.34419263456090654, 0.611, 0.4164305949008499, 0.262]
random_translation(image_path = 'C:/Users/username/anaconda3/envs/yolo/yolo/data/images/000001.jpg', bounding_box = bbox, factor= 20)

Topic		Replies	Views
Homography matrix Python calib3d	2	134	June 27, 2024
Perspective transform on single coordinate Python	5	4639	January 9, 2021
Position coordinates on geometric image Python	8	1088	September 5, 2024
Rotate the bounding box of object detection and crop it	1	935	March 15, 2022
Doubt in Bounding Rectangle Algorithm programming	13	844	September 16, 2023

[Q] Translate a set of coordinates into the same translated image space?

Related topics