How do I know which coordinates in the original image map to which coordinates in the warped image?

Based on the code below, I warp the image, work out which coordinates in the original image were shifted to which coordinates in the warped image, and draw a bounding box from them. However, the bounding box is not drawn correctly, as shown in the attached result. I was wondering whether I need to do some additional post-processing on the coordinates I get, or whether there is another way to do this.

import cv2
import numpy as np
from PIL import Image


class Distort:
    def __init__(self, rng=None):
        self.rng = np.random.default_rng() if rng is None else rng
        self.tps = cv2.createThinPlateSplineShapeTransformer()

    def __call__(self, img, entities, mag=0, prob=1.):
        # Skip the augmentation with probability (1 - prob); return the
        # same (image, entities) pair as the transformed branch below.
        if self.rng.uniform(0, 1) > prob:
            return img, entities

        w, h = img.size
        img = np.asarray(img)
        srcpt = []
        dstpt = []

        w_33 = 0.33 * w
        w_66 = 0.66 * w

        h_50 = 0.50 * h
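        # w_33, w_66 and h_50 are the control-point grid cell sizes;
        # the random offsets below are scaled by these.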

        p = 0  # padding offset for the control-point grid; kept at 0

        # mag selects the maximum jitter fraction; out-of-range values
        # are clamped to the strongest setting.
        b = [.2, .3, .4]
        if mag < 0 or mag >= len(b):
            index = len(b) - 1
        else:
            index = mag
        frac = b[index]

        # Top edge: 4 control points at x in {0, w/3, 2w/3, w}, each
        # jittered by up to frac of the cell size (w_33, h_50).
        srcpt.append([p, p])
        x = self.rng.uniform(0, frac) * w_33
        y = self.rng.uniform(0, frac) * h_50
        dstpt.append([p + x, p + y])

        srcpt.append([p + w_33, p])
        x = self.rng.uniform(-frac, frac) * w_33
        y = self.rng.uniform(0, frac) * h_50
        dstpt.append([p + w_33 + x, p + y])

        srcpt.append([p + w_66, p])
        x = self.rng.uniform(-frac, frac) * w_33
        y = self.rng.uniform(0, frac) * h_50
        dstpt.append([p + w_66 + x, p + y])

        srcpt.append([w - p, p])
        x = self.rng.uniform(-frac, 0) * w_33
        y = self.rng.uniform(0, frac) * h_50
        dstpt.append([w - p + x, p + y])

        # Bottom edge: the same 4 columns; the y offsets pull the
        # points up into the image.
        srcpt.append([p, h - p])
        x = self.rng.uniform(0, frac) * w_33
        y = self.rng.uniform(-frac, 0) * h_50
        dstpt.append([p + x, h - p + y])

        srcpt.append([p + w_33, h - p])
        x = self.rng.uniform(-frac, frac) * w_33
        y = self.rng.uniform(-frac, 0) * h_50
        dstpt.append([p + w_33 + x, h - p + y])

        srcpt.append([p + w_66, h - p])
        x = self.rng.uniform(-frac, frac) * w_33
        y = self.rng.uniform(-frac, 0) * h_50
        dstpt.append([p + w_66 + x, h - p + y])

        srcpt.append([w - p, h - p])
        x = self.rng.uniform(-frac, 0) * w_33
        y = self.rng.uniform(-frac, 0) * h_50
        dstpt.append([w - p + x, h - p + y])
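        # Note: only these 8 edge points are TPS control points; the
        # interior of the image is interpolated smoothly between them.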

        n = len(dstpt)
        matches = [cv2.DMatch(i, i, 0) for i in range(n)]
        dst_shape = np.asarray(dstpt, dtype=np.float32).reshape((-1, n, 2))
        src_shape = np.asarray(srcpt, dtype=np.float32).reshape((-1, n, 2))
        # warpImage() uses the estimated mapping as a backward map
        # (output pixel -> input pixel), so the shapes are passed in
        # reverse order to move srcpt onto dstpt in the output image.
        self.tps.estimateTransformation(dst_shape, src_shape, matches)
        img = self.tps.warpImage(img)
        img = Image.fromarray(img)

        # Re-estimate in the forward direction so applyTransformation()
        # maps original-image points into warped-image coordinates.
        self.tps.estimateTransformation(src_shape, dst_shape, matches)
        
        transformed_entities = []
        for entity in entities:
            phrase, coords = entity
            # Boxes arrive as normalized [x1, y1, x2, y2]; convert to pixels.
            x1, y1, x2, y2 = coords[0] * w, coords[1] * h, coords[2] * w, coords[3] * h

            # Push all four corners through the TPS and take their
            # axis-aligned bounding box in the warped image.
            left_up_x = left_up_y = float('inf')
            right_down_x = right_down_y = float('-inf')
            for x, y in [[x1, y1], [x2, y1], [x1, y2], [x2, y2]]:
                pts = np.array([[[x, y]]], dtype=np.float32)
                _, out = self.tps.applyTransformation(pts)
                transformed_x, transformed_y = out[0, 0].tolist()
                left_up_x = min(left_up_x, transformed_x)
                left_up_y = min(left_up_y, transformed_y)
                right_down_x = max(right_down_x, transformed_x)
                right_down_y = max(right_down_y, transformed_y)

            # Back to normalized coordinates.
            left_up_x, right_down_x = round(left_up_x / w, 3), round(right_down_x / w, 3)
            left_up_y, right_down_y = round(left_up_y / h, 3), round(right_down_y / h, 3)

            # Keep the box only if it is still fully inside the image
            # and non-degenerate after warping.
            if (0.0 <= left_up_x <= 1.0 and 0.0 <= left_up_y <= 1.0
                    and 0.0 <= right_down_x <= 1.0 and 0.0 <= right_down_y <= 1.0
                    and left_up_x < right_down_x and left_up_y < right_down_y):
                transformed_entities.append([phrase, [left_up_x, left_up_y, right_down_x, right_down_y]])

        return img, transformed_entities
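
For reference, here is a minimal sketch of how I call the class and draw the returned boxes (assuming a PIL RGB image; the entity format is [phrase, [x1, y1, x2, y2]] with coordinates normalized to [0, 1], and the file names are just placeholders):

import numpy as np
from PIL import Image, ImageDraw

img = Image.open('sample.jpg').convert('RGB')
entities = [['dog', [0.10, 0.20, 0.45, 0.80]]]

distort = Distort(rng=np.random.default_rng(0))
warped, warped_entities = distort(img, entities, mag=1)

# Draw the transformed boxes back in pixel coordinates.
draw = ImageDraw.Draw(warped)
W, H = warped.size
for phrase, (x1, y1, x2, y2) in warped_entities:
    draw.rectangle([x1 * W, y1 * H, x2 * W, y2 * H], outline='red', width=2)
warped.save('warped_with_boxes.jpg')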