I have the following Python / OpenCV code which is supposed to take a filled-out document (new.png), line it up with the reference document (ref.png) and put the result in output.png. It runs fine locally, in 3.5s; however, on prod, it will sometimes take upwards of 1.5m — and that's assuming Python doesn't just kill it altogether for using too many resources.
Here’s my code:
import sys
import cv2
import numpy as np
# Command-line interface: reference image, filled-out image, output path.
if len(sys.argv) != 4:
    print('USAGE')
    print(' python3 diff.py ref.png new.png output.png')
    # Exit non-zero so shells and callers can detect the usage error
    # (a bare sys.exit() exits with status 0, which signals success).
    sys.exit(1)

# Fraction of the best-scoring feature matches kept for homography estimation.
GOOD_MATCH_PERCENT = 0.15
def alignImages(im1, im2):
    """Warp im1 into im2's coordinate frame using feature matching.

    Parameters
    ----------
    im1 : numpy.ndarray
        Single-channel (grayscale) image to be aligned.
    im2 : numpy.ndarray
        Single-channel reference image.

    Returns
    -------
    tuple
        (aligned, h): im1 warped by the estimated homography h so that it
        lines up with im2.

    Raises
    ------
    RuntimeError
        If no descriptors are found, or too few good matches remain to
        estimate a homography (fewer than 4 correspondences).
    """
    # Detect AKAZE features and compute their binary descriptors.
    # NOTE(perf): detection time and brute-force matching cost both grow
    # with the keypoint count (matching is roughly quadratic). Large scans
    # can yield thousands of keypoints; capping or downscaling input is
    # the usual fix for multi-minute prod runtimes.
    detector = cv2.AKAZE_create()
    keypoints1, descriptors1 = detector.detectAndCompute(im1, None)
    keypoints2, descriptors2 = detector.detectAndCompute(im2, None)
    # detectAndCompute returns None descriptors when nothing is found;
    # fail with a clear message instead of a cv2 assertion later.
    if descriptors1 is None or descriptors2 is None:
        raise RuntimeError("no features detected in one of the images")

    # Match binary descriptors with Hamming distance.
    matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
    matches = matcher.match(descriptors1, descriptors2, None)

    # Sort by score and keep only the best GOOD_MATCH_PERCENT fraction.
    matches.sort(key=lambda x: x.distance)
    numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
    # The original indexed matches[numGoodMatches] unconditionally, which
    # raises IndexError when the match list is empty; guard the debug print.
    if numGoodMatches < len(matches):
        print(matches[numGoodMatches].distance)
    matches = matches[:numGoodMatches]

    # findHomography needs at least 4 point correspondences.
    if len(matches) < 4:
        raise RuntimeError("not enough good matches to compute a homography")

    # Debug visualisation of the kept matches.
    imMatches = cv2.drawMatches(im1, keypoints1, im2, keypoints2, matches, None)
    cv2.imwrite("matches.jpg", imMatches)

    # Collect the corresponding point coordinates of the kept matches.
    points1 = np.zeros((len(matches), 2), dtype=np.float32)
    points2 = np.zeros((len(matches), 2), dtype=np.float32)
    for i, match in enumerate(matches):
        points1[i, :] = keypoints1[match.queryIdx].pt
        points2[i, :] = keypoints2[match.trainIdx].pt

    # Robustly estimate the homography with RANSAC, then warp im1 onto im2.
    h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)
    height, width = im2.shape  # grayscale image: shape is (rows, cols)
    im1Reg = cv2.warpPerspective(im1, h, (width, height))
    return im1Reg, h
def removeOverlap(refBW, newBW):
    """Erase the reference document's ink from the new scan.

    Both arguments are binarised (0/255) grayscale images. The result keeps
    only the marks present in newBW that are absent from refBW, rendered as
    black ink on a white background.
    """
    # Switch to ink-as-white by inverting both binarised images.
    refInk = 255 - refBW
    newInk = 255 - newBW
    # Pixels that differ between reference and scan...
    diff = cv2.absdiff(refInk, newInk)
    # ...restricted to pixels actually inked in the new scan.
    added = cv2.bitwise_and(diff, newInk)
    # Invert back to ink-as-black on white paper.
    return 255 - added
def offset(img, xOffset, yOffset):
    """Translate img by (xOffset, yOffset) pixels, padding with white."""
    rows, cols = img.shape[:2]
    # 2x3 affine matrix encoding a pure translation.
    shift = np.float32([[1, 0, xOffset],
                        [0, 1, yOffset]])
    # borderValue of white so regions shifted into view look like blank paper.
    return cv2.warpAffine(img, shift, (cols, rows), borderValue=(255, 255, 255))
# the ink will often bleed out on printouts ever so slightly
# to eliminate that we'll apply a "jitter" of sorts
refFilename = sys.argv[1]
imFilename = sys.argv[2]
outFilename = sys.argv[3]

imRef = cv2.imread(refFilename, cv2.IMREAD_GRAYSCALE)
im = cv2.imread(imFilename, cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None (it does not raise) for a missing or unreadable
# file; fail fast with a clear message instead of crashing inside alignImages.
if imRef is None:
    sys.exit("could not read reference image: " + refFilename)
if im is None:
    sys.exit("could not read input image: " + imFilename)

imNew, h = alignImages(im, imRef)

# Binarise both images using Otsu's automatic threshold.
refBW = cv2.threshold(imRef, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
newBW = cv2.threshold(imNew, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Subtract the reference at every offset in a small window so slightly
# misregistered or bled ink is still removed.
# NOTE(review): range(-2, 2) covers offsets -2..1 only; if a symmetric
# window was intended, use range(-2, 3). Kept as-is to preserve output.
for x in range(-2, 2):
    for y in range(-2, 2):
        newBW = removeOverlap(offset(refBW, x, y), newBW)

cv2.imwrite(outFilename, newBW)
ref.png can be found at https://www.terrafrost.com/ref.png and new.png can be found at https://www.terrafrost.com/new.png
Is there anything obvious that I could be doing to improve the speed of my program?
On prod I’m running Python 3.5.3 and OpenCV 4.4.0. uname -r
returns 4.14.301-224.520.amzn2.x86_64 (Linux).
Locally I’m running Python 3.5.3 and OpenCV 4.4.0 as well. uname -r
returns 4.19.128-microsoft-standard
They’re both running in Docker containers - prod is running on an AWS ECS instance whilst locally I’m running it on Ubuntu 20.04 in WSL2 / Windows 11.
Any ideas?