I am trying to draw contours only around the red rectangle areas of the original image, so that OpenCV passes only that specific text to Tesseract OCR. My current code draws contours over the whole table. I am not very familiar with OpenCV. How can I add red-rectangle detection to my code?
Code:
import cv2
import matplotlib.pyplot as plt
import numpy as np
def mark_region(image_path, threshold_region_ignore=40):
    """Outline the red rectangles in an image and return their bounding boxes.

    The image is converted to HSV, pixels falling in the red hue bands are
    masked, and the external contours of that mask are reduced to bounding
    rectangles. Each kept rectangle is drawn on the image in magenta.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.
        threshold_region_ignore: Minimum width and height, in pixels, a
            bounding rectangle must have to be kept. Smaller regions are
            skipped.

    Returns:
        A tuple ``(image, line_items_coordinates)`` where ``image`` is the
        loaded BGR image with the kept rectangles drawn on it, and
        ``line_items_coordinates`` is a list of ``[(x, y), (x + w, y + h)]``
        corner pairs, one per kept rectangle.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    # The colour bounds below are HSV values, so the BGR image loaded by
    # OpenCV has to be converted before thresholding.
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Red sits on both ends of the hue axis (8-bit OpenCV hue wraps around),
    # so it needs a low band and a high band that are OR-ed together.
    lower_red_low = np.array([0, 175, 20], np.uint8)
    upper_red_low = np.array([10, 255, 255], np.uint8)
    lower_red_high = np.array([170, 175, 20], np.uint8)
    upper_red_high = np.array([180, 255, 255], np.uint8)
    mask = cv2.bitwise_or(
        cv2.inRange(hsv, lower_red_low, upper_red_low),
        cv2.inRange(hsv, lower_red_high, upper_red_high),
    )

    # A light dilation bridges small breaks in the red outlines (e.g. from
    # compression artefacts) so each rectangle yields a single contour.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    dilate = cv2.dilate(mask, kernel, iterations=1)

    # Find the outer contours of the red regions only.
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV versions and 3 in others.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    line_items_coordinates = []
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)
        if w < threshold_region_ignore or h < threshold_region_ignore:
            continue
        image = cv2.rectangle(
            image, (x, y), (x + w, y + h), color=(255, 0, 255), thickness=1
        )
        line_items_coordinates.append([(x, y), (x + w, y + h)])

    return image, line_items_coordinates
# Run mark_region on the input image and save the annotated result.
FILENAME = r"images1.jpeg"  # <— change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20, 20))
# OpenCV stores images as BGR while matplotlib expects RGB, so convert for
# display only; `image` itself is left untouched.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.savefig("image-with-regions.png")  # <— added this to output an image
Original image: