I have an example here and I’m stuck. I want to import a JPG file and extract the number from it. I want to remove the shading and only retain the contours in the middle. Is there a way to accomplish this effectively?
import cv2
import pytesseract
import numpy as np
# Location of the marked JPG that the script reads and runs OCR on
marking_path: str = "markierung_4.jpg"
# Method for preprocessing the image
def preprocess_image(image, *, block_size=11, c=2, show_steps=True):
    """Binarise an image and clean it up so the digit contours stand out.

    The image is converted to grayscale, thresholded adaptively (inverted,
    so pixels darker than their local threshold become 255), and then
    eroded and dilated once with a 3x3 kernel to drop small specks.

    Args:
        image: Image array as returned by ``cv2.imread``. Colour input is
            converted to grayscale; input that is already single-channel
            (shape ``(H, W)`` or ``(H, W, 1)``) is used as-is.
        block_size: Side length of the pixel neighbourhood used by the
            adaptive threshold. OpenCV requires an odd value greater
            than 1. Larger values follow slow shading gradients less
            closely.
        c: Constant subtracted from the local weighted mean. Raising it
            suppresses more low-contrast background.
        show_steps: When true (the default, matching the original
            behaviour), display every intermediate image in a window and
            block until a key is pressed. Pass ``False`` for batch or
            headless use.

    Returns:
        The binary image after erosion and dilation.
    """
    # cv2.cvtColor(..., COLOR_BGR2GRAY) rejects single-channel input, so
    # only convert when the array really carries colour channels.
    is_array = isinstance(image, np.ndarray)
    if is_array and image.ndim == 2:
        gray = image
    elif is_array and image.ndim == 3 and image.shape[2] == 1:
        gray = image[:, :, 0]
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply adaptive thresholding: the threshold is computed per
    # neighbourhood, which is what lets it cope with uneven shading.
    thresh = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        block_size,
        c,
    )

    # Erode first to remove isolated noise pixels, then dilate to restore
    # the thickness of the strokes that survived.
    kernel = np.ones((3, 3), np.uint8)
    eroded = cv2.erode(thresh, kernel, iterations=1)
    dilated = cv2.dilate(eroded, kernel, iterations=1)

    if show_steps:
        # Display the preprocessing steps as images
        cv2.imshow("Original Image", image)
        cv2.imshow("Grayscale", gray)
        cv2.imshow("Threshold", thresh)
        cv2.imshow("Preprocessed Image", dilated)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return dilated
# Method for number recognition using Tesseract
def recognize_numbers(image):
    """Run Tesseract on a preprocessed image and return the digits it finds.

    The OCR runs with ``--psm 10 --oem 3`` as before, but is additionally
    restricted to the characters 0-9 so that leftover noise is not
    reported as punctuation (for example ``4)`` instead of ``4``).

    Args:
        image: Image to pass to ``pytesseract.image_to_string``.

    Returns:
        A string containing only the recognised digits, with whitespace
        and any other characters removed. Empty if no digit was found.
    """
    digits = "0123456789"
    config = f"--psm 10 --oem 3 -c tessedit_char_whitelist={digits}"
    raw_text = pytesseract.image_to_string(image, config=config)
    # Filter again in Python: some Tesseract builds ignore the whitelist,
    # and the raw output carries trailing newline/form-feed characters.
    return "".join(ch for ch in raw_text if ch in digits)
# Read the marked image from disk; a None result is treated as a load failure
marked_image = cv2.imread(marking_path)
if marked_image is None:
    print(f"Error loading the image {marking_path}")
else:
    # Clean the image up first, then hand the result to the OCR step
    preprocessed_image = preprocess_image(marked_image)
    recognized_number = recognize_numbers(preprocessed_image)
    print("Recognized Number:", recognized_number)
Here is the original photo:
and the result is:
Recognized Number: 4)