Remove Grid in the Floor Plan

Hi all,

I’m trying to remove all the grid lines from this floor plan so that I can extract the text on it, such as RPP A11, A12. This is the code I have tried so far:

import cv2
import numpy as np
import sys
import pickle
import pytesseract

# Load the floor plan; cv2.imread returns None (it does not raise) when the
# file is missing or cannot be decoded, so fail early with a clear message.
image = cv2.imread('images/test.jpg')
if image is None:
    raise FileNotFoundError("could not read 'images/test.jpg'")

# Collapse the BGR image to a single grayscale channel for thresholding.
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# NOTE: the attempt below is left disabled; image1 is single-channel, while
# COLOR_BGR2HSV expects a 3-channel BGR input.
#image = cv2.cvtColor(image1,cv2.COLOR_BGR2HSV)

# Global binary threshold: pixels brighter than 130 become 240, the rest 0.
# cv2.THRESH_BINARY is the named constant for the original literal flag 0.
ret, thresh = cv2.threshold(image1, 130, 240, cv2.THRESH_BINARY)

Any ideas or thoughts would be appreciated.

Additional info: I tried the findContours function and it returns thousands of contours. What would be the best way to select the contours I’m interested in among the thousands of them?

crosspost:

UPDATED: I’m able to remove the horizontal and vertical lines, but it seems the text is still not being read correctly.

Here is the Updated Code:

import cv2
import numpy as np
import sys
import pickle
import pytesseract
def _erase_lines(canvas, binary, kernel_size, color=(255, 255, 255), thickness=2):
    """Paint over long straight lines found in *binary* on *canvas* (in place).

    A morphological opening with a rectangular kernel of *kernel_size*
    (width, height) keeps only the foreground runs that are at least as long
    as the kernel, so a wide, 1-pixel-high kernel isolates horizontal lines
    and a tall, 1-pixel-wide kernel isolates vertical ones. The contours of
    what survives are then drawn on *canvas* in *color*.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    detected = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
    found = cv2.findContours(detected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV versions and 3 in others;
    # pick the contour list in either case.
    contours = found[0] if len(found) == 2 else found[1]
    for contour in contours:
        cv2.drawContours(canvas, [contour], -1, color, thickness)


# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded, so fail early with a clear message.
image = cv2.imread('images/test.jpg')
if image is None:
    raise FileNotFoundError("could not read 'images/test.jpg'")

# Invert the grayscale image so that dark ink becomes bright foreground,
# then binarize it with a local (adaptive) mean threshold.
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bitwise_not(image1)
bw = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 19, -11
)

# Minimum line length is 1/30 of the image dimension; clamp to 1 so that
# very small images do not produce an invalid zero-sized kernel.
rows, cols = bw.shape
horizontal_size = max(1, cols // 30)
verticalsize = max(1, rows // 30)

# Erase horizontal grid lines, then vertical ones, by painting them white.
_erase_lines(image, bw, (horizontal_size, 1))
_erase_lines(image, bw, (1, verticalsize))

# Raw string: the original literal relied on invalid escape sequences
# (\P, \T, \/) and carried a stray '/' before the executable name.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# OpenCV stores pixels as BGR while pytesseract assumes RGB, so convert
# before running OCR.
# NOTE(review): floor-plan labels are sparse text; a page segmentation mode
# such as config='--psm 11' may read them better -- confirm on the real image.
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
text = pytesseract.image_to_string(rgb)

print(text)
cv2.imshow('net', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Here is the result: