我正在尝试清理包含数字表格的图像,以便进行OCR识别。您可以在此处查看示例:
测试图片1
我当前的管道如下:
1 /调整图片的宽度为256,保持宽高比
# Height and width are the first two entries of the image shape.
# NOTE: h and w are not refreshed after the resize below.
h, w = img.shape[:2]
# Scale factor that brings the width to 256 px.
ratio = 256 / w
# The same factor is applied on both axes, so the aspect ratio is preserved.
img = cv2.resize(img, None, fx=ratio, fy=ratio, interpolation=cv2.INTER_LANCZOS4)
2 /将其转换为灰度
# Convert the BGR image to grayscale.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
3 /由于图像的边缘附近往往有表格边框,因此我将图像四周各裁掉3px。
# Drop a 3-pixel margin from every side of the grayscale image.
gray = gray[3:-3, 3:-3]
以下两个步骤来自PyImageSearch
4 /应用高斯模糊以消除一些噪声
# 3x3 Gaussian blur (sigmaX passed as 0).
blurred = cv2.GaussianBlur(gray, (3,3), 0)
5 /应用blackhat运算符(不确定是否需要)
# Rectangular structuring element built with size (13, 5).
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (13,5))
# Black-hat morphological transform of the blurred image using that element.
blackhat = cv2.morphologyEx(blurred, cv2.MORPH_BLACKHAT, kernel)
6 /使用HoughLines检测并删除长直线(表格边框)
# Edge map shared by both Hough passes.
edges = imutils.auto_canny(blurred)

# Size of the image actually being searched. `h`/`w` were read before the
# resize and the 3px crop, so they no longer match `edges`; using them can
# make the vote threshold unreachable on a short resized image.
edge_h, edge_w = edges.shape[:2]

# horizontal lines: theta restricted to 85..95 degrees; a line needs votes
# equal to 80% of the image width, capped at 100
hlines = cv2.HoughLines(
    edges, 1, np.pi / 180, min(100, int(edge_w * .8)),
    min_theta=np.radians(85),
    max_theta=np.radians(95),
)
# HoughLines returns None when nothing is found; otherwise unwrap each entry.
horizontal = [] if hlines is None else [line[0] for line in hlines]

# vertical lines: theta restricted to -5..5 degrees; threshold is 80% of the
# image height, capped at 100
# NOTE(review): the OpenCV docs state min_theta must lie between 0 and
# max_theta -- confirm the negative lower bound behaves as intended on the
# OpenCV version in use.
vlines = cv2.HoughLines(
    edges, 1, np.pi / 180, min(100, int(edge_h * .8)),
    min_theta=np.radians(-5),
    max_theta=np.radians(5),
)
vertical = [] if vlines is None else [line[0] for line in vlines]

# merge nearby lines using a long and boring function
horizontal = merge_lines(horizontal)
# Fixed: this previously merged `horizontal` again, which discarded every
# detected vertical line and drew the horizontal ones twice.
vertical = merge_lines(vertical)

# draw all the remaining lines onto the blackhat image
# width=3px, color=0 (black) to remove table borders
blackhat = draw_lines(horizontal, blackhat, 0, 3)
blackhat = draw_lines(vertical, blackhat, 0, 3)
7 /(来自PyImageSearch)计算Scharr梯度,然后使用Otsu阈值检测文本区域
def scharr_gradient(img):
    """Return the absolute x-gradient of *img*, min-max scaled to uint8.

    The derivative is taken along x only (dx=1, dy=0) with ``ksize=-1``,
    computed in float32 so negative responses survive until the absolute
    value is taken. The result is stretched to the 0-255 range.

    Args:
        img: Image array accepted by ``cv2.Sobel``.

    Returns:
        A ``numpy.uint8`` array with the same shape as the gradient. If the
        gradient is constant (e.g. a flat input image) an all-zero array is
        returned instead of dividing by zero.
    """
    grad_x = np.absolute(cv2.Sobel(img, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=-1))
    min_, max_ = np.min(grad_x), np.max(grad_x)
    span = max_ - min_
    if span == 0:
        # Constant gradient: normalising would compute 0/0 -> NaN, and casting
        # NaN to uint8 is undefined. There is no structure to show, so use 0.
        return np.zeros(grad_x.shape, dtype=np.uint8)
    return (255 * ((grad_x - min_) / span)).astype(np.uint8)
# Absolute x-gradient of the blackhat image, scaled to 0-255 by scharr_gradient.
scharr = scharr_gradient(blackhat)
# Morphological closing with a rectangular element built with size (20, 5);
# presumably meant to join neighbouring characters into solid text regions.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20,5))
closed = cv2.morphologyEx(scharr, cv2.MORPH_CLOSE, kernel)
# Threshold with the THRESH_OTSU flag; the first return value is discarded.
_, thresh = cv2.threshold(closed, 0, 255, cv2.THRESH_OTSU)
8 /将蒙版应用于原始灰度图像以获得清晰的图像
# Invert the thresholded image bitwise and move it to float32 so the
# addition below cannot wrap around in uint8.
mask = np.invert(thresh).astype(np.float32)
# Add the inverted mask to the grayscale image, then clamp the sum to
# 0..255 and convert back to uint8.
masked = np.clip(gray + mask, 0, 255).astype(np.uint8)
问题:
我尝试过的其他事情: