我正在尝试使用 tesseract 进行 OCR。为了获得更好的识别结果,我想在把图像发送给 tesseract 之前先去除背景噪声。
我已经知道文本的颜色是固定的,并使用 cv2.inRange 去除背景噪点,但问题是背景噪点的颜色与文本颜色相近,因此我被这种情况难住了。
这是我要处理的原始测试图像:
我尝试了什么:
from PIL import Image
from pytesseract import *
import cv2
import numpy as np
def img_hsv_mask_white(img):
    """Black out every pixel of ``img`` that is not a bright, low-saturation colour.

    The image is converted with ``cv2.COLOR_BGR2HSV`` and pixels whose HSV
    value lies in H 0-179, S 0-17, V 185-235 are selected.  The selection
    mask is box-blurred (3x3) before being applied.

    Args:
        img: Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2HSV)``.

    Returns:
        ``img`` AND-ed with itself under the blurred mask.
    """
    # For 8-bit HSV, OpenCV uses H: 0-179, S: 0-255, V: 0-255.
    low = np.array([0, 0, 185])
    high = np.array([179, 17, 235])
    in_range = cv2.inRange(cv2.cvtColor(img, cv2.COLOR_BGR2HSV), low, high)
    # Any non-zero mask value selects a pixel, so blurring the binary mask
    # also lets pixels adjacent to a match through.
    soft_mask = cv2.blur(in_range, (3, 3))
    return cv2.bitwise_and(img, img, mask=soft_mask)
def img_hsv_mask_black(img):
    """Black out every pixel of ``img`` that is not a dark colour.

    The image is converted with ``cv2.COLOR_BGR2HSV`` and pixels whose HSV
    value lies in H 0-60, S 0-80, V 0-70 are selected.  The selection mask
    is box-blurred (8x8) before being applied.

    Args:
        img: Image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2HSV)``.

    Returns:
        ``img`` AND-ed with itself under the blurred mask.
    """
    # For 8-bit HSV, OpenCV uses H: 0-179, S: 0-255, V: 0-255.
    low = np.array([0, 0, 0])
    high = np.array([60, 80, 70])
    in_range = cv2.inRange(cv2.cvtColor(img, cv2.COLOR_BGR2HSV), low, high)
    # Any non-zero mask value selects a pixel, so the wide blur also lets
    # pixels in the neighbourhood of a match through.
    soft_mask = cv2.blur(in_range, (8, 8))
    return cv2.bitwise_and(img, img, mask=soft_mask)
def immerge(img1, img2):
    """Return the per-element bitwise AND of ``img1`` and ``img2``.

    NOTE(review): AND keeps only bits set in *both* inputs; if the intent
    is to combine what the two inputs retain, ``cv2.bitwise_or`` may be
    what is wanted -- confirm against the expected output.
    """
    return cv2.bitwise_and(img1, img2)
#require module: numpy, opencv-python, Pillow, pytesseract
if __name__ == "__main__":
    import sys

    # Tell pytesseract where the Tesseract executable is installed.
    pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'

    # Process the test images 0711/1.png ... 0711/8.png one at a time.
    for x in range(1, 9):
        file = "0711/{0}.png".format(x)
        srcimg = cv2.imread(file, cv2.IMREAD_UNCHANGED)
        # cv2.imread does not raise on a missing/unreadable file; it returns
        # None, which would otherwise surface as an opaque cvtColor error.
        if srcimg is None:
            print("cannot read image: {0}".format(file), file=sys.stderr)
            continue

        white = img_hsv_mask_white(srcimg)
        black = img_hsv_mask_black(srcimg)
        merged = immerge(white, black)

        code = pytesseract.image_to_string(merged, lang='eng')
        print(code)

        # Show the pre-processed image and wait for a key press before
        # moving on to the next file.
        cv2.imshow(file, merged)
        cv2.waitKey(0)
答案 0 :(得分:0)
在第一个结果的基础上,您可以去除以下几类噪声:
- 太大或太小、不可能是字母的连通区域
- 在垂直方向上没有与其余文本居中对齐的连通区域
import cv2 as cv
import numpy as np
im = cv.imread('ocr.png')
# cv.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside cvtColor.
if im is None:
    raise FileNotFoundError("could not read image 'ocr.png'")
imgray = cv.cvtColor(im, cv.COLOR_BGR2GRAY)
# Binarise: pixels brighter than 127 become 255, all others 0.
ret, thresh = cv.threshold(imgray, 127, 255, cv.THRESH_BINARY)
def size_threshold(bw, minimum, maximum):
    """Drop connected components whose area is outside ``[minimum, maximum]``.

    Args:
        bw: Binary image accepted by ``cv.connectedComponentsWithStats``.
        minimum: Smallest component area (in pixels) that is kept.
        maximum: Largest component area (in pixels) that is kept.

    Returns:
        A ``uint8`` image with the same shape as the label map: 255 on the
        pixels of kept components, 0 everywhere else.
    """
    _, labels, stats, _ = cv.connectedComponentsWithStats(bw)
    areas = stats[:, cv.CC_STAT_AREA]
    # One boolean per label: True when the component's area is in range.
    keep = ~((areas < minimum) | (areas > maximum))
    # Label 0 is the background and never belongs to the output.
    keep[0] = False
    # Index the per-label table with the label image: a single pass over
    # the pixels instead of one full-image scan per rejected label.
    return keep[labels].astype(np.uint8) * 255
def y_centroid_threshold(bw, minimum, maximum):
    """Drop connected components whose centroid row is outside ``[minimum, maximum]``.

    Args:
        bw: Binary image accepted by ``cv.connectedComponentsWithStats``.
        minimum: Smallest centroid y coordinate that is kept.
        maximum: Largest centroid y coordinate that is kept.

    Returns:
        A ``uint8`` image with the same shape as the label map: 255 on the
        pixels of kept components, 0 everywhere else.
    """
    _, labels, _, centroids = cv.connectedComponentsWithStats(bw)
    # Centroids are stored as (x, y); column 1 is the vertical position.
    y_centres = centroids[:, 1]
    # One boolean per label: True when the centroid's y is in range.
    keep = ~((y_centres < minimum) | (y_centres > maximum))
    # Label 0 is the background and never belongs to the output.
    keep[0] = False
    # Index the per-label table with the label image: a single pass over
    # the pixels instead of one full-image scan per rejected label.
    return keep[labels].astype(np.uint8) * 255
# First discard blobs that are too small or too large to be a letter ...
sized = size_threshold(thresh, minimum=60, maximum=300)
# ... then discard blobs whose centroid is not on the text line.
centered = y_centroid_threshold(sized, minimum=40, maximum=63)
cv.imwrite('ocr_out.png', centered)