I am currently working on a task where I have to extract letters or digits from an image. I use OpenCV to separate the text from the background, threshold the image, deskew it using the line that runs under each character, and then feed the result to Tesseract via pytesseract. I am using the default Tesseract eng traineddata, but the results for the characters are very poor.
On that image, I run cv2.HoughLinesP to find line segments and cluster the segments whose distance from each other falls within a set range. From a line cluster, I pick the segment with the largest y pixel value as the baseline and use its angle to deskew the image and crop a fixed-size ROI.
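The clustering helper calculate_lines (called in the driver code at the end) is not included in the code below; roughly, it does something like the sketch here. The Hough parameters and the max_gap threshold are illustrative placeholders, not the exact values from my pipeline:

import cv2
import numpy as np

def calculate_lines(binary_img, max_gap=15):
    """
    Sketch: detect line segments with HoughLinesP and group segments whose
    centroid y distance is within max_gap pixels.
    Returns a list of groups; each group is a list of ([x1, y1, x2, y2], (cx, cy)).
    """
    lines = cv2.HoughLinesP(binary_img, 1, np.pi / 180, threshold=50,
                            minLineLength=40, maxLineGap=10)
    if lines is None:
        return []
    segments = []
    for x1, y1, x2, y2 in lines[:, 0]:
        centroid = (float(x1 + x2) / 2.0, float(y1 + y2) / 2.0)
        segments.append(([x1, y1, x2, y2], centroid))
    # Greedy grouping by vertical distance between segment centroids
    segments.sort(key=lambda s: s[1][1])
    groups = []
    for seg in segments:
        if groups and abs(seg[1][1] - groups[-1][-1][1][1]) <= max_gap:
            groups[-1].append(seg)
        else:
            groups.append([seg])
    return groups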
You can see the result of deskewing and cropping the letters here. However, if you look closely at the color image and at more images, you will notice that on some letters, especially on the 'D', part of the character stroke is "shifted", as if it had been cut. That artefact seems to be what makes Tesseract fail so often. What approach should I take to correct this distortion?
The code below is what I use to preprocess the images:
import math

import cv2
import pytesseract


def threshold_and_preprocess(input_image):
    """
    Thresholds the input image and performs preprocessing operations.
    :param input_image: BGR image
    :return: Binary image with text as white
    """
    input_gray = cv2.cvtColor(input_image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(input_gray, (3, 3), 0)
    # Unsharp mask: boost the original and subtract the blurred copy
    sharpened = cv2.addWeighted(input_gray, 1.5, blur, -0.5, 0)
    denoised = cv2.fastNlMeansDenoising(sharpened, h=13, templateWindowSize=7, searchWindowSize=21)
    ret, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    return thresh
def find_baseline_and_deskew(input_img, line_group, output_width, output_height):
    """
    Given the binary image input_img and the cluster data line_group, return a list of tuples (img, point),
    where img is a binary deskewed crop (features as black) sized according to output_width and output_height,
    and point is the centre-point coordinate of the reference line, relative to the ROI region.
    :param input_img: Binary image where the background is black and the features are white
    :param line_group: list of line-coordinate clusters
    :return: list of (img, point) tuples as described above
    """
    return_list = []
    for group in line_group:
        # Pick the segment with the largest y value in the cluster as the baseline
        sorted_group = sorted(group, key=lambda x: max(x[0][1], x[0][3]), reverse=True)
        representation_line = sorted_group[0]
        # r_c holds the 4 coordinates of the start and end points of the line segment
        r_c = representation_line[0]
        # r_centroid is the x, y coordinate of the centre of the line
        r_centroid = representation_line[1]
        angle = math.atan2(r_c[3] - r_c[1], r_c[2] - r_c[0]) * 180.0 / math.pi
        # Invert so the features become black on a white background
        t_img = cv2.bitwise_not(input_img)
        rows, cols = t_img.shape
        rot_mat = cv2.getRotationMatrix2D(r_centroid, angle, 1)
        rotated = cv2.warpAffine(t_img, rot_mat, (cols, rows), borderMode=cv2.BORDER_CONSTANT,
                                 borderValue=(255, 255, 255))
        # Crop a fixed-size ROI around the baseline centroid, clamped to the image bounds
        cropped_result = rotated[max(int(r_centroid[1] - output_height), 0):min(int(r_centroid[1] + 8), rotated.shape[0]),
                                 max(0, int(r_centroid[0] - output_width)):min(rotated.shape[1], int(r_centroid[0] + output_width))]
        return_list.append((cropped_result, r_centroid))
    return return_list
input_img = cv2.imread("input.png", cv2.IMREAD_COLOR)
processed = threshold_and_preprocess(input_img)
cv2.imshow("black", processed)
lines = calculate_lines(processed)
output_width, output_height = 40, 60  # fixed ROI size for the crops (placeholder values here)
for img, coord in find_baseline_and_deskew(processed, lines, output_width, output_height):
    cv2.imshow(pytesseract.image_to_string(img, lang="eng", config="--psm 10"), img)
cv2.waitKey(0)
cv2.destroyAllWindows()