我想定位图像中的特定区域,以便使用 Python 中的 OCR 从图像中提取一些特定的文本块。我已经写了一些代码从图像中提取文本,但是我并不需要图像上的所有内容。例如,我有一张中国身份证,上面有姓名、出生日期、地址和身份证号,我只想提取身份证顶部的姓名和图像底部的身份证号。以下是我到目前为止编写的代码:
import cv2
import numpy as np
from PIL import Image
import pytesseract
import argparse
import os
# Script: clean up a photographed document and run Tesseract OCR
# (Simplified Chinese) on the binarized result, showing intermediate images.

# cv2.imread reports failure by returning None rather than raising, so check
# explicitly and fail with a clear message instead of a cryptic cvtColor error.
image_path = "E:/sh.jpg"
test_image = cv2.imread(image_path)
if test_image is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))

gray = cv2.cvtColor(test_image, cv2.COLOR_BGR2GRAY)

#--- dilate then median-blur the grayscale image; used below as the background estimate ---
dilated_img = cv2.dilate(gray, np.ones((5, 5)), iterations=1)
bg_img = cv2.medianBlur(dilated_img, 23)

#--- finding absolute difference to preserve edges ---
# NOTE: the difference is taken against channel index 1 of the BGR image
# (green), not against `gray`; the result is inverted (255 - diff).
diff_img = 255 - cv2.absdiff(test_image[:, :, 1], bg_img)

#--- normalizing between 0 to 255 ---
norm_img = cv2.normalize(
    diff_img,
    None,
    alpha=0,
    beta=255,
    norm_type=cv2.NORM_MINMAX,
    dtype=cv2.CV_8UC1,
)
cv2.imshow('norm_img', cv2.resize(norm_img, (0, 0), fx=0.5, fy=0.5))

#--- Otsu threshold ---
# threshold() returns (computed_threshold, image); only the image is needed.
th = cv2.threshold(norm_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
cv2.imshow('th', cv2.resize(th, (0, 0), fx=0.5, fy=0.5))

# Hand the binarized array to Tesseract as an in-memory PIL image. This avoids
# writing a PID-named temp file to the working directory, which would be left
# behind if OCR raised before it could be removed.
text = pytesseract.image_to_string(Image.fromarray(th), lang='chi_sim')
print(text)

# Keep the preview windows open until a key is pressed, then release them.
cv2.waitKey(0)
cv2.destroyAllWindows()