Question

我想定位图像中的特定区域，以便在 Python 中使用 OCR 从图像中提取特定的文本块。我已经写了一些从图像中提取文本的代码，但我并不需要图像上的全部内容。例如，我有一张中国身份证，上面有姓名、出生日期、地址和身份证号，而我只想提取身份证顶部的姓名和图像底部的身份证号。下面是我到目前为止编写的代码。

import cv2
import numpy as np
from PIL import Image
import pytesseract
import argparse
import os

# Clean up a photographed document (background removal + Otsu binarisation),
# preview the intermediate images, and run Tesseract OCR on the result.
image_path = "E:/sh.jpg"
test_image = cv2.imread(image_path)
# cv2.imread reports failure by returning None instead of raising, so check
# here rather than crash later inside cvtColor with an opaque error.
if test_image is None:
    raise FileNotFoundError("Could not read image: {}".format(image_path))

gray = cv2.cvtColor(test_image, cv2.COLOR_BGR2GRAY)
# Dilation followed by a large median blur suppresses the thin dark strokes,
# leaving an estimate of the page background.
dilated_img = cv2.dilate(gray, np.ones((5, 5)), iterations=1)
bg_img = cv2.medianBlur(dilated_img, 23)

#--- finding absolute difference to preserve edges ---
# The background was estimated from `gray`, so subtract it from `gray` as well
# (the original mixed in the green channel, which is a different plane).
diff_img = 255 - cv2.absdiff(gray, bg_img)

#--- normalizing between 0 to 255 ---
norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255,
                         norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
cv2.imshow('norm_img', cv2.resize(norm_img, (0, 0), fx=0.5, fy=0.5))

#--- Otsu threshold ---
th = cv2.threshold(norm_img, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
cv2.imshow('th', cv2.resize(th, (0, 0), fx=0.5, fy=0.5))

# Hand the binarised array to Tesseract directly; no temporary file is needed,
# so nothing is left on disk if OCR raises.
# To OCR only part of the image, slice `th[y0:y1, x0:x1]` before converting.
text = pytesseract.image_to_string(Image.fromarray(th), lang='chi_sim')
print(text)

# Keep the preview windows open until a key is pressed, then release them.
cv2.waitKey(0)
cv2.destroyAllWindows()

如果有人有任何解决方案，请帮助我。

使用python中的OCR从图像中提取特定文本块

0 个答案: