我正在尝试使用 OCR 和 Python 从一组图像中提取数据。这些图像包含表格数据。我面临的问题是无法正确读取图像中深灰色/黑色底色的部分。我尝试了形态学操作、伽玛校正和其他一些图像变换,但它们似乎都没有帮助。我该怎么做才能读取图像的这些部分?我使用的是 Python 3.7、pytesseract 和 OpenCV(cv2)。我的示例图片如下:
这是我的代码:
import glob
import os
import tempfile
from os import listdir
from os.path import isfile, join

import cv2
import numpy as np
import PIL.Image
import pytesseract
import xlwt
from pdf2jpg import pdf2jpg
# Source PDF and the destination folder handed to pdf2jpg.
input_path = r".pdf"
output_path = r"..."

# Convert every page of the PDF (pages="ALL").
result = pdf2jpg.convert_pdf2jpg(input_path, output_path, pages="ALL")

# Glob pattern selecting every entry of the image folder.
path = "...\\*"

# Collect the matching paths; the driver loop below iterates this list.
empty_list = []
empty_list.extend(glob.glob(path))
print(empty_list)  # prints path of all images in the folder
def change_path(image_path):
    """Return ``image_path`` with every backslash doubled.

    The previous version ignored its argument and operated on the
    module-level glob pattern ``path``, so every call returned the same
    string regardless of the image being processed.

    Args:
        image_path: Path string of a single image.

    Returns:
        A copy of ``image_path`` in which each ``\\`` is replaced by ``\\\\``.
    """
    print(type(image_path))
    new_path = image_path.replace("\\", "\\\\")
    print(new_path)
    return new_path
def read_and_display(window_name, image_name):
    """Load an image from disk, show it in a window and wait for a key press.

    Args:
        window_name: Title of the OpenCV window used for display.
        image_name: Path of the image file to load.

    Returns:
        The image array returned by ``cv2.imread``. The previous version
        returned the result of ``cv2.imshow``, which is always ``None``.
    """
    ori_img = cv2.imread(image_name)
    print(image_name)
    cv2.imshow(window_name, ori_img)
    # Block until any key is pressed so the window stays visible.
    cv2.waitKey(0)
    return ori_img
def resize_the_image(image_name):
    """Load the image at ``image_name`` and return it resized to 1500x1300.

    Args:
        image_name: Path of the image file to load.

    Returns:
        The resized image, interpolated with ``cv2.INTER_AREA``.
    """
    source = cv2.imread(image_name)
    target_size = (1500, 1300)  # passed to cv2.resize as dsize
    return cv2.resize(source, target_size, interpolation=cv2.INTER_AREA)
def gray_the_image(image_name):
    """Load an image, convert it to grayscale and binarise it with Otsu.

    Args:
        image_name: Path of the image file to load.

    Returns:
        The thresholded single-channel image (second element of the
        ``cv2.threshold`` result).
    """
    colour = cv2.imread(image_name)
    gray = cv2.cvtColor(colour, cv2.COLOR_BGR2GRAY)
    mode = cv2.THRESH_BINARY | cv2.THRESH_OTSU
    _, binary = cv2.threshold(gray, 0, 255, mode)
    return binary
def dilate_the_image(image_name):
    """Load an image and dilate it with a 7x7 all-ones kernel.

    Args:
        image_name: Path of the image file to load.

    Returns:
        The dilated image.
    """
    kernel = np.ones((7, 7), np.uint8)
    source = cv2.imread(image_name)
    return cv2.dilate(source, kernel)
def bg_the_image(image_name):
    """Load an image and return its median-blurred version (aperture 21).

    Args:
        image_name: Path of the image file to load.

    Returns:
        The blurred image, used elsewhere in this file as a background
        estimate.
    """
    source = cv2.imread(image_name)
    return cv2.medianBlur(source, 21)
def difference_the_image(image_name):
    """Load an image and return the inverted difference from its background.

    The background is the median blur (aperture 21) of the image; the result
    is ``255 - |image - background|``.

    Args:
        image_name: Path of the image file to load.

    Returns:
        The inverted absolute-difference image.
    """
    source = cv2.imread(image_name)
    background = cv2.medianBlur(source, 21)
    deviation = cv2.absdiff(source, background)
    return 255 - deviation
def normalize_the_image(image_name):
    """Return the background-difference image stretched to the 0..255 range.

    Args:
        image_name: Path of the image file to load.

    Returns:
        The min-max normalised difference image.
    """
    # Same load / median-blur / inverted-absdiff stage as difference_the_image.
    diff_img = difference_the_image(image_name)
    # Separate destination buffer handed to cv2.normalize.
    destination = diff_img.copy()
    return cv2.normalize(
        diff_img,
        destination,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8UC1,
    )
def threshold_the_image(image_name):
    """Return the normalised difference image truncated at 230 and re-stretched.

    Args:
        image_name: Path of the image file to load.

    Returns:
        The image after ``cv2.THRESH_TRUNC`` at 230 followed by a min-max
        normalisation back to 0..255.
    """
    # Same load / difference / normalise stages as normalize_the_image.
    norm_img = normalize_the_image(image_name)
    _, truncated = cv2.threshold(norm_img, 230, 0, cv2.THRESH_TRUNC)
    # Normalise in place: the truncated image is both source and destination.
    return cv2.normalize(
        truncated,
        truncated,
        alpha=0,
        beta=255,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_8UC1,
    )
def adjust_gamma(image_name, gamma=1.0):
    """Apply gamma correction to an image through a 256-entry lookup table.

    Args:
        image_name: The image array to correct. Despite the name, this is
            passed straight to ``cv2.LUT`` and is not a file path.
        gamma: Gamma value; each level ``i`` maps to
            ``((i / 255) ** (1 / gamma)) * 255``.

    Returns:
        The image remapped through the lookup table.
    """
    exponent = 1.0 / gamma
    levels = [((level / 255.0) ** exponent) * 255 for level in np.arange(0, 256)]
    lookup = np.array(levels).astype("uint8")
    return cv2.LUT(image_name, lookup)
def adjust_the_image(image_name):
    """Run the full preprocessing chain and gamma-correct the result.

    Args:
        image_name: Path of the image file to load.

    Returns:
        The gamma-corrected image with a ``g=<gamma>`` label drawn at
        position (10, 30).
    """
    gamma = 0.5  # change the value here to get different result
    # Same load / difference / normalise / truncate stages as
    # threshold_the_image.
    stretched = threshold_the_image(image_name)
    corrected = adjust_gamma(stretched, gamma=gamma)
    label = "g={}".format(gamma)
    return cv2.putText(
        corrected,
        label,
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (0, 0, 255),
        3,
    )
def tesseract_call(image_path):
    """Run Tesseract OCR on an image file and append the text to ``f.txt``.

    Args:
        image_path: Path of the image file to recognise.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
    # NOTE(review): newer Tesseract releases spell this option "--psm";
    # confirm against the installed version.
    config = '-psm 6'
    # Context managers guarantee that the image handle and the output file
    # are released even if OCR or the write raises.
    with PIL.Image.open(image_path) as page:
        text = pytesseract.image_to_string(page, config=config)
    with open("f.txt", "a+", encoding="utf-8") as out_file:
        out_file.write(text)
    return text
# Driver: display, preprocess and OCR every image found by the glob above.
for i in empty_list:
    redefinedpath = change_path(i)
    print(redefinedpath)
    read_and_display("newWindow", i)

    # Each stage below reloads the original file and returns a new image;
    # these return values are not used.
    resize_the_image(i)
    gray_the_image(i)
    dilate_the_image(i)
    bg_the_image(i)
    difference_the_image(i)
    normalize_the_image(i)
    threshold_the_image(i)

    # Keep the output of the final stage. Previously it was discarded and
    # OCR ran on the untouched original file, so no preprocessing (including
    # any gamma value) could ever influence the recognised text.
    # NOTE(review): adjust_the_image draws a "g=<gamma>" label onto the image,
    # which will also be seen by the OCR engine.
    processed = adjust_the_image(i)

    # tesseract_call takes a file path, so persist the processed image to a
    # temporary file for the duration of the OCR call.
    handle, processed_path = tempfile.mkstemp(suffix=".png")
    os.close(handle)
    try:
        cv2.imwrite(processed_path, processed)
        tesseract_call(processed_path)
    finally:
        os.remove(processed_path)
我尝试了各种“gamma”值,但都无济于事。