我正在使用 Python 3.7 和 Tesseract 4.00,并尝试用 Tesseract 进行表格检测。
我遵循了此处讨论中给出的方法,但在当前代码中,所有元素的块类型都是 1,即“未知”。
作为参考,我附上了 Tesseract 中定义的块类型说明。
class TableDetector:
    """Run Tesseract layout analysis on an image and report its blocks.

    Relies on names imported elsewhere in the file: ``PyTessBaseAPI``,
    ``RIL`` and ``iterate_level`` (tesserocr), ``cvtColor``,
    ``COLOR_BGR2GRAY``, ``medianBlur`` and ``GaussianBlur`` (OpenCV),
    ``Image`` (PIL) and the project ``Logger``.
    """

    # Directory that holds the Tesseract ``*.traineddata`` files.
    # TODO: the original snippet left this value blank; set it to the real
    # tessdata directory before use.
    __TRAINED_DATA_PATH = ""

    def detect_table(self, image, tx_id, do_pre_process=True):
        """Analyse the page layout of *image* and list its blocks.

        Args:
            image: Image array. When ``do_pre_process`` is true it is passed
                to ``cvtColor(..., COLOR_BGR2GRAY)``, so it is presumably a
                BGR array as produced by OpenCV -- TODO confirm with callers.
            tx_id: Accepted for interface compatibility; not used here.
            do_pre_process: When true, convert to grayscale and apply a
                3x3 median blur followed by a 3x3 Gaussian blur before
                handing the image to Tesseract.

        Returns:
            A list of ``(orientation, block_type)`` tuples, one per layout
            block, in iteration order. The list is empty when no layout was
            produced or when an error was logged.
            NOTE(review): the original returned an undefined ``result_dec``;
            this shape was chosen during review -- confirm it suits callers.
        """
        result_dec = []
        try:
            pre_processed_image = image
            if do_pre_process:
                pre_processed_image = cvtColor(image, COLOR_BGR2GRAY)
                pre_processed_image = medianBlur(pre_processed_image, 3)
                pre_processed_image = GaussianBlur(
                    pre_processed_image, (3, 3), 0
                )

            # psm=3 is PSM.AUTO (fully automatic page segmentation). The
            # previous psm=6 (single uniform block) bypasses automatic
            # segmentation, so the table finder presumably never ran and
            # every block came back with the same type.
            with PyTessBaseAPI(psm=3, oem=1, lang="eng",
                               path=self.__TRAINED_DATA_PATH) as api:
                # Configure the table finder before the image is loaded.
                api.SetVariable("textord_tabfind_find_tables", "true")
                api.SetVariable("textord_tablefind_recognize_tables", "true")
                # Debug switches kept from the original.
                # NOTE(review): these may need a ScrollView-enabled build.
                api.SetVariable("textord_show_tables", "true")
                api.SetVariable("textord_tablefind_show_stats", "true")

                api.SetImage(Image.fromarray(pre_processed_image))

                layout = api.AnalyseLayout()
                # AnalyseLayout() yields None for an empty page or on error.
                if layout is not None:
                    for block in iterate_level(layout, RIL.BLOCK):
                        orientation = block.Orientation()
                        block_type = block.BlockType()
                        print(orientation)
                        print(block_type)
                        result_dec.append((orientation, block_type))
        except Exception as e:
            # Best-effort boundary: log and fall through to return whatever
            # was collected so far.
            Logger.log.error("Error in detect_table : %s" % e, exc_info=True)
        return result_dec