我需要使用 Tesseract 来识别僧伽罗语(Sinhala)文本。我使用的系统是 CentOS 7。
我已将训练后的数据复制到 /usr/share/tesseract/tessdata 目录。
import cv2
import numpy as np
import pytesseract
from pytesseract import image_to_string
# Load the image to run OCR on. cv2.imread does not raise when the file is
# missing or cannot be decoded -- it returns None -- so fail early with a
# clear message instead of handing None to pytesseract.
img = cv2.imread("sinhala.png")
if img is None:
    raise FileNotFoundError("Could not read image file: sinhala.png")

# Optional: restrict OCR to a region of interest by slicing the array first,
# e.g. img[y1:y1 + height, x1:x1 + width].

# OpenCV stores pixels in BGR channel order, whereas pytesseract expects RGB,
# so convert before recognition. `img` itself is left untouched.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Recognise the text using the Sinhala ("sin") language data.
# NOTE(review): this call can only succeed if the installed Tesseract is able
# to load its "sin" language data; a "Failed loading language 'sin'" error
# comes from the Tesseract installation, not from this script -- presumably a
# tessdata location or traineddata/engine version mismatch; confirm on the
# target machine.
text = pytesseract.image_to_string(img_rgb, lang="sin")
运行脚本时出现以下错误:
File "readSinhala.py", line 17, in <module>
text = pytesseract.image_to_string(img,lang="sin")
File "/usr/local/lib/python3.6/site-packages/pytesseract/pytesseract.py", line 309, in image_to_string
}[output_type]()
File "/usr/local/lib/python3.6/site-packages/pytesseract/pytesseract.py", line 308, in <lambda>
Output.STRING: lambda: run_and_get_output(*args),
File "/usr/local/lib/python3.6/site-packages/pytesseract/pytesseract.py", line 218, in run_and_get_output
run_tesseract(**kwargs)
File "/usr/local/lib/python3.6/site-packages/pytesseract/pytesseract.py", line 196, in run_tesseract
raise TesseractError(proc.returncode, get_errors(error_string))
pytesseract.pytesseract.TesseractError: (1, "Tesseract Open Source OCR Engine v3.04.00 with Leptonica read_params_file: Can't open txt Failed loading language 'sin' Tesseract couldn't load any languages! Could not initialize tesseract.")