我尝试使用OpenCV Python检测图像中的文本,特别是带引号的图像。为此,我首先训练一些文字图像。我检测图像中文本的每个字符进行训练。对于具有适当单词样式的图像,可以正确检测字符。但是对于某些图像,文本(字符)区域无法正确检测。我在下面附上了这个代码。如何修改代码以便正确检测字符
import sys
import numpy as np
import cv2
import os
MIN_CONTOUR_AREA = 100
RESIZED_IMAGE_WIDTH = 20
RESIZED_IMAGE_HEIGHT = 30
def main():
imgTrainingNumbers = cv2.imread("E:\God - Level 4 Research Project\Testings\Tharu\godd/jbpoetry.png")
if imgTrainingNumbers is None:
print ("error: image not read from file \n\n")
os.system("pause")
return
imgGray = cv2.cvtColor(imgTrainingNumbers, cv2.COLOR_BGR2GRAY)
imgBlurred = cv2.GaussianBlur(imgGray, (5,5), 0)
imgThresh = cv2.adaptiveThreshold(imgBlurred,
255,
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV,
11,
2)
cv2.imshow("imgThresh", imgThresh)
imgThreshCopy = imgThresh.copy()
imgContours, npaContours, npaHierarchy = cv2.findContours(imgThreshCopy,
cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
npaFlattenedImages = np.empty((0, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
intClassifications = []
intValidChars = [ord('0'), ord('1'), ord('2'), ord('3'), ord('4'), ord('5'), ord('6'), ord('7'), ord('8'), ord('9'),
ord('A'), ord('B'), ord('C'), ord('D'), ord('E'), ord('F'), ord('G'), ord('H'), ord('I'), ord('J'),
ord('K'), ord('L'), ord('M'), ord('N'), ord('O'), ord('P'), ord('Q'), ord('R'), ord('S'), ord('T'),
ord('U'), ord('V'), ord('W'), ord('X'), ord('Y'), ord('Z'),ord('a'),ord('b'),ord('c'),ord('d'),
ord('e'),ord('f'),ord('g'),ord('h'),ord('i'),ord('j'),ord('k'),ord('l'),ord('m'),ord('n'),ord('o'),
ord('p'),ord('q'),ord('r'),ord('s'),ord('t'),ord('u'),ord('v'),ord('w'),ord('x'),ord('y'),ord('z') ]
for npaContour in npaContours:
if cv2.contourArea(npaContour) > MIN_CONTOUR_AREA:
[intX, intY, intW, intH] = cv2.boundingRect(npaContour)
cv2.rectangle(imgTrainingNumbers,
(intX, intY),
(intX+intW,intY+intH),
(0, 0, 255),
2)
imgROI = imgThresh[intY:intY+intH, intX:intX+intW]
imgROIResized = cv2.resize(imgROI, (RESIZED_IMAGE_WIDTH, RESIZED_IMAGE_HEIGHT))
cv2.imshow("imgROI", imgROI)
cv2.imshow("imgROIResized", imgROIResized)
cv2.imshow("training_numbers.png", imgTrainingNumbers)
intChar = cv2.waitKey(0)
if intChar == 27:
sys.exit()
elif intChar in intValidChars:
print(intChar)
intClassifications.append(intChar)
print(intChar)
npaFlattenedImage = imgROIResized.reshape((1, RESIZED_IMAGE_WIDTH * RESIZED_IMAGE_HEIGHT))
npaFlattenedImages = np.append(npaFlattenedImages, npaFlattenedImage, 0)
fltClassifications = np.array(intClassifications, np.float32)
npaClassifications = fltClassifications.reshape((fltClassifications.size, 1))
print ("\n\ntraining complete !!\n")
np.savetxt("classificationsNEWG.txt", npaClassifications)
np.savetxt("flattened_imagesNEWG.txt", npaFlattenedImages)
cv2.destroyAllWindows()
return
if __name__ == "__main__":
main()
答案 0 :(得分:2)
您要做的是一种非常天真的方法,只需应用阈值并检测轮廓在此处不起作用。围绕这项任务发表了大量研究论文。您可以参考这些并尝试实施或可以使用着名的 tesseract OCR 的 image_to_boxes 功能。您可以从here下载它,当您使用python时,您可以从here安装 pytesseract - python wrapper for tesseract ,并使用以下代码来实现您的期望。
checkupdate{
2: true, // index of the row: whether checked or unchecked
5: false,
20: true
}
结果图像
请注意,有许多错误检测,您需要在培训过程中忽略它们。