我目前正在做一个项目,要解决的问题是从特定表单的图像中检测手写文本。作为预处理步骤,我以边界框的形式提取了文本区域,并从表单图像中提取了大约1500张文本图像,其中有50张是手写的。
我现在的问题是:如何使用这些提取出的图像来训练一个分类器模型,把每张图像分类为打印文本或手写文本?我没有深度学习方面的先验知识,任何帮助我都将不胜感激。我上传了原始图像、提取出的图像,以及从图像中提取文本的代码。
import os

# Load the scanned form image.
# OpenCV does not expand "~", and cv.imread returns None instead of raising
# when the file cannot be read, so expand the path explicitly and fail early
# with a clear message rather than crashing later inside cvtColor.
image_path = os.path.expanduser('~/Image processing/IMG_20180921_111952.png')
im_ns = cv.imread(image_path)
if im_ns is None:
    raise IOError("Could not read image: {}".format(image_path))

# Convert to grayscale and smooth with an 11x11 Gaussian kernel before
# thresholding.
gray = cv.cvtColor(im_ns, cv.COLOR_BGR2GRAY)
blurred_g = cv.GaussianBlur(gray, (11, 11), 0)

# Three binarizations of the blurred image:
#   th1 - global threshold at 127
#   th2 - adaptive threshold, mean of an 11x11 neighbourhood minus 2
#   th3 - adaptive threshold, Gaussian-weighted 11x11 neighbourhood minus 2
ret, th1 = cv.threshold(blurred_g, 127, 255, cv.THRESH_BINARY)
th2 = cv.adaptiveThreshold(
    blurred_g, 255, cv.ADAPTIVE_THRESH_MEAN_C, cv.THRESH_BINARY, 11, 2
)
th3 = cv.adaptiveThreshold(
    blurred_g, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 11, 2
)
## Detecting horizontal lines and removing them
# NOTE(review): th3_di is not defined in the visible part of this script;
# presumably it is a processed version of th3 created elsewhere - confirm.
th3_di1 = th3_di.copy()

# Horizontal structuring element: one pixel high and 1/30 of the image width
# wide. Clamp to at least 1 px so very narrow images do not produce a
# zero-width kernel.
hor = max(1, int(round(th3_di1.shape[1] / 30, 0)))
hor_struc = cv.getStructuringElement(cv.MORPH_RECT, (hor, 1))
# bw_hor_er is not used in the visible code; kept in case later code reads it.
bw_hor_er = cv.erode(th3_di1, hor_struc, iterations=1)
bw_hor_di = cv.dilate(th3_di1, hor_struc, iterations=1)

# Every pixel that is still black after the horizontal dilation is painted
# white in the working image; all other pixels are left unchanged.
# A single boolean-mask assignment replaces the per-pixel Python loop.
th3_di1[bw_hor_di == 0] = 255

plt.figure(figsize=(20, 25))
plt.imshow(th3_di1, 'gray')
# Perform a connected-component analysis on the thresholded image, then build
# a mask that keeps only the "large" components.
# connectivity=1 is the 4-neighbourhood in 2-D; it replaces the deprecated
# `neighbors=4` argument, which newer scikit-image releases no longer accept.
# Pixels equal to 255 are treated as background and receive label 0.
labels = measure.label(th3_di1, connectivity=1, background=255)
plt.figure(figsize=(30, 25))
plt.imshow(labels)

# Count the pixels of every label in one pass instead of building a full-size
# temporary mask per component.
label_ids, pixel_counts = np.unique(labels, return_counts=True)

# Keep components with more than 8 pixels, ignoring the background label 0.
large_ids = label_ids[(label_ids != 0) & (pixel_counts > 8)]

# 255 where the pixel belongs to a kept component, 0 elsewhere.
mask = np.where(np.isin(labels, large_ids), 255, 0).astype("uint8")

plt.figure(figsize=(30, 24))
plt.imshow(mask, 'gray')
# Find the external contours in the mask, then sort them from left to right.
found = cv.findContours(mask.copy(), cv.RETR_EXTERNAL,
                        cv.CHAIN_APPROX_SIMPLE)
# cv.findContours returns (contours, hierarchy) on OpenCV 2 and 4 but
# (image, contours, hierarchy) on OpenCV 3. Selecting by tuple length works on
# all of them; the previous `is_cv2()` check picked the hierarchy on OpenCV 4.
cnts = found[0] if len(found) == 2 else found[1]
# sort_contours cannot handle an empty contour list, so only sort when
# something was found.
if len(cnts) > 0:
    cnts = contours.sort_contours(cnts)[0]

# Draw the bounding rectangle of every contour onto th3 (the Gaussian
# adaptive-threshold image).
for c in cnts:
    (x, y, w, h) = cv.boundingRect(c)
    cv.rectangle(th3, (x, y), (x + w, y + h), (0, 255), 2)
    # Label text is an empty string, so nothing visible is drawn here.
    cv.putText(th3, "", (x + w + 10, y + h), 0, 0.3, (0, 255, 0))

# Show the annotated image for up to 10 seconds (or until a key is pressed).
cv.imshow("Image", th3)
cv.waitKey(10000)
cv.destroyAllWindows()
## Extracting the bounding boxes
# Crop each contour's bounding rectangle out of the original colour image and
# save it as "<n>.jpg", numbered from 1 in the (sorted) contour order.
idx = 0
for idx, contour in enumerate(cnts, start=1):
    x, y, w, h = cv.boundingRect(contour)
    bottom = y + h
    right = x + w
    roi = im_ns[y:bottom, x:right]
    cv.imwrite(str(idx) + '.jpg', roi)