Image classification, feature extraction and model building using openCV

Posted: 2018-10-11 14:25:16

Tags: opencv image-processing deep-learning text-classification

I am currently working on a project whose problem statement is to detect handwritten text in images of a particular form. As a preprocessing step, I extracted the text as bounding boxes and obtained roughly 1,500 text crops from the form images, of which about 50 are handwritten.

The question is how I can now use these extracted images to train a classifier model that will classify an image as printed text or handwritten text. I have no prior knowledge of deep learning. Any help would be appreciated. I am uploading the image and the extracted crops, along with the code I used to extract the text from the image.

import os
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
from skimage import measure
import imutils
from imutils import contours

# cv.imread does not expand '~', so expand the home directory explicitly
im_ns = cv.imread(os.path.expanduser('~/Image processing/IMG_20180921_111952.png'))
gray = cv.cvtColor(im_ns,cv.COLOR_BGR2GRAY)
blurred_g = cv.GaussianBlur(gray,(11,11),0)

# Compare a global threshold with mean and Gaussian adaptive thresholds;
# the Gaussian adaptive result (th3) is the one used below
ret, th1 = cv.threshold(blurred_g,127,255,cv.THRESH_BINARY)
th2 = cv.adaptiveThreshold(blurred_g,255,cv.ADAPTIVE_THRESH_MEAN_C,cv.THRESH_BINARY,11,2)
th3 = cv.adaptiveThreshold(blurred_g,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C,cv.THRESH_BINARY,11,2)

##Detecting horizontal Lines and removing them
th3_di1 = th3.copy()   # work on a copy of the Gaussian adaptive threshold result
hor = int(round(th3_di1.shape[1]/30,0))
hor_struc = cv.getStructuringElement(cv.MORPH_RECT,(hor,1))

# Isolate long horizontal black runs: dilation (a max filter) turns a pixel
# white unless its whole horizontal window is black, so only runs at least
# `hor` pixels long survive; the erosion then grows them back to full length.
bw_hor_di = cv.dilate(th3_di1, hor_struc, iterations=1)
bw_hor_er = cv.erode(bw_hor_di, hor_struc, iterations=1)

# Erase the detected line pixels from the working image
th3_di1[bw_hor_er == 0] = 255

plt.figure(figsize=(20,25))
plt.imshow(th3_di1,'gray')

# perform a connected component analysis on the thresholded
# image, then initialize a mask to store only the "large"
# components
labels = measure.label(th3_di1, connectivity=1, background=255)  # 4-connectivity; `neighbors=` is removed in newer scikit-image
mask = np.zeros(th3_di1.shape, dtype="uint8")

plt.figure(figsize=(30,25))
plt.imshow(labels)


# loop over the unique components
for lab in np.unique(labels):
    # if this is the background label, ignore it
    if lab == 0:
        continue

    # otherwise, construct the label mask and count the
    # number of pixels 
    labelMask = np.zeros(th3_di1.shape, dtype="uint8")
    labelMask[labels == lab] = 255
    numPixels = cv.countNonZero(labelMask)

    # if the number of pixels in the component is sufficiently
    # large, then add it to our mask of "large blobs"
    if numPixels > 8:
        mask = cv.add(mask, labelMask)


plt.figure(figsize=(30,24))
plt.imshow(mask,'gray')

# find the contours in the mask, then sort them from left to
# right
cnts = cv.findContours(mask.copy(), cv.RETR_EXTERNAL,
    cv.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)      # handles the 2- vs 3-value return across OpenCV versions
cnts = contours.sort_contours(cnts)[0]

# loop over the contours to draw rectangles on the th3 image (Gaussian adaptive thresholding)
for (i, c) in enumerate(cnts):
    # draw a bounding box around each detected text component
    (x, y, w, h) = cv.boundingRect(c)
    cv.rectangle(th3, (x, y), (x + w, y + h), 0, 2)   # th3 is single-channel, so use a scalar colour
# show the output image
cv.imshow("Image", th3)
cv.waitKey(10000)
cv.destroyAllWindows()
##Extracting the bounding boxes and saving each crop as a separate image
for (idx, c) in enumerate(cnts, start=1):
    x, y, w, h = cv.boundingRect(c)
    roi = im_ns[y:y + h, x:x + w]          # crop from the original colour image
    cv.imwrite(str(idx) + '.jpg', roi)
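
Regarding the actual classification question: one possible baseline that needs no deep learning is to resize each extracted crop, compute HOG features, and train a linear SVM on them. The sketch below is only an illustration under assumptions not stated in the post: it presumes scikit-image and scikit-learn are installed and that the saved crops have been sorted into hypothetical printed/ and handwritten/ folders.

import glob
import cv2 as cv
import numpy as np
from skimage.feature import hog
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

def load_crops(folder, label, size=(128, 64)):
    # Read every crop in a folder, resize to a fixed size and compute HOG features
    feats, labs = [], []
    for path in glob.glob(folder + '/*.jpg'):
        img = cv.imread(path, cv.IMREAD_GRAYSCALE)
        if img is None:
            continue
        img = cv.resize(img, size)   # fixed size -> fixed-length feature vector
        feats.append(hog(img, orientations=9, pixels_per_cell=(8, 8),
                         cells_per_block=(2, 2)))
        labs.append(label)
    return feats, labs

X0, y0 = load_crops('printed', 0)        # hypothetical folder of printed-text crops
X1, y1 = load_crops('handwritten', 1)    # hypothetical folder of handwritten crops
X = np.array(X0 + X1)
y = np.array(y0 + y1)

# Hold out part of the data; stratify because the classes are heavily
# imbalanced (about 50 handwritten crops out of roughly 1,500 in total)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2,
                                          stratify=y, random_state=0)
clf = LinearSVC(class_weight='balanced')   # re-weight the rare handwritten class
clf.fit(X_tr, y_tr)
print('held-out accuracy:', clf.score(X_te, y_te))

With only about 50 handwritten samples out of roughly 1,500 crops, the class imbalance matters more than the choice of model, so a stratified split and class_weight='balanced' (or oversampling the handwritten class) are worth keeping whatever classifier is used.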

Images: Original, Edited Doc, Extracted Printed Text, Extracted Handwritten Text

0 Answers:

There are no answers yet.