I am facing a serious problem and cannot find any solution.
The goal: I am trying to build a captcha recognizer. I separated the images of the letters and digits, preprocessed them, built the model, and then tried it on real data, and every prediction came out as the same class.
It is not even always the same class: on one model run most predictions were "SSSS", and on another attempt they were all "1111". Here is an example of an input image:
Most of the data is clean, with no black blobs around the letters. These are the steps I followed, with the code:
1 - Prepare the data matrix as the input matrix:
import os
import cv2
import numpy as np
from imutils import paths

data = []
labels = []
LETTER_IMAGES_FOLDER = "/content/drive/My Drive/Data/extracted_letter_images"

for image_file in paths.list_images(LETTER_IMAGES_FOLDER):
    # Load the image and convert it to grayscale
    image = cv2.imread(image_file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.blur(image, (5, 5))
    image = cv2.resize(image, (40, 40), interpolation=cv2.INTER_CUBIC)
    image = np.expand_dims(image, axis=2)
    # Grab the name of the letter based on the folder it was in
    label = image_file.split(os.path.sep)[-2]
    # Add the letter image and its label to our training data
    data.append(image)
    labels.append(label)

# Scale the raw pixel intensities to the range [0, 1] (this improves training)
data = np.array(data, dtype="float32") / 255.0
labels = np.array(labels)
The classes are close to balanced; the counts are:
33 labels
(array(['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D',
'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S',
'T', 'U', 'V', 'W', 'X', 'Y', 'Z'], dtype='<U1'),
array([8102, 6393, 6332, 6316, 6427, 6173, 6699, 6404, 6956, 6272, 6331,
6353, 6328, 6607, 6250, 6396, 6466, 5985, 6421, 6314, 6196, 6502,
6542, 6417, 6435, 6421, 6396, 6341, 6107, 6131, 6360, 6383, 6457]))
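For reference, the counts above were presumably produced with something like the following (my sketch, assuming labels is the array built in step 1):

# Count how many samples exist per class
classes, counts = np.unique(labels, return_counts=True)
print(len(classes), "labels")
print(classes, counts)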
2 - Train/test split:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

lb = LabelBinarizer()
lb_label = lb.fit_transform(labels)  # fit on the full labels array, not a single label
(X_train, X_test, Y_train, Y_test) = train_test_split(data, lb_label, test_size=0.25, random_state=0)
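A quick sanity check I would add here (not part of the original code) to confirm the binarizer was fit on all 33 classes and that the one-hot rows have the matching width:

print(lb.classes_)       # expect the 33 characters listed above
print(lb_label.shape)    # expect (num_samples, 33)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)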
3 - Build the model and save it:
import pickle
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

MODEL_FILENAME = "/content/drive/My Drive/Data/model1.hdf5"
MODEL_LABELS_FILENAME = "/content/drive/My Drive/Data/labels1.dat"

# Save the mapping from labels to one-hot encodings.
# We'll need it later to decode what the model's predictions mean.
with open(MODEL_LABELS_FILENAME, "wb") as f:
    pickle.dump(lb, f)

model = Sequential()
# First convolutional layer with max pooling
model.add(Conv2D(35, (2, 2), padding="same", input_shape=(40, 40, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
# Second convolutional layer with max pooling
model.add(Conv2D(40, (2, 2), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
# Third convolutional layer with max pooling
model.add(Conv2D(50, (2, 2), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
# Flatten the CNN output so we can connect it to the fully connected output layer
model.add(Flatten())
model.add(Dense(33, activation="softmax"))

# Ask Keras to build the TensorFlow model behind the scenes
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), batch_size=32, epochs=10, verbose=1)
model.save(MODEL_FILENAME)
The training output was:
Train on 159159 samples, validate on 53054 samples
Epoch 1/10
159159/159159 [==============================] - 44s 275us/step - loss: 0.4009 - acc: 0.8936 - val_loss: 0.1439 - val_acc: 0.9642
Epoch 2/10
159159/159159 [==============================] - 43s 273us/step - loss: 0.0982 - acc: 0.9752 - val_loss: 0.0638 - val_acc: 0.9849
Epoch 3/10
159159/159159 [==============================] - 43s 272us/step - loss: 0.0619 - acc: 0.9837 - val_loss: 0.0471 - val_acc: 0.9882
Epoch 4/10
159159/159159 [==============================] - 43s 273us/step - loss: 0.0431 - acc: 0.9879 - val_loss: 0.0499 - val_acc: 0.9882
Epoch 5/10
159159/159159 [==============================] - 44s 274us/step - loss: 0.0318 - acc: 0.9910 - val_loss: 0.0540 - val_acc: 0.9864
Epoch 6/10
159159/159159 [==============================] - 43s 273us/step - loss: 0.0252 - acc: 0.9929 - val_loss: 0.0530 - val_acc: 0.9878
Epoch 7/10
159159/159159 [==============================] - 43s 272us/step - loss: 0.0202 - acc: 0.9940 - val_loss: 0.0463 - val_acc: 0.9904
Epoch 8/10
159159/159159 [==============================] - 43s 271us/step - loss: 0.0175 - acc: 0.9948 - val_loss: 0.0543 - val_acc: 0.9887
Epoch 9/10
159159/159159 [==============================] - 43s 272us/step - loss: 0.0159 - acc: 0.9951 - val_loss: 0.0478 - val_acc: 0.9888
Epoch 10/10
159159/159159 [==============================] - 43s 271us/step - loss: 0.0129 - acc: 0.9960 - val_loss: 0.0387 - val_acc: 0.9922
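Judging by those validation numbers, the trained network itself looks healthy. A quick check (my addition, not in the original code) that it is not collapsing to a single class on the held-out set:

# Distribution of predicted classes on X_test; if the model had
# collapsed, a single class would dominate here as well
pred = model.predict(X_test)
pred_letters = lb.inverse_transform(pred)
print(np.unique(pred_letters, return_counts=True))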
4 - Then I applied the model to real images: clean the image, then detect the letter regions with my own contour calculations:
import matplotlib.pyplot as plt

img = cv2.imread(image_paths[5845])  # path of the image
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Threshold to remove the background
thr = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU)[1]

# Morphological cleanup
kernel = np.ones((3, 3), np.uint8)
img = cv2.dilate(thr, kernel)
kernel = np.ones((3, 3), np.uint8)
img = cv2.erode(img, kernel)
kernel = np.ones((2, 2), np.uint8)
img = cv2.dilate(img, kernel)

# Add a border around the image to prevent loss of information
img = cv2.copyMakeBorder(img, 8, 8, 8, 8, cv2.BORDER_CONSTANT, value=255)

# Invert the image; otherwise findContours would detect the outer contour
img = cv2.bitwise_not(img)
contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
print(len(contours))

plt.imshow(img, cmap='gray')
plt.show()

# Keep only the targeted contours: no outer image, no holes inside items
newContours = []
letter_image_regions = []

# Loop over matched pairs of contours and hierarchy entries
# (hierarchy is wrapped in an extra dimension, hence hierarchy[0])
for contour, hierarch in zip(contours, hierarchy[0]):
    if hierarch[3] == -1:  # keep only contours with no parent (top-level)
        newContours.append(contour)

lengthContours = len(newContours)
for contour in newContours:
    area = cv2.contourArea(contour)
    if area > 50:
        # Get the rectangle that contains the contour
        x, y, w, h = cv2.boundingRect(contour)
        # Compare the width and height of the contour to detect letters
        # that are conjoined into one chunk, and split them evenly
        if lengthContours == 5:
            letter_image_regions.append((x, y, w, h))
        elif lengthContours == 4:
            if w / h > 1.2:
                half_width = int(w / 2)
                letter_image_regions.append((x, y, half_width, h))
                letter_image_regions.append((x + half_width, y, half_width, h))
            else:
                # This is a normal letter by itself
                letter_image_regions.append((x, y, w, h))
        elif lengthContours == 1:
            if 2 <= w / h <= 4:
                one_item = int(w / 4)
                for i in range(4):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            elif w / h > 4:
                one_item = int(w / 5)
                for i in range(5):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            else:
                letter_image_regions.append((x, y, w, h))
        elif lengthContours == 3:
            if 1.2 <= w / h <= 2.1:
                half_width = int(w / 2)
                letter_image_regions.append((x, y, half_width, h))
                letter_image_regions.append((x + half_width, y, half_width, h))
            elif w / h > 2.1:
                one_item = int(w / 3)
                for i in range(3):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            else:
                letter_image_regions.append((x, y, w, h))
        elif lengthContours == 2:
            if 1.2 < w / h <= 2:
                one_item = int(w / 3)
                for i in range(3):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            elif w / h > 2:
                one_item = int(w / 4)
                for i in range(4):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            else:
                letter_image_regions.append((x, y, w, h))

# Sort the regions left to right
letter_image_regions = sorted(letter_image_regions, key=lambda r: r[0])

img = cv2.subtract(255, img)  # reverse the colors again

# Create an output image and a list to hold the predicted letters
output = cv2.merge([img] * 3)
predictions = []

# Loop over the letters
for letter_bounding_box in letter_image_regions:
    # Grab the coordinates of the letter in the image
    x, y, w, h = letter_bounding_box
    # Extract the letter from the original image with a 2-pixel margin around the edge
    letter_image = img[y - 2:y + h + 2, x - 2:x + w + 2]
    # Force the crop to pure black and white
    letter_image[letter_image < 150] = 0
    letter_image[letter_image > 150] = 255
    # Add a border around the letter to prevent loss of information
    letter_image = cv2.copyMakeBorder(letter_image, 8, 8, 8, 8, cv2.BORDER_CONSTANT, value=255)
    letter_image = cv2.resize(letter_image, (40, 40), interpolation=cv2.INTER_CUBIC)
    cv2.imwrite("{}.png".format(str(x)), letter_image)
    # Turn the single image into a 4D batch of one image to make Keras happy
    letter_image = np.expand_dims(letter_image, axis=2)
    letter_image = np.expand_dims(letter_image, axis=0)
    # Ask the neural network to make a prediction
    prediction = model.predict(letter_image)
    # Convert the one-hot-encoded prediction back to a normal letter
    letter = lb.inverse_transform(prediction)[0]
    print(letter)
    predictions.append(letter)
    # Draw the prediction on the output image
    cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4), (0, 255, 0), 1)
    cv2.putText(output, letter, (x - 5, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

# Print the captcha's text
captcha_text = "".join(predictions)
print("CAPTCHA text is: {}".format(captcha_text))
The prediction result is:
CAPTCHA text is: 1111
I tried the model on 100 different images, and it predicted 1111 for all of them.
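One mismatch worth noting (my observation, not established as the cause): the training images were blurred with cv2.blur(image, (5, 5)) and scaled to [0, 1] by dividing by 255.0, while the inference loop above feeds model.predict raw uint8 crops in [0, 255] with no blur. A minimal sketch of mirroring the training preprocessing on a raw letter crop before predicting, under that assumption (preprocess_like_training is a hypothetical helper, not from the original code):

def preprocess_like_training(crop):
    # Apply the same transforms used when building the training data
    crop = cv2.blur(crop, (5, 5))
    crop = cv2.resize(crop, (40, 40), interpolation=cv2.INTER_CUBIC)
    crop = crop.astype("float32") / 255.0  # match the [0, 1] scaling
    crop = np.expand_dims(crop, axis=2)    # (40, 40, 1)
    return np.expand_dims(crop, axis=0)    # (1, 40, 40, 1) batch

prediction = model.predict(preprocess_like_training(letter_crop))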
Some notes about the project: after thresholding, the images contain only the pixel values 255 and 0, so after the division by 255 the input matrix contains only 0s and 1s.
I am looking for whatever is causing this model to predict the same result every time. Can anyone help?