I am facing a serious problem and cannot find any solution.
The goal: I am trying to build a captcha recognizer. I separated the images of the letters and digits, preprocessed them, built the model, and then tried it on real data, and every prediction came out as the same class.
It is not even always the same class: on one model run most predictions were "SSSS", and on another attempt they were all "1111". Here is an example of an input image:
Most of the data is clean, with no black blobs around the letters. These are the steps I followed, with the code:
1 - Prepare the data matrix as the input matrix:
import os
import cv2
import numpy as np
from imutils import paths

data = []
labels = []
LETTER_IMAGES_FOLDER = "/content/drive/My Drive/Data/extracted_letter_images"

for image_file in paths.list_images(LETTER_IMAGES_FOLDER):
    # Load the image and convert it to grayscale
    image = cv2.imread(image_file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.blur(image, (5, 5))
    image = cv2.resize(image, (40, 40), interpolation=cv2.INTER_CUBIC)
    image = np.expand_dims(image, axis=2)
    # Grab the name of the letter based on the folder it was in
    label = image_file.split(os.path.sep)[-2]
    # Add the letter image and its label to our training data
    data.append(image)
    labels.append(label)

# Scale the raw pixel intensities to the range [0, 1] (this improves training)
data = np.array(data, dtype="float32") / 255.0
labels = np.array(labels)
The classes are close to balanced; the counts are:
33 labels
(array(['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D',
'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S',
'T', 'U', 'V', 'W', 'X', 'Y', 'Z'], dtype='<U1'),
array([8102, 6393, 6332, 6316, 6427, 6173, 6699, 6404, 6956, 6272, 6331,
6353, 6328, 6607, 6250, 6396, 6466, 5985, 6421, 6314, 6196, 6502,
6542, 6417, 6435, 6421, 6396, 6341, 6107, 6131, 6360, 6383, 6457]))
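For reference, the counts above were presumably produced with something like the following (my sketch, assuming labels is the array built in step 1):

# Count how many samples exist per class
classes, counts = np.unique(labels, return_counts=True)
print(len(classes), "labels")
print(classes, counts)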
2 - Train/test split:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split

lb = LabelBinarizer()
lb_label = lb.fit_transform(labels)  # fit on the full labels array, not a single label
(X_train, X_test, Y_train, Y_test) = train_test_split(data, lb_label, test_size=0.25, random_state=0)
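A quick sanity check I would add here (not part of the original code) to confirm the binarizer was fit on all 33 classes and that the one-hot rows have the matching width:

print(lb.classes_)       # expect the 33 characters listed above
print(lb_label.shape)    # expect (num_samples, 33)
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)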
3 - Build the model and save it:
import pickle
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

MODEL_FILENAME = "/content/drive/My Drive/Data/model1.hdf5"
MODEL_LABELS_FILENAME = "/content/drive/My Drive/Data/labels1.dat"

# Save the mapping from labels to one-hot encodings.
# We'll need it later to decode what the model's predictions mean.
with open(MODEL_LABELS_FILENAME, "wb") as f:
    pickle.dump(lb, f)

model = Sequential()
# First convolutional layer with max pooling
model.add(Conv2D(35, (2, 2), padding="same", input_shape=(40, 40, 1), activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
# Second convolutional layer with max pooling
model.add(Conv2D(40, (2, 2), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
# Third convolutional layer with max pooling
model.add(Conv2D(50, (2, 2), padding="same", activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
# Flatten the CNN output so we can connect it to the fully connected output layer
model.add(Flatten())
model.add(Dense(33, activation="softmax"))

# Ask Keras to build the TensorFlow model behind the scenes
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit(X_train, Y_train, validation_data=(X_test, Y_test), batch_size=32, epochs=10, verbose=1)
model.save(MODEL_FILENAME)
The training output was:
Train on 159159 samples, validate on 53054 samples
Epoch 1/10
159159/159159 [==============================] - 44s 275us/step - loss: 0.4009 - acc: 0.8936 - val_loss: 0.1439 - val_acc: 0.9642
Epoch 2/10
159159/159159 [==============================] - 43s 273us/step - loss: 0.0982 - acc: 0.9752 - val_loss: 0.0638 - val_acc: 0.9849
Epoch 3/10
159159/159159 [==============================] - 43s 272us/step - loss: 0.0619 - acc: 0.9837 - val_loss: 0.0471 - val_acc: 0.9882
Epoch 4/10
159159/159159 [==============================] - 43s 273us/step - loss: 0.0431 - acc: 0.9879 - val_loss: 0.0499 - val_acc: 0.9882
Epoch 5/10
159159/159159 [==============================] - 44s 274us/step - loss: 0.0318 - acc: 0.9910 - val_loss: 0.0540 - val_acc: 0.9864
Epoch 6/10
159159/159159 [==============================] - 43s 273us/step - loss: 0.0252 - acc: 0.9929 - val_loss: 0.0530 - val_acc: 0.9878
Epoch 7/10
159159/159159 [==============================] - 43s 272us/step - loss: 0.0202 - acc: 0.9940 - val_loss: 0.0463 - val_acc: 0.9904
Epoch 8/10
159159/159159 [==============================] - 43s 271us/step - loss: 0.0175 - acc: 0.9948 - val_loss: 0.0543 - val_acc: 0.9887
Epoch 9/10
159159/159159 [==============================] - 43s 272us/step - loss: 0.0159 - acc: 0.9951 - val_loss: 0.0478 - val_acc: 0.9888
Epoch 10/10
159159/159159 [==============================] - 43s 271us/step - loss: 0.0129 - acc: 0.9960 - val_loss: 0.0387 - val_acc: 0.9922
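Judging by those validation numbers, the trained network itself looks healthy. A quick check (my addition, not in the original code) that it is not collapsing to a single class on the held-out set:

# Distribution of predicted classes on X_test; if the model had
# collapsed, a single class would dominate here as well
pred = model.predict(X_test)
pred_letters = lb.inverse_transform(pred)
print(np.unique(pred_letters, return_counts=True))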
4 - Then I applied the model to real images: clean the image, then detect the letter regions with my own contour calculations:
import matplotlib.pyplot as plt

img = cv2.imread(image_paths[5845])  # path of the image
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Threshold to remove the background
thr = cv2.threshold(img, 0, 255, cv2.THRESH_OTSU)[1]

# Morphological cleanup
kernel = np.ones((3, 3), np.uint8)
img = cv2.dilate(thr, kernel)
kernel = np.ones((3, 3), np.uint8)
img = cv2.erode(img, kernel)
kernel = np.ones((2, 2), np.uint8)
img = cv2.dilate(img, kernel)

# Add a border around the image to prevent loss of information
img = cv2.copyMakeBorder(img, 8, 8, 8, 8, cv2.BORDER_CONSTANT, value=255)

# Invert the image; otherwise findContours would detect the outer contour
img = cv2.bitwise_not(img)
contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
print(len(contours))

plt.imshow(img, cmap='gray')
plt.show()

# Keep only the targeted contours: no outer image, no holes inside items
newContours = []
letter_image_regions = []

# Loop over matched pairs of contours and hierarchy entries
# (hierarchy is wrapped in an extra dimension, hence hierarchy[0])
for contour, hierarch in zip(contours, hierarchy[0]):
    if hierarch[3] == -1:  # keep only contours with no parent (top-level)
        newContours.append(contour)

lengthContours = len(newContours)
for contour in newContours:
    area = cv2.contourArea(contour)
    if area > 50:
        # Get the rectangle that contains the contour
        x, y, w, h = cv2.boundingRect(contour)
        # Compare the width and height of the contour to detect letters
        # that are conjoined into one chunk, and split them evenly
        if lengthContours == 5:
            letter_image_regions.append((x, y, w, h))
        elif lengthContours == 4:
            if w / h > 1.2:
                half_width = int(w / 2)
                letter_image_regions.append((x, y, half_width, h))
                letter_image_regions.append((x + half_width, y, half_width, h))
            else:
                # This is a normal letter by itself
                letter_image_regions.append((x, y, w, h))
        elif lengthContours == 1:
            if 2 <= w / h <= 4:
                one_item = int(w / 4)
                for i in range(4):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            elif w / h > 4:
                one_item = int(w / 5)
                for i in range(5):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            else:
                letter_image_regions.append((x, y, w, h))
        elif lengthContours == 3:
            if 1.2 <= w / h <= 2.1:
                half_width = int(w / 2)
                letter_image_regions.append((x, y, half_width, h))
                letter_image_regions.append((x + half_width, y, half_width, h))
            elif w / h > 2.1:
                one_item = int(w / 3)
                for i in range(3):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            else:
                letter_image_regions.append((x, y, w, h))
        elif lengthContours == 2:
            if 1.2 < w / h <= 2:
                one_item = int(w / 3)
                for i in range(3):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            elif w / h > 2:
                one_item = int(w / 4)
                for i in range(4):
                    letter_image_regions.append((x + i * one_item, y, one_item, h))
            else:
                letter_image_regions.append((x, y, w, h))

# Sort the regions left to right
letter_image_regions = sorted(letter_image_regions, key=lambda r: r[0])

img = cv2.subtract(255, img)  # reverse the colors again

# Create an output image and a list to hold the predicted letters
output = cv2.merge([img] * 3)
predictions = []

# Loop over the letters
for letter_bounding_box in letter_image_regions:
    # Grab the coordinates of the letter in the image
    x, y, w, h = letter_bounding_box
    # Extract the letter from the original image with a 2-pixel margin around the edge
    letter_image = img[y - 2:y + h + 2, x - 2:x + w + 2]
    # Force the crop to pure black and white
    letter_image[letter_image < 150] = 0
    letter_image[letter_image > 150] = 255
    # Add a border around the letter to prevent loss of information
    letter_image = cv2.copyMakeBorder(letter_image, 8, 8, 8, 8, cv2.BORDER_CONSTANT, value=255)
    letter_image = cv2.resize(letter_image, (40, 40), interpolation=cv2.INTER_CUBIC)
    cv2.imwrite("{}.png".format(str(x)), letter_image)
    # Turn the single image into a 4D batch of one image to make Keras happy
    letter_image = np.expand_dims(letter_image, axis=2)
    letter_image = np.expand_dims(letter_image, axis=0)
    # Ask the neural network to make a prediction
    prediction = model.predict(letter_image)
    # Convert the one-hot-encoded prediction back to a normal letter
    letter = lb.inverse_transform(prediction)[0]
    print(letter)
    predictions.append(letter)
    # Draw the prediction on the output image
    cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4), (0, 255, 0), 1)
    cv2.putText(output, letter, (x - 5, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

# Print the captcha's text
captcha_text = "".join(predictions)
print("CAPTCHA text is: {}".format(captcha_text))
The prediction result is:
CAPTCHA text is: 1111
I tried the model on 100 different images, and it predicted 1111 for all of them.
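One mismatch worth noting (my observation, not established as the cause): the training images were blurred with cv2.blur(image, (5, 5)) and scaled to [0, 1] by dividing by 255.0, while the inference loop above feeds model.predict raw uint8 crops in [0, 255] with no blur. A minimal sketch of mirroring the training preprocessing on a raw letter crop before predicting, under that assumption (preprocess_like_training is a hypothetical helper, not from the original code):

def preprocess_like_training(crop):
    # Apply the same transforms used when building the training data
    crop = cv2.blur(crop, (5, 5))
    crop = cv2.resize(crop, (40, 40), interpolation=cv2.INTER_CUBIC)
    crop = crop.astype("float32") / 255.0  # match the [0, 1] scaling
    crop = np.expand_dims(crop, axis=2)    # (40, 40, 1)
    return np.expand_dims(crop, axis=0)    # (1, 40, 40, 1) batch

prediction = model.predict(preprocess_like_training(letter_crop))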
Some notes about the project: after thresholding, the images contain only the pixel values 255 and 0, so after the division by 255 the input matrix contains only 0s and 1s.
I am looking for whatever is causing this model to predict the same result every time. Can anyone help?