I am trying to classify hand gestures using a Keras classifier model. However, it gives absurdly high accuracy while training: around 80% in the very first epoch, then jumping to 90%, and ending at about 98% after 25 epochs. I created my own training dataset, which can be found here. The 0 folder in the dataset is for a blank screen.
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense , Dropout
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Step 1 - Building the CNN
# Initializing the CNN
classifier = Sequential()
# First convolution layer and pooling
classifier.add(Convolution2D(32, (3, 3), input_shape=(310, 310, 1), activation='relu'))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
# Second convolution layer and pooling
classifier.add(Convolution2D(32, (3, 3), activation='relu'))
# input_shape is going to be the pooled feature maps from the previous convolution layer
classifier.add(MaxPooling2D(pool_size=(2, 2)))
# Flattening the layers
classifier.add(Flatten())
# Adding a fully connected layer
classifier.add(Dense(units=128, activation='relu'))
classifier.add(Dropout(0.40))
classifier.add(Dense(units=96, activation='relu'))
classifier.add(Dropout(0.40))
classifier.add(Dense(units=64, activation='relu'))
classifier.add(Dense(units=27, activation='softmax')) # softmax for more than 2
# Compiling the CNN
classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) # categorical_crossentropy for more than 2
# Step 2 - Preparing the train/test data and training the model
# Code copied from - https://keras.io/preprocessing/image/
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
training_set = train_datagen.flow_from_directory('data/train',
                                                 target_size=(310, 310),
                                                 batch_size=10,
                                                 color_mode='grayscale',
                                                 class_mode='categorical')
test_set = test_datagen.flow_from_directory('data/test',
                                            target_size=(310, 310),
                                            batch_size=10,
                                            color_mode='grayscale',
                                            class_mode='categorical')
classifier.fit_generator(
        training_set,
        steps_per_epoch=12841, #9603, # No of images in training set
        epochs=25,
        validation_data=test_set,
        validation_steps=4268) #2580) # No of images in test set
# Saving the model
model_json = classifier.to_json()
with open("model-bw.json", "w") as json_file:
json_file.write(model_json)
classifier.save_weights('model-bw.h5')
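For reference, the label ordering I assume when mapping predictions back to signs comes from flow_from_directory, which assigns class indices alphabetically by folder name (so, assuming the folders are named 0 and A-Z, the 0 folder maps to index 0 and the letters to 1-26). A quick check with the training_set generator defined above:

# Sanity check: flow_from_directory assigns indices alphabetically by folder name
print(training_set.class_indices)  # e.g. {'0': 0, 'A': 1, 'B': 2, ...}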
When I test it in real time with the following code, it does not give the correct output: for the blank sign it does not show blank, and for all the other signs it shows blank.
import numpy as np
from keras.models import model_from_json
from image_processing import func
import operator
import time
import cv2
import sys, os
import matplotlib.pyplot as plt
# Loading the model
minValue = 70
json_file = open("model-bw.json", "r")
model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(model_json)
# load weights into new model
#loaded model = load_model('model-bw.h5')
loaded_model.load_weights("model-bw.h5")
print("Loaded model from disk")
cap = cv2.VideoCapture(0)
# Category dictionary
categories = {0: 'ZERO', 1: 'ONE', 2: 'TWO', 3: 'THREE', 4: 'FOUR', 5: 'FIVE'}
while True:
    _, frame = cap.read()
    # Simulating mirror image
    frame = cv2.flip(frame, 1)
    #frame = (cv2.imread("/home/rc/Downloads/soe/train/A/001.jpg"))
    # Got this from collect-data.py
    # Coordinates of the ROI
    x1 = int(0.5*frame.shape[1])
    y1 = 10
    x2 = frame.shape[1]-10
    y2 = int(0.5*frame.shape[1])
    # x1 = 220
    # x2 = 620
    # y1 = 10
    # y2 = 310
    print(x1, y1, x2, y2)
    # Drawing the ROI
    # The increment/decrement by 1 is to compensate for the bounding box
    cv2.rectangle(frame, (x1-1, y1-1), (x2+1, y2+1), (255, 0, 0), 1)
    # Extracting the ROI
    roi = frame[y1:y2, x1:x2]
    # Resizing the ROI so it can be fed to the model for prediction
    #roi = cv2.resize(roi, (64, 64))
    #roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    #_, test_image = cv2.threshold(roi, 120, 255, cv2.THRESH_BINARY)
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 2)
    #blur = cv2.bilateralFilter(roi, 9, 75, 75)
    th3 = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    ret, test_image = cv2.threshold(th3, minValue, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
    #time.sleep(5)
    #cv2.imwrite("/home/rc/Downloads/soe/im1.jpg", roi)
    #test_image = func("/home/rc/Downloads/soe/im1.jpg")
    test_image = cv2.resize(test_image, (310, 310))
    cv2.imshow("test", test_image)
    # Batch of 1
    # print(test_image.reshape(1, 64, 64, 1))
    # break
    result = loaded_model.predict(test_image.reshape(1, 310, 310, 1))
    prediction = {
        'blank': result[0][0],
        'A': result[0][1],
        'B': result[0][2],
        'C': result[0][3],
        'D': result[0][4],
        'E': result[0][5],
        'F': result[0][6],
        'G': result[0][7],
        'H': result[0][8],
        'I': result[0][9],
        'J': result[0][10],
        'K': result[0][11],
        'L': result[0][12],
        'M': result[0][13],
        'N': result[0][14],
        'O': result[0][15],
        'P': result[0][16],
        'Q': result[0][17],
        'R': result[0][18],
        'S': result[0][19],
        'T': result[0][20],
        'U': result[0][21],
        'V': result[0][22],
        'W': result[0][23],
        'X': result[0][24],
        'Y': result[0][25],
        'Z': result[0][26],
    }
    # Sorting based on top prediction
    prediction = sorted(prediction.items(), key=operator.itemgetter(1), reverse=True)
    # Displaying the predictions
    cv2.putText(frame, prediction[0][0], (10, 120), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 255), 1)
    # print(prediction)
    cv2.imshow("Frame", frame)
    interrupt = cv2.waitKey(10)
    if interrupt & 0xFF == 27:  # esc key
        break
cap.release()
cv2.destroyAllWindows()
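One mismatch I am not sure about: the training generator rescales inputs with rescale=1./255, while the loop above feeds the thresholded image with raw 0-255 values. A minimal sketch of the prediction step with the same scaling applied at inference (reusing test_image and loaded_model from above) would be:

# Rescale to [0, 1] to match the 1./255 rescaling used by the training ImageDataGenerator
scaled = test_image.reshape(1, 310, 310, 1).astype('float32') / 255.0
result = loaded_model.predict(scaled)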
I trained the same model on another dataset that has no blank sign and only 24 letters. There it trained properly from the very first epoch, starting at around 50% accuracy and increasing gradually; the input size we kept was 300*300. But it could only detect 13-15 signs correctly, no more than that. So I made a bigger dataset (that dataset had only about 4000 images in total, while mine has 17000), kept the size at 310*310, and got this absurd behaviour. It would be very helpful if someone could point out the mistake.