Keras分类器为手势分类提供了荒谬的准确性

时间:2018-11-07 12:52:45

标签: python tensorflow keras classification

我正在尝试使用Keras分类器模型对手势进行分类。但是,它在训练时给出的准确率高得离谱:第一个epoch的准确率就约为80%,随后跃升至90%,而在25个epoch之后最终准确率达到98%。我创建了自己的训练数据集,可以在此处找到。数据集中的0文件夹对应空白屏幕。

# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense , Dropout
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Step 1 - Building the CNN

# The network is declared in one go as an ordered list of layers.
classifier = Sequential([
    # Convolution block 1: 32 filters of 3x3 over the 310x310 single-channel
    # input, followed by 2x2 max pooling.
    Convolution2D(32, (3, 3), input_shape=(310, 310, 1), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Convolution block 2: operates on the pooled feature maps of block 1.
    Convolution2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Collapse the feature maps into a vector for the dense head.
    Flatten(),
    # Fully connected head; dropout follows the two widest layers.
    Dense(units=128, activation='relu'),
    Dropout(0.40),
    Dense(units=96, activation='relu'),
    Dropout(0.40),
    Dense(units=64, activation='relu'),
    # 27 output classes, hence softmax rather than sigmoid.
    Dense(units=27, activation='softmax'),
])

# Multi-class problem, hence categorical cross-entropy.
classifier.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


# Step 2 - Preparing the train/test data and training the model

# Code copied from - https://keras.io/preprocessing/image/
from keras.preprocessing.image import ImageDataGenerator

# Training images are rescaled to [0, 1] and randomly sheared, zoomed and
# horizontally flipped.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

# Test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

training_set = train_datagen.flow_from_directory('data/train',
                                                 target_size=(310, 310),
                                                 batch_size=10,
                                                 color_mode='grayscale',
                                                 class_mode='categorical')

test_set = test_datagen.flow_from_directory('data/test',
                                            target_size=(310, 310),
                                            batch_size=10,
                                            color_mode='grayscale',
                                            class_mode='categorical')

# fit_generator counts steps in *batches*, not images.  Passing the image
# count with batch_size=10 made every "epoch" roughly ten passes over the
# data, which inflates the accuracy reported after the first epoch.
# len(iterator) is the number of batches needed to see each image once.
classifier.fit_generator(
        training_set,
        steps_per_epoch=len(training_set),
        epochs=25,
        validation_data=test_set,
        validation_steps=len(test_set))


# Saving the model
# The architecture is serialised to JSON; the weights go to a separate
# HDF5 file.
with open("model-bw.json", "w") as arch_file:
    arch_file.write(classifier.to_json())
classifier.save_weights('model-bw.h5')

当我使用以下代码进行实际测试时,它无法给出正确的输出:例如对于空白(blank)符号,它不会显示blank,而对于所有其他符号,它却都显示blank。

import numpy as np
from keras.models import model_from_json
from image_processing import func
import operator
import time 
import cv2
import sys, os
import matplotlib.pyplot as plt

# Loading the model
minValue = 70  # threshold value passed to cv2.threshold in the capture loop

# Read the architecture from JSON.  The context manager closes the file
# even if reading raises.
with open("model-bw.json", "r") as json_file:
    model_json = json_file.read()
loaded_model = model_from_json(model_json)
# load weights into new model
loaded_model.load_weights("model-bw.h5")
print("Loaded model from disk")

cap = cv2.VideoCapture(0)

# Category dictionary
# NOTE(review): not referenced anywhere in the visible script; the labels
# actually shown on screen come from `labels` below.
categories = {0: 'ZERO', 1: 'ONE', 2: 'TWO', 3: 'THREE', 4: 'FOUR', 5: 'FIVE'}

# Output index -> label: index 0 is 'blank', indices 1..26 are 'A'..'Z'.
# NOTE(review): assumes the training folders were named '0', 'A'..'Z' so
# that index 0 is the blank class - verify against training_set.class_indices.
labels = ['blank'] + [chr(ord('A') + i) for i in range(26)]

try:
    while True:
        grabbed, frame = cap.read()
        if not grabbed:
            # Without this check a failed read hands None to cv2.flip.
            print("Failed to read a frame from the camera")
            break

        # Simulating mirror image
        frame = cv2.flip(frame, 1)

        # Got this from collect-data.py
        # Coordinates of the ROI.  y2 is derived from the frame *width*
        # (shape[1]), which makes the ROI (approximately) square.
        x1 = int(0.5*frame.shape[1])
        y1 = 10
        x2 = frame.shape[1]-10
        y2 = int(0.5*frame.shape[1])
        print(x1 , y1 , x2 , y2)

        # Drawing the ROI
        # The increment/decrement by 1 is to compensate for the bounding box
        cv2.rectangle(frame, (x1-1, y1-1), (x2+1, y2+1), (255,0,0) ,1)
        # Extracting the ROI
        roi = frame[y1:y2, x1:x2]

        # Grayscale -> blur -> adaptive threshold -> Otsu threshold.
        gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray,(5,5),2)
        th3 = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY_INV,11,2)
        ret, test_image = cv2.threshold(th3, minValue, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

        # Resizing so the image matches the model's 310x310 input
        test_image = cv2.resize(test_image, (310,310))
        cv2.imshow("test", test_image)

        # Batch of 1.  The training generator used rescale=1./255, so the
        # model must see values in [0, 1] here as well; feeding raw 0..255
        # pixels makes the predictions unreliable.
        model_input = test_image.reshape(1, 310, 310, 1).astype(np.float32) / 255.0
        result = loaded_model.predict(model_input)

        # Top prediction: argmax returns the first maximum, the same entry
        # a stable descending sort would put first.
        top_label = labels[int(np.argmax(result[0]))]

        # Displaying the predictions
        cv2.putText(frame, top_label, (10, 120), cv2.FONT_HERSHEY_PLAIN, 1, (0,255,255), 1)
        cv2.imshow("Frame", frame)

        interrupt = cv2.waitKey(10)
        if interrupt & 0xFF == 27: # esc key
            break
finally:
    # Release the camera and close the windows even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

我曾用相同的模型训练过另一个数据集,该数据集没有空白符号,只有24个字母。那次训练表现正常:第一个epoch的准确率从50%左右开始,之后逐渐上升。当时我们使用的输入大小为300 * 300。但是它只能正确检测出13到15个符号,无法更多。因此,我制作了一个更大的数据集(之前的数据集总共只有4000张图像,而我的新数据集有17000张),输入尺寸设为310 * 310,结果就出现了上述荒谬的表现。如果有人能指出错误所在,那将非常有帮助。

0 个答案:

没有答案