将手写图像传递到Keras模型中时,准确性异常低

时间:2019-07-18 14:37:41

标签: python machine-learning keras pygame ocr

我创建了一个机器学习程序,该程序能够正确识别99.947%的手写十进制数字。我使用了MNIST数据集来训练模型,并且正在开发一个程序,该程序可以预测在图像中写入了哪些数字。该图像是使用PyGame创建的。

此代码在具有python 3.7.1的Linux系统上运行。

我的代码如下。我正在尝试创建一个教育游戏,而我的AI似乎无法正确处理数据:

import sys, os                              # For modifying files, making directories, etc.
import keras                                # Simplified TensorFlow library
import matplotlib.pyplot as plt             # Image display from PyPlot
import numpy as np                          # NumPy for advanced math
import math                                 # If needed
import random                               # For picking a random value within the MNIST data set
import pygame                               # For drawing canvas, etc
from pygame.locals import *
import time
from keras.models import Sequential         # Enables sequential layers for AI learning
from keras.layers import Dense              # Enables Dense layers
from keras.layers import Conv2D             # Enables 2D Convoluted Neural Network layers
from keras.layers import MaxPooling2D       # Enables Maximum Pooling layers
from keras.layers import Dropout            # Enables Dropout layers
from keras.layers import Flatten            # Enables Flatten layers
from keras.datasets import mnist            # Imports MNIST for training artificial intelligence
from keras.callbacks import ModelCheckpoint # Imports ModelCheckpoint for saving progress
import cv2                                  # For loading images
model = Sequential()                         # Use sequential layers for AI training

# ML Configuration. Although this is included in a separate file, I included an option to train the AI within this file.
model.add(Conv2D(32, (3,3), activation='relu', input_shape = (28, 28, 1)))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Flatten())
model.add(Dense(128, activation = 'relu'))
model.add(Dropout(rate = 0.3))
model.add(Dense(10, activation = 'softmax'))
load = True # Load previous best model fit?
save = True # Save best model fit?
if load:
    print("Loading weights from previous model fit...\t", end="")
    model.load_weights("./saves/main_MNIST.hdf5")
    print("Done")
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_train.reshape(-1, 28, 28, 1)
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)
model.compile(loss = keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])
callbacks_list = []
if save:
    filepath="./saves/main_MNIST.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='acc', verbose=1, save_best_only=True, mode='max')
    callbacks_list = [checkpoint]

# Whether or not to retrain model within this file
if input("Retrain model? (Y/N)\t").lower() == "y":
    history = model.fit(x_train, y_train, epochs=int(input("Input epochs number. Recommended value is 5.\t"), 10), batch_size=64, callbacks=callbacks_list)

# Initialize PyGame
pygame.init()
pygame.font.init()
myfont = pygame.font.SysFont('Open Sans', 30)
pygame.display.set_caption("NumSmart AI")
mouse = pygame.mouse
fpsclock = pygame.time.Clock()

# Width and height of window
width = 1024
height = 1024

window = pygame.display.set_mode((width, height))
canvas = window.copy()

# Color definitions
BLACK = pygame.Color(0,0,0)
WHITE = pygame.Color(255,255,255)

canvas.fill(BLACK)

def quitAndExit():
    pygame.quit()
    sys.exit()
def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.144])
while True:
    title = myfont.render('NumSmart AI Game', False, (255, 255, 255))
    add1 = random.randint(0, 10)
    add2 = random.randint(0, 10)
    dig2compl = False
    prompt = myfont.render(("Add the following numbers: " + str(add1) + " + " + str(add2)), False, (255, 255, 255))
    drawComplete = False
    while not drawComplete:
        left_pressed, middle_pressed, right_pressed = mouse.get_pressed()
        for event in pygame.event.get():
            if event.type == QUIT:
                quitAndExit()
            elif left_pressed:
                pygame.draw.circle(canvas, WHITE, (pygame.mouse.get_pos()), 20)
            elif middle_pressed:
                canvas.fill(BLACK)
            elif right_pressed:
                pygame.draw.circle(canvas, BLACK, (pygame.mouse.get_pos()), 10)
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_q:
                    quitAndExit()
                if event.key == pygame.K_c:
                    drawComplete = True
                    canvas.fill(BLACK)
                    if (add1 + add2 >= 10) and (not dig2compl):
                        drawComplete = False
                        dig2compl = True
                    # PROBLEM AREA BEGINS HERE
                    img = cv2.imread("./IMG/img.jpeg")
                    img = cv2.resize(img, dsize=(28, 28), interpolation=cv2.INTER_CUBIC)
                    plt.imshow(img)
                    plt.axis("off")
                    plt.show()
                    img = np.asarray(img)
                    img = rgb2gray(img)
                    img = img.reshape((-1, 28, 28, 1))
                    out = model.predict(img)
                    print(out)
                    print("AI Read: " + str(np.argmax(out)))
                    # PROBLEM AREA ENDS HERE
        window.blit(canvas, (0, 0))
        window.blit(prompt, (250, 0))
        window.blit(title, (0, 0))
        pygame.display.update()
        pygame.image.save(canvas, "./IMG/img.jpeg")

如您所见,如果您绘制数字,则AI的猜测很可能是不准确的。

相对于上述文件,这是ML文件,需要将其放置在./saves/main_MNIST.hdf5中: http://www.mediafire.com/file/1haza5nmxwxs24f/main_MNIST.hdf5/file

谢谢!

0 个答案:

没有答案