Newbie: Keras with the TensorFlow MNIST code sample: wrong predictions

Time: 2018-08-28 07:43:44

Tags: python tensorflow keras mnist

(Attempt #2; see below for the history)

I am new to Deep Learning and Keras, and I am trying out the MNIST sample following the tutorial at https://elitedatascience.com/keras-tutorial-deep-learning-in-python.

Compared to that sample I only made some minimal adjustments to the code to make it compatible with the latest API. I then added a small Python script that loads the saved model and a BMP image file (given as the script's first argument) and returns the class prediction as the exit code.
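
For example, I call the prediction script like this and read the predicted class back from the exit status (a minimal sketch; the script name predict.py and the image path are placeholders):

# minimal driver sketch: run the prediction script on one image and read
# the predicted class back from the process exit code.
# 'predict.py' and 'digit.bmp' are placeholder names.
import subprocess

result = subprocess.run(['python', 'predict.py', 'digit.bmp'])
print('predicted class (exit code):', result.returncode)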

I trained the model, got an accuracy of 0.9911, and saved it.

However, when testing with some BMP samples I get inconsistent results: only some of the digits are recognized correctly. I am probably missing something obvious in the data preprocessing. After Toyo's reply I changed both scripts following https://nextjournal.com/schmudde/ml4a-mnist, so the newer versions are quoted here; the older ones are at the bottom.

I also added a sample visualization of the preprocessed array representing the image, and looking at it I can see the expected digit "white on black" (in the prediction preprocessing I added a color inversion by setting each value to 255 minus its original value, because my input BMPs are "black on white").
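
For reference, here is a minimal standalone sketch of that check, comparing the inverted BMP against an MNIST training sample (the path digit.bmp is just a placeholder):

# standalone sanity check: both the MNIST reference sample and the
# preprocessed BMP should be a white digit on a black background in [0, 1]
import numpy as np
from keras.datasets import mnist
from keras.preprocessing import image

(x_train, _), _ = mnist.load_data()
ref = x_train[0].astype('float32') / 255

img = image.load_img('digit.bmp', color_mode='grayscale', target_size=(28, 28))
arr = image.img_to_array(img).reshape(28, 28).astype('float32')
arr = (255 - arr) / 255   # invert: the input BMP is black-on-white

print('MNIST sample   min/max/mean: {} {} {}'.format(ref.min(), ref.max(), ref.mean()))
print('BMP (inverted) min/max/mean: {} {} {}'.format(arr.min(), arr.max(), arr.mean()))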

However, the problem persists.

Thanks!

New scripts

Training:

# numpy with a specific seed for reproducibility
import numpy as np
np.random.seed(123)

# keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist

# load pre-shuffled MNIST data into train and test sets
# MNIST: 60k samples of 28x28 grayscale (0-255) images,
# and additional 10k samples for test.
((x_train, y_train), (x_test, y_test)) = mnist.load_data()
n_train, height, width = x_train.shape
n_test, _, _ = x_test.shape
print('x_train shape: {}'.format(x_train.shape))
print('n_train: {}'.format(n_train))
print('height: {}'.format(height))
print('width: {}'.format(width))
print('n_test: {}'.format(n_test))

# we require shape (count, height, width, channel), where count
# is the count of our samples, and channel is the number of channels,
# here equal to 1 as these are grayscale images (for RGB it would be 3).
x_train = x_train.reshape(n_train, height, width, 1).astype('float32')
x_test = x_test.reshape(n_test, height, width, 1).astype('float32')

# normalize from [0, 255] to [0, 1]
x_train /= 255
x_test /= 255

# show image array
print('Sample array: {}\n'.format(x_train[0].shape))
print(x_train[0])
from matplotlib import pyplot as plt
plt.imshow(x_train[0].reshape(28, 28))
plt.show()

# the labels need to be converted into one-hot vectors,
# which are nc-element arrays (nc is the number of classes),
# which are 0 for all classes except 1 for the class the label
# is assigned to.
# convert integer labels into one-hot vectors
n_classes = 10
y_train = np_utils.to_categorical(y_train, n_classes)
y_test = np_utils.to_categorical(y_test, n_classes)

# define model architecture.
model = Sequential()

# number of convolutional filters
n_filters = 32

# convolution filter size
n_conv = 3

# pooling window size
n_pool = 2

# (1) convolution
model.add(Convolution2D(
        n_filters, 
        kernel_size=(n_conv, n_conv),
        # we have a 28x28 single channel (grayscale) image
        # so the input shape should be (28, 28, 1)
        input_shape=(height, width, 1)
))
model.add(Activation('relu'))

model.add(Convolution2D(n_filters, kernel_size=(n_conv, n_conv)))
model.add(Activation('relu'))

# apply pooling to summarize the features extracted thus far
model.add(MaxPooling2D(pool_size=(n_pool, n_pool)))

# (2) connected layers
model.add(Dropout(0.25))

# flatten the data for the 1D layers
model.add(Flatten())

# Dense(n_outputs)
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))

# (3) the softmax output layer gives us a probablity for each class
model.add(Dense(n_classes))
model.add(Activation('softmax'))

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

# how many examples to look at during each update step
batch_size = 128

# how many times to run through the full set of examples
n_epochs = 10

model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=n_epochs,
          validation_data=(x_test, y_test))

# how'd we do?
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)

print('loss:', loss)
print('accuracy:', accuracy)

# save the model. Use load_model(path) to load it later
model.save('/projects/python/mnist/mnist.h5')

Prediction:

# get the path to the image to be recognized from 1st arg
import sys
if len(sys.argv) < 2:
    print('Missing image file path')
    exit(-1)

# load the pretrained MNIST model
print('Loading model...')
import numpy as np
import keras
model = keras.models.load_model('/projects/python/mnist/mnist.h5')

# load the image ensuring that its size is 28x28
print('Loading image ' + sys.argv[1])
from keras.preprocessing import image
x = image.load_img(sys.argv[1], color_mode='grayscale', target_size=(28, 28))

# convert image to NumPy array
x = image.img_to_array(x, data_format='channels_last')
print('np from image:\n')
print(x.shape)
print(x)

# add a major dimension to represent the number of samples, i.e. 1
x = x.reshape(1, 28, 28, 1)
# convert data type to float32 and normalize 0-255 values to range [0, 1]
x = x.astype('float32')
# invert colors, we need white on black
x = 255 - x
x /= 255
print('np before predicting:\n')
print(x.shape)
print(x)

from matplotlib import pyplot as plt
plt.imshow(x.reshape(28, 28))
plt.show()

# predict
classes = model.predict_classes(x)
print(classes[0])
exit(int(classes[0]))  # cast to a plain int so it is used as the process exit code
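
To dig into the inconsistent results I can also print the full softmax output instead of only the winning class. A minimal sketch, meant to be inserted just before the exit call in the prediction script above (so model, x and np are already defined):

# extra debugging output: the probability the model assigns to each digit
probs = model.predict(x)[0]
for digit, p in enumerate(probs):
    print('digit {}: probability {:.4f}'.format(digit, p))
print('argmax: {}'.format(np.argmax(probs)))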

Old scripts

# numpy with a specific seed for reproducibility
import numpy as np
np.random.seed(123)

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from matplotlib import pyplot as plt

# load pre-shuffled MNIST data into train and test sets
# 60k samples of 28x28 images
((x_train, y_train), (x_test, y_test)) = mnist.load_data()

print(x_train.shape)
plt.imshow(x_train[0])
input("Press enter to continue...")

# preprocess input data, as Theano requires explicit depth:
# from shape (n, width, height) to (n, depth=1, width, height)
x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)
x_test = x_test.reshape(x_test.shape[0], 1, 28, 28)
# convert our data type to float32 and normalize our data values to the range [0, 1]
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# convert 1-dimensional class arrays (with values 0-9)
# to 10-dimensional class matrices (with values 0-1)
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

# define model architecture
model = Sequential()

# input_shape is the shape of each single parameter:
# here it is depth=1, width=28, height=28
# 32 convolution filters
# 3 rows in each convolution kernel
# 3 cols in each convolution kernel
model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(1, 28, 28), data_format="channels_first"))
model.add(Convolution2D(32, (3, 3), activation='relu', data_format="channels_first"))
# reduce the model's parameters by sliding a 2x2 pooling filter
# across the previous layer and taking the max of the 4 values in the 2x2 filter
model.add(MaxPooling2D(pool_size=(2,2)))
# regularize the model to prevent overfitting
model.add(Dropout(0.25))

# flatten (make 1-dimensional)
model.add(Flatten())
# dense: 128=output size of the layer
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# final layer has size 10 for the 10 digits
model.add(Dense(10, activation='softmax'))

# compile model (loss function and optimizer)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# fit model on training data
model.fit(x_train, y_train, 
          batch_size=32, epochs=10, verbose=1)

# evaluate model on test data
score = model.evaluate(x_test, y_test, verbose=0)

# save the model. Use load_model(path) to load it later
model.save('/projects/python/mnist/mnist.h5')

Usage script:

# get the path to the image to be recognized from 1st arg
import sys
if len(sys.argv) < 2:
    print('Missing image file path')
    exit(-1)

# load the pretrained MNIST model
print('Loading model...')
import numpy as np
import keras
model = keras.models.load_model('/projects/python/mnist/mnist.h5')

# load the image ensuring that its size is 28x28
print('Loading image ' + sys.argv[1])
from keras.preprocessing import image
x = image.load_img(sys.argv[1], color_mode='grayscale', target_size=(28, 28))

# convert image to NumPy array
x = image.img_to_array(x, data_format='channels_first')
# add a major dimension to represent the number of samples, i.e. 1
x = x.reshape(1, 1, 28, 28)
# convert data type to float32 and normalize 0-255 values to range [0, 1]
x = x.astype('float32')
x /= 255
print(x.shape)
print(x)

# predict
classes = model.predict_classes(x)
print(classes[0])
exit(int(classes[0]))  # cast to a plain int so it is used as the process exit code

0 answers:

No answers