Question

我正在按照下面这个网站上的代码进行学习：

https://blog.luisfred.com.br/reconhecimento-de-escrita-manual-com-redes-neurais-convolucionais/

以下是该网站上的代码：

from keras. datasets import mnist
from keras. models import Sequential
from keras. layers import Dense
from keras. layers import Dropout
from keras. layers import Flatten
import numpy as np
from matplotlib import pyplot as plt
from keras. layers . convolutional import Conv2D
from keras. layers . convolutional import MaxPooling2D
from keras. utils import np_utils
from keras import backend as K
K . set_image_dim_ordering ( 'th' )
import cv2
import matplotlib. pyplot as plt
# %matplotlib inline  # If you are using Jupyter, enable this magic to render plots and figures inside notebook cells

# Load MNIST, which already comes split into training and test subsets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# The images are grayscale, so the depth axis has size 1. Lay every set out as
# (samples, depth, rows, columns), switch to float32 and divide by 255 so the
# pixel values end up in the range [0, 1].
X_train = X_train.reshape(len(X_train), 1, 28, 28).astype('float32') / 255
X_test = X_test.reshape(len(X_test), 1, 28, 28).astype('float32') / 255

# Turn the class vectors into binary class matrices (one-hot vectors).
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

# One column per digit class in the one-hot matrix; for MNIST that is 10,
# corresponding to the digits 0 through 9.
num_classes = y_test.shape[1]


def deeper_cnn_model():
    """Build and compile the convolutional network for the digit images.

    The layers are stacked in this order:

    1. ``Conv2D`` input layer: 30 feature maps, 5x5 kernels, ReLU, expecting
       inputs of shape ``(1, 28, 28)``.
    2. ``MaxPooling2D`` with a 2x2 window.
    3. ``Conv2D``: 15 feature maps, 3x3 kernels, ReLU.
    4. ``MaxPooling2D`` with a 2x2 window.
    5. ``Dropout`` with a rate of 0.2.
    6. ``Flatten`` so the dense layers receive a single feature vector.
    7. Three fully connected ReLU layers with 128, 64 and 32 neurons.
    8. A softmax output layer named ``'preds'`` with ``num_classes`` neurons
       (``num_classes`` is read from module scope).

    Returns:
        The ``Sequential`` model, compiled with categorical cross-entropy
        loss, the Adam optimizer and the accuracy metric.
    """
    feature_extractor = [
        Conv2D(30, (5, 5), input_shape=(1, 28, 28), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(15, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
    ]
    classifier = [
        # Collapse the feature maps into one long vector for the dense layers.
        Flatten(),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        # One output neuron per class; softmax yields class probabilities.
        Dense(num_classes, activation='softmax', name='preds'),
    ]

    network = Sequential(feature_extractor + classifier)
    # Configure the training process of the network.
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network


# Build the network and print its layer-by-layer summary.
model = deeper_cnn_model()
model.summary()
# Train for 10 epochs in batches of 200 samples, reporting the loss and
# accuracy on the test split after every epoch.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200)
# The model is compiled with metrics=['accuracy'], so evaluate() yields the
# loss first and the accuracy second.
scores = model.evaluate(X_test, y_test, verbose=0)
# "\n" (not the invalid escape "\ ") starts the report on a fresh line.
print("\nacc: %.2f%%" % (scores[1] * 100))


###enhance to check multiple numbers after the training is done

# Read the hand-drawn digit as a single-channel (grayscale) image.
img_pred = cv2.imread('five.JPG', 0)
# cv2.imread signals an unreadable or missing file by returning None rather
# than raising, so fail early with a clear message.
if img_pred is None:
    raise FileNotFoundError("could not read image 'five.JPG'")

plt.imshow(img_pred, cmap='gray')
# Force the image to the 28x28 size used by the training data. ndarray.shape is
# a tuple, so it has to be compared with a tuple; comparing it with the list
# [28, 28] is always unequal.
if img_pred.shape != (28, 28):
    img_pred = cv2.resize(img_pred, (28, 28))

# Add the sample and depth axes, (1, 1, 28, 28), and apply the same float32
# conversion and division by 255 that the training data received.
# NOTE(review): the image is not inverted here; if the drawing is dark ink on a
# light background it still differs from the training digits in that respect.
img_pred = img_pred.reshape(1, 1, 28, 28).astype('float32') / 255
pred = model.predict_classes(img_pred)
pred_proba = model.predict_proba(img_pred)
# Index with the scalar class id so that a single number gets formatted.
pred_proba = "%.2f%%" % (pred_proba[0][pred[0]] * 100)
print(pred[0], "with probability of", pred_proba)

在最后，我尝试对自己手绘并导入的数字 5 进行预测（我也试过其他手绘数字，结果同样很差）：

# Read the hand-drawn digit as a single-channel (grayscale) image.
img_pred = cv2.imread('five.JPG', 0)
# cv2.imread signals an unreadable or missing file by returning None rather
# than raising, so fail early with a clear message.
if img_pred is None:
    raise FileNotFoundError("could not read image 'five.JPG'")

plt.imshow(img_pred, cmap='gray')
# Force the image to the 28x28 size used by the training data. ndarray.shape is
# a tuple, so it has to be compared with a tuple; comparing it with the list
# [28, 28] is always unequal.
if img_pred.shape != (28, 28):
    img_pred = cv2.resize(img_pred, (28, 28))

# Add the sample and depth axes, (1, 1, 28, 28), and apply the same float32
# conversion and division by 255 that the training data received.
# NOTE(review): the image is not inverted here; if the drawing is dark ink on a
# light background it still differs from the training digits in that respect.
img_pred = img_pred.reshape(1, 1, 28, 28).astype('float32') / 255
pred = model.predict_classes(img_pred)
pred_proba = model.predict_proba(img_pred)
# Index with the scalar class id so that a single number gets formatted.
pred_proba = "%.2f%%" % (pred_proba[0][pred[0]] * 100)
print(pred[0], "with probability of", pred_proba)

以下是five.jpg：

hand drawn five image

但是当我输入自己手写的数字时，模型的预测是错误的。有人知道可能是什么原因吗？我承认我是机器学习（ML）的新手，才刚刚开始接触。我猜测可能是图像的居中或图像的归一化出了问题？非常感谢任何帮助！

EDIT1：

MNIST 测试集中的数字看起来像这样：

white numbers black backgrounds

Answer 1

看起来您有两个问题，正如您所怀疑的那样，这些问题与您的数据预处理有关。

首先，您的图像相对于训练数据是反转的：

在使用 img_pred = cv2.imread('five.JPG', 0) 以单通道（灰度）方式读取 .jpg 之后，背景像素接近白色，其值大约在 215-238 之间。
如果您查看 X_train 中的训练数据，背景像素全部为零，而数字是白色或接近白色（较高的像素值约为 210-255）。

尝试把您的图像与 X_train 中的一些样本并排绘制出来，您就会看到两者的黑白是相反的。

另一个问题是 cv2.resize() 的默认插值不会保留数据的取值范围。调整图像大小之后，最小值会跳到 60，而不是 0。请比较调整大小这一步之前和之后 img_pred.min() 和 img_pred.max() 的值。

您可以使用如下函数反转和缩放数据，使其看起来更像MNIST输入数据：

 def mnist_bytescale(image):
     """Invert an image and stretch it linearly over the range 0-255.

     The darkest input pixel maps to 255 and the brightest to 0, so a dark
     drawing on a light background becomes a light drawing on a dark
     background. The input is not modified.

     Args:
         image: Array-like of pixel intensities (any numeric dtype).

     Returns:
         An unsigned-integer array with the same shape as ``image`` and
         values in 0-255. An image without any contrast (all pixels equal)
         yields an all-zero array.
     """
     # Work on a float copy so the rescaling is not done in integer math and
     # the caller's array is left untouched.
     img_temp = np.array(image, dtype=np.float32)
     # Re-zero the data; after this the minimum is 0 and the maximum is the
     # full value range of the image.
     img_temp -= img_temp.min()
     value_range = img_temp.max()
     if value_range == 0:
         # A flat image has nothing to stretch; dividing would give 0/0 = NaN
         # and an undefined integer cast, so return a blank image instead.
         return np.zeros(img_temp.shape, dtype='uint')
     # Re-scale to 0-255 and invert.
     img_temp /= value_range
     img_temp *= 255
     return 255 - img_temp.astype('uint')

这将反转您的数据，并将其线性缩放到 0 到 255 的范围内，就像网络训练时所用的数据一样。但是，如果您绘制 mnist_bytescale(img_pred)，您会注意到大多数像素的背景值仍然不是 0，因为原始图像的背景值并不恒定（可能是由于 JPEG 压缩）。如果您的网络在处理这些经过反转和缩放的数据时仍然有问题，您可以尝试使用 np.clip 将背景值清零，看看是否有帮助。

Python机器学习数字识别

1 个答案: