Is my Fashion-MNIST CNN overfitting?

Asked: 2019-11-14 13:59:57

Tags: python tensorflow keras neural-network conv-neural-network

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam

cnn_model = Sequential()

# One convolution block: 64 filters of size 3x3, followed by 2x2 max pooling
cnn_model.add(Conv2D(64, (3, 3), input_shape=(28, 28, 1), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))

cnn_model.add(Dropout(0.25))

cnn_model.add(Flatten())
cnn_model.add(Dense(units=32, activation='relu'))
cnn_model.add(Dense(units=10, activation='sigmoid'))

cnn_model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(lr=0.001),
                  metrics=['accuracy'])

epochs = 50

history = cnn_model.fit(X_train,
                        y_train,
                        batch_size=512,
                        epochs=epochs,  # 'nb_epoch' is the deprecated Keras 1 name
                        verbose=1,
                        validation_data=(X_validate, y_validate))

This is the result I ended up with:

Epoch 50/50
48000/48000 [==============================] - 35s 728us/step - loss: 0.1265 - accuracy: 0.9537 - val_loss: 0.2425 - val_accuracy: 0.9167
training loss = 0.1265, validation loss = 0.2425
training accuracy = 95.37%, validation accuracy = 91.67%

My questions are:

  1. Is the model overfitting or underfitting?
  2. Should I increase the number of epochs?

[Image: graph of losses]

1 Answer:

Answer 0 (score: 2)

Since the model is overfitting, you can:

  1. Shuffle the data, by using shuffle=True in cnn_model.fit.
  2. Use Early Stopping.
  3. Use Regularization.
Complete code using the same architecture as yours, with reduced overfitting, is given below. The loss comes out slightly higher, but we can improve it by adding more convolution and pooling layers; a sketch of such a deeper variant follows the code.

# To support both Python 2 and Python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt

print(tf.__version__)


# Instantiate an L2 weight-decay regularizer, shared by the layers below
Regularizer = l2(0.001)

cnn_model = Sequential()

# In Keras 2 the kernel size must be a tuple; Conv2D(64, 3, 3) would set strides=3
cnn_model.add(Conv2D(64, (3, 3), input_shape=(28, 28, 1), activation='relu', data_format='channels_last',
                     activity_regularizer=Regularizer, kernel_regularizer=Regularizer))

cnn_model.add(MaxPool2D(pool_size=(2, 2)))

cnn_model.add(Dropout(0.25))

cnn_model.add(Flatten())

cnn_model.add(Dense(units = 32, activation = 'relu', 
                    activity_regularizer=Regularizer, kernel_regularizer=Regularizer))

cnn_model.add(Dense(units = 10, activation = 'sigmoid', 
                    activity_regularizer=Regularizer, kernel_regularizer=Regularizer))

cnn_model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(lr=0.001),
                  metrics=['accuracy'])

epochs = 50

cnn_model.summary()

# Load Fashion-MNIST, scale pixels to [0, 1], and add the channel dimension
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X_train_reshaped = X_train.astype(np.float32).reshape(-1, 28, 28, 1) / 255.0
X_test_reshaped = X_test.astype(np.float32).reshape(-1, 28, 28, 1) / 255.0

y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)


# Early stopping: halt training once val_loss has not improved for 15 epochs
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)

history = cnn_model.fit(x=X_train_reshaped,
                        y=y_train,
                        batch_size=512,
                        epochs=epochs,
                        callbacks=[callback],
                        verbose=1,
                        validation_data=(X_test_reshaped, y_test),
                        shuffle=True)  # reshuffle the training data every epoch

print(history.history.keys())

#  "Accuracy"
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# "Loss"
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylim([0.2, 1.5])
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
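
As a rough illustration of the "more convolution and pooling layers" suggestion above, here is a minimal sketch of a deeper variant of the same model. The second 32-filter block is an illustrative assumption, not a tuned value, and the final activation is swapped from sigmoid to softmax, the conventional choice for mutually exclusive classes; everything else mirrors the code above.

# Sketch only: a deeper variant with a second convolution/pooling block
deeper_model = Sequential()

deeper_model.add(Conv2D(64, (3, 3), input_shape=(28, 28, 1), activation='relu',
                        kernel_regularizer=Regularizer))
deeper_model.add(MaxPool2D(pool_size=(2, 2)))

# Second block: 32 filters is an assumed, untuned choice
deeper_model.add(Conv2D(32, (3, 3), activation='relu',
                        kernel_regularizer=Regularizer))
deeper_model.add(MaxPool2D(pool_size=(2, 2)))

deeper_model.add(Dropout(0.25))
deeper_model.add(Flatten())
deeper_model.add(Dense(units=32, activation='relu', kernel_regularizer=Regularizer))
# Softmax instead of sigmoid: standard for a single-label, 10-class output
deeper_model.add(Dense(units=10, activation='softmax'))

deeper_model.compile(loss='sparse_categorical_crossentropy',
                     optimizer=Adam(lr=0.001),
                     metrics=['accuracy'])

Each valid 3x3 convolution trims the feature map and each pooling step halves it (28x28 -> 26x26 -> 13x13 after the first block, 11x11 -> 5x5 after the second), so the flattened vector shrinks to 5*5*32 = 800 values, which cuts the dense-layer parameter count and with it the capacity to overfit. The sketch can be trained with the same cnn_model.fit arguments used above.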