Overfitting CNN on MNIST

Date: 2019-07-30 15:33:13

Tags: tensorflow keras conv-neural-network mnist

I coded a simple CNN to classify MNIST digits. It is fairly straightforward, but the model overfits very quickly.

I implemented anti-overfitting techniques such as dropout, batch normalization and data augmentation, but this simple model never improves.

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from PIL import Image

class ConvBlock(tf.keras.layers.Layer):
    """Convolutional Block featuring Conv2D + Pooling"""

    def __init__(self, conv_deep=1, kernels=32, kernel_size=3, pool_size=2, dropout_rate=0.4):
        super(ConvBlock, self).__init__()

        self.conv_layers = []
        self.pooling_layers = []
        self.bnorm_layers = []
        self.dropout_layers = []
        # One Conv -> MaxPool -> BatchNorm -> Dropout stack per level of depth.
        for _ in range(conv_deep):
            self.conv_layers.append(tf.keras.layers.Conv2D(filters=kernels, kernel_size=kernel_size, padding="same", activation="relu"))
            self.pooling_layers.append(tf.keras.layers.MaxPool2D(pool_size=pool_size))
            self.bnorm_layers.append(tf.keras.layers.BatchNormalization())
            self.dropout_layers.append(tf.keras.layers.Dropout(dropout_rate))

    def call(self, inputs, training=False):
        output = inputs

        # Dropout and batch norm only behave differently while training.
        for (conv, pooling, bnorm, dropout) in zip(self.conv_layers, self.pooling_layers, self.bnorm_layers, self.dropout_layers):
            output = conv(output)
            output = pooling(output)
            output = bnorm(output, training=training)
            output = dropout(output, training=training)

        return output

class DigitsClassifier(tf.keras.Model):
    """MNIST Digit Classifier"""

    def __init__(self):
        super(DigitsClassifier, self).__init__()

        self.conv_input = ConvBlock(conv_deep=2, kernels=32)
        self.conv_hiden = ConvBlock(conv_deep=1, kernels=16)

        self.flatten = tf.keras.layers.Flatten()
        self.hiden = tf.keras.layers.Dense(50, "relu")
        self.bnorm = tf.keras.layers.BatchNormalization()
        self.softmax = tf.keras.layers.Dense(10, "softmax")

    def call(self, inputs):

        output = self.conv_input(inputs)
        output = self.conv_hiden(output)

        output = self.flatten(output)
        output = self.hiden(output)
        output = self.bnorm(output)
        output = self.softmax(output)

        return output

#Load Train Data
(train_digits, train_labels), (eval_digits, eval_labels) = tf.keras.datasets.mnist.load_data("./Resources")
kaggle_digits = pd.read_csv("./Resources/test.csv").values

#Preprocess
train_digits = np.reshape(train_digits, [np.shape(train_digits)[0], 28, 28, 1])/255.0
eval_digits = np.reshape(eval_digits, [np.shape(eval_digits)[0], 28, 28, 1])/255.0
kaggle_digits = np.reshape(kaggle_digits, [np.shape(kaggle_digits)[0], 28, 28, 1])/255.0

#Generator
def get_sample(digits, return_labels=False, labels=None):
    if(return_labels):
        if(np.shape(digits)[0] == np.shape(labels)[0]):
            for index in range(0, np.shape(digits)[0]):
                yield (digits[index], labels[index])
        else:
            raise ValueError("Digits and Labels dont have the same numberof samples")
    else:
        for index in range(0, np.shape(digits)[0]):
            yield (digits[index])

def transform_sample(digit, label):
    # Rotate by a random multiple of 90 degrees (0, 90, 180 or 270).
    # tf.random draws a fresh value per sample; a plain random.randint here
    # would be evaluated only once, when tf.data traces this function.
    rot = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32)
    t_digit = tf.image.rot90(digit, rot)

    return t_digit, label

#Define datasets
train_ds = tf.data.Dataset.from_generator(get_sample, (tf.float32, tf.int32), args=[train_digits, True, train_labels]).map(transform_sample, 100).batch(1000).prefetch(2)
eval_ds = tf.data.Dataset.from_generator(get_sample, (tf.float32, tf.int32), args=[eval_digits, True, eval_labels]).batch(1000).prefetch(2)
kaggle_ds = tf.data.Dataset.from_generator(get_sample, (tf.float32), args=[kaggle_digits]).batch(1000).prefetch(2)


#Sanity-check one batch: print the labels and display the first digit
for digits, label in train_ds.take(1):
    print(label)
    plt.imshow(digits[0, :, :, 0], cmap="gray")
    plt.show()


#Define model and load weights (Pretrained on google colab notebook)
model = DigitsClassifier()
model.compile(tf.keras.optimizers.Adadelta(7.0), tf.keras.losses.SparseCategoricalCrossentropy())
model.fit(train_ds, epochs=50, verbose=2, validation_data=eval_ds)

At this point I really don't know what to do. I could reduce the model's complexity, but I don't think that would help.

PS: Counterintuitively, the model stops improving when I use data augmentation. My augmentation simply consists of a map function, transform_sample, that performs a random 90-degree rotation on each image, or no rotation at all.

1 Answer:

Answer 0 (score: 1)

Since the model is overfitting, you can:

  1. Shuffle the data, by using shuffle=True in cnn_model.fit. (Note: when the training input is a tf.data.Dataset, Keras documents that this flag is ignored and the dataset itself must be shuffled; see the sketch after this list.) The code is shown below:

    model.fit(train_ds, epochs=50, verbose=2, shuffle=True, validation_data=eval_ds)

  2. Use Early Stopping. The code is shown below:

    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)
    model.fit(train_ds, epochs=50, verbose=2, callbacks=[callback], validation_data=eval_ds)

  3. Use Regularization. The code for l2 Regularization is shown below (you can also try l1 Regularization or l1_l2 Regularization):

    from tensorflow.keras.regularizers import l2

    Regularizer = l2(0.001)

    self.conv_layers.append(tf.keras.layers.Conv2D(filters=kernels, kernel_size=kernel_size, padding="same", activation="relu", activity_regularizer=Regularizer, kernel_regularizer=Regularizer))

    self.hiden = tf.keras.layers.Dense(50, "relu", activity_regularizer=Regularizer, kernel_regularizer=Regularizer)

    self.softmax = tf.keras.layers.Dense(10, "softmax", activity_regularizer=Regularizer, kernel_regularizer=Regularizer)

  4. Finally, if there is still no change, you can try Pre-Trained Models like ResNet, etc. (a sketch follows below).
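As noted in point 1, model.fit's shuffle argument is documented to be ignored when the input is a tf.data.Dataset, so shuffling has to happen in the pipeline itself. A minimal sketch against the asker's train_ds (the 10000-element buffer is an arbitrary choice; MNIST has 60000 training samples, so this is a partial shuffle):

    train_ds = tf.data.Dataset.from_generator(get_sample, (tf.float32, tf.int32), args=[train_digits, True, train_labels])
    train_ds = train_ds.shuffle(10000)  # reshuffles at every epoch by default
    train_ds = train_ds.map(transform_sample, 100).batch(1000).prefetch(2)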
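And a minimal sketch of point 4, assuming tf.keras.applications.ResNet50 is acceptable. build_resnet_classifier is an illustrative name; the 28x28 grayscale digits are upscaled to 32x32 (the smallest input ResNet50 accepts) and repeated to 3 channels, and weights=None trains from scratch (pass weights="imagenet" to actually start from pre-trained weights, which downloads them on first use):

    def build_resnet_classifier():
        inputs = tf.keras.Input(shape=(28, 28, 1))
        # ResNet50 expects 3-channel images of at least 32x32.
        x = tf.keras.layers.Concatenate()([inputs, inputs, inputs])
        x = tf.keras.layers.Lambda(lambda t: tf.image.resize(t, (32, 32)))(x)
        base = tf.keras.applications.ResNet50(include_top=False, weights=None, input_shape=(32, 32, 3), pooling="avg")
        outputs = tf.keras.layers.Dense(10, activation="softmax")(base(x))
        return tf.keras.Model(inputs, outputs)

    model = build_resnet_classifier()
    model.compile(tf.keras.optimizers.Adam(), tf.keras.losses.SparseCategoricalCrossentropy())
    model.fit(train_ds, epochs=50, verbose=2, validation_data=eval_ds)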