Loss does not decrease when training on the Keras CIFAR-10 dataset

Date: 2019-06-11 13:34:22

Tags: python-3.x tensorflow keras

I built a VGG-like model in TensorFlow and trained it on CIFAR-10 loaded through Keras, but the loss does not decrease. Can you spot what the problem is?

CIFAR-10 dataset

from keras.datasets import cifar100, cifar10
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator


def load_cifar(num_class=100):
    if num_class == 100:
        nb_classes = 100
        (trainX, trainY), (testX, testY) = cifar100.load_data()
    else:
        nb_classes = 10
        (trainX, trainY), (testX, testY) = cifar10.load_data()

    trainX = trainX.astype('float32')
    testX = testX.astype('float32')

    trainX /= 255.
    testX /= 255.

    Y_train = np_utils.to_categorical(trainY, nb_classes)
    Y_test = np_utils.to_categorical(testY, nb_classes)

    generator = ImageDataGenerator(rotation_range=15,
                                   width_shift_range=5. / 32,
                                   height_shift_range=5. / 32,
                                   horizontal_flip=True if num_class ==10 else False)

    generator.fit(trainX, seed=0)
    return trainX, Y_train, testX, Y_test, generator

Model and training

import sys
import os
sys.path.append(os.path.abspath('../'))
print(sys.path)
import tensorflow as tf
from Input import cifar

BatchSize = 2

def gen_gen(batch_size=BatchSize):
    trainX, Y_train, testX, Y_test, generator = cifar.load_cifar(10)
    gen = generator.flow(trainX, Y_train, batch_size=batch_size)
    return gen


class vgg16TF():
    def __init__(self, ih=32, iw=32, batch_size=BatchSize):
        self.ih = ih
        self.iw = iw
        self.batch_size = batch_size

    def unit(self, x, conv_nums, filters, name=None):
        for i in range(1, conv_nums+1):
            x = tf.layers.conv2d(inputs=x, filters=filters, kernel_size=(5, 5), padding='same',
                                 use_bias=True, activation=tf.nn.relu, name=name+'_conv'+str(i))
        x = tf.layers.max_pooling2d(x, (3, 3), strides=(2, 2), name=name+'_pool')
        return x

    def net(self):
        input_x = tf.placeholder(shape=(self.batch_size, self.ih, self.iw, 3), dtype=tf.float32)
        x = self.unit(input_x, 2, 64, name='blook1')
        x = self.unit(x, 2, 64, name='blook2')
        # x = self.unit(x, 3, 256, name='blook3')
        # x = self.unit(x, 3, 512, name='blook4')
        # x = self.unit(x, 3, 512, name='blook5')
        x = tf.layers.flatten(x, name='flatten')
        x = tf.layers.dense(x, 384, activation=tf.nn.relu, name='fc1', use_bias=True)
        x = tf.layers.dense(x, 192, activation=tf.nn.relu, name='fc2', use_bias=True)
        y = tf.layers.dense(x, 10, name='prediction', use_bias=True)
        print(y)
        return input_x, y

    def loss(self, labels, logits):
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.arg_max(labels,1), logits=logits
        )
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        tf.add_to_collection('losses', cross_entropy_mean)
        return tf.add_n(tf.get_collection('losses'))


    def train(self):
        input_x, y_ = self.net()
        input_y = tf.placeholder(shape=(self.batch_size, 10), dtype=tf.float64)
        loss = self.loss(input_y, y_)
        optimizer = tf.train.AdamOptimizer().minimize(loss=loss)
        # correct_pred = tf.equal(tf.arg_max(y_, 1), tf.arg_max(input_y, 1))
        # accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            gen = gen_gen()
            for i in range(10000):
                train_x, train_y = gen.next()
                # print(train_x.shape)
                loss_val, _ = sess.run([loss, optimizer], feed_dict={input_x: train_x, input_y: train_y})
                if i % 10 == 0:
                    print(loss_val)

Some of the loss values over training steps: 2.2985106 2.2944324 2.3120923 2.306837 2.304546 2.2818785 2.3069105 2.3087378 2.3094501 2.2966876 2.3119392 2.2941442 2.2990022 2.2830834 2.3137615

1 answer:

Answer 0 (score: 0)

I don't see any obvious mistakes in the code, but I can share my experience: the Adam optimizer does not always work well on VGG-like networks because of their large number of parameters, and the symptom is exactly this, the loss does not decrease. In that case you should switch to plain SGD with an appropriate learning rate and a learning-rate schedule.
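
As a rough sketch of that suggestion (not code from the question or the answer), the AdamOptimizer line in train() could be swapped for plain SGD with momentum and an exponentially decaying learning rate; the starting rate, decay interval, decay rate, and momentum values below are illustrative assumptions, not tuned settings:

# Minimal sketch: replace the Adam optimizer in train() with SGD + momentum
# and an exponential learning-rate schedule (TF 1.x API, as in the question).
global_step = tf.Variable(0, trainable=False, name='global_step')
learning_rate = tf.train.exponential_decay(
    learning_rate=0.01,      # assumed starting rate
    global_step=global_step,
    decay_steps=1000,        # assumed decay interval, in training steps
    decay_rate=0.96,
    staircase=True)
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9).minimize(
    loss=loss, global_step=global_step)

The rest of the training loop can stay the same, since sess.run([loss, optimizer], ...) already drives the new optimizer and increments global_step.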