Training accuracy keeps improving during deep-learning training, but the validation results are almost random

Asked: 2018-05-07 15:04:34

Tags: tensorflow deep-learning batch-normalization

I ran into a problem while training a classifier based on the VGG16 model.

When I add batch normalization after the convolutional layers, the training accuracy keeps improving, but the validation accuracy never does.

I also ran a comparison experiment: when I remove the line with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params):, the model behaves normally and the validation accuracy improves as well.
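For context, here is a minimal standalone sketch (my own illustration, with a hypothetical placeholder and layer name, not the real model) of what that arg_scope line turns on; slim.batch_norm behaves differently depending on is_training:

import tensorflow as tf
slim = tf.contrib.slim

# hypothetical placeholder just to illustrate the scope; shape matches the dataset below
images = tf.placeholder(tf.float32, [None, 40, 40, 1])
batch_norm_params = {'decay': 0.997, 'epsilon': 1e-5, 'scale': True, 'is_training': True}

with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params):
    # every slim.conv2d in this scope is followed by batch normalization:
    # is_training=True  -> normalize with the current batch statistics and create
    #                      moving-average update ops in tf.GraphKeys.UPDATE_OPS
    # is_training=False -> normalize with the stored moving averages (no updates)
    net = slim.conv2d(images, 64, [3, 3], scope='conv1_example')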

Environment:

  • Tensorflow 1.4.0
  • affNIST dataset

Model code

def VGG16Model(input_tensor, weight_decay, is_training):
    batch_norm_params = {
        'decay': 0.997,
        'epsilon': 1e-5,
        'scale': True,
        'is_training': is_training
    }
    with slim.arg_scope(vgg.vgg_arg_scope(weight_decay)):
        with slim.arg_scope([slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params):
            input_tensor = tf.image.resize_images(input_tensor, [224, 224])
            input_tensor = tf.concat([input_tensor, input_tensor, input_tensor], axis=-1)
            _, end_points = vgg.vgg_16(input_tensor, is_training=is_training)
            logits = end_points['vgg_16/pool5']
            feature = tf.reduce_mean(logits, reduction_indices=[1, 2])
            fc1 = tf.contrib.layers.fully_connected(feature, num_outputs=1024)
            fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=512)
            fc3 = tf.contrib.layers.fully_connected(fc2, num_outputs=10)
            return fc3

Training code

def train():
    batch_size = 64
    train_total = 1920000
    epoch_num = 5
    input_image_tensor = tf.placeholder(dtype=tf.float32, shape=[None, 40, 40, 1], name='image-input')
    input_label_tensor = tf.placeholder(dtype=tf.int32, shape=[None, 10], name='label-input')
    global_step = tf.Variable(initial_value=0, trainable=False)
    predicted_tensor = VGG16Model(input_image_tensor, weight_decay=1e-5, is_training=True)
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=predicted_tensor, labels=input_label_tensor))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.000005)
    train_op = optimizer.minimize(cross_entropy_loss, global_step=global_step)
    accuracy = tf.contrib.metrics.accuracy(labels=tf.argmax(input_label_tensor, axis=1),
                                           predictions=tf.argmax(tf.nn.softmax(predicted_tensor), axis=1))
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    reader = Reader(
        '/home/give/homework/cv/dataset/affiNist/training_and_validation_batches',
        '/home/give/homework/cv/dataset/affiNist/test.mat',
        batch_size=batch_size
    )
    print 'reader operation finished!'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    if FLAGS.pretrained_flag:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path,
                                                         slim.get_trainable_variables(),
                                                         ignore_missing_vars=True)
    with tf.Session(config=config) as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        if FLAGS.pretrained_flag:
            print variable_restore_op
            variable_restore_op(sess)
        if FLAGS.restore_flag:
            ckpt = tf.train.latest_checkpoint(FLAGS.restore_model_path)
            print('continue training from previous checkpoint from %s' % ckpt)
            start_step = int(os.path.basename(ckpt).split('-')[1])
            variable_restore_op = slim.assign_from_checkpoint_fn(ckpt,
                                                                 slim.get_trainable_variables(),
                                                                 ignore_missing_vars=True)
            variable_restore_op(sess)
            sess.run(tf.assign(global_step, start_step))

        step = 0
        for epoch_id in range(epoch_num):
            start = 0
            while start < train_total:
                step += 1
                end = start + batch_size
                if end > train_total:
                    end = train_total
                cur_batch_images = reader.train_images[start:end]
                cur_batch_labels = reader.train_labels[start:end]
                feed_dict = {
                    input_image_tensor: np.expand_dims(cur_batch_images, axis=3),
                    input_label_tensor: keras.utils.to_categorical(cur_batch_labels, num_classes=10)
                }
                _, train_loss, train_acc = sess.run([train_op, cross_entropy_loss, accuracy], feed_dict=feed_dict)
                print '[%d --> %d] / %d, training loss: %.4f, training acc: %.4f' % (
                    start, end, train_total, train_loss, train_acc)
                start = end
                if step % 100 == 0:
                    print 'save model at ', FLAGS.save_model_path + 'model.ckpt'
                    saver.save(sess, FLAGS.save_model_path + 'model.ckpt', global_step=global_step)
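For reference (this is the standard TF 1.x pattern as I understand it, not something present in my train() above): with slim.batch_norm left at its default updates_collections, the moving-average update ops end up in tf.GraphKeys.UPDATE_OPS, and the train op is usually made to depend on them. A sketch using the same variable names as in train():

# sketch only -- standard TF 1.x pattern for running the batch-norm
# moving-average updates together with the optimizer step
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(cross_entropy_loss, global_step=global_step)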

Test code

def test():
    # tensor
    input_image_tensor = tf.placeholder(dtype=tf.float32, shape=[None, 40, 40, 1], name='image-input')
    predicted_tensor = VGG16Model(input_image_tensor, is_training=False, weight_decay=1e-5)
    predicted_tensor = tf.nn.softmax(predicted_tensor)
    global_step = tf.Variable(initial_value=0, trainable=False)

    # hyper-parameters
    test_total = 320000
    batch_size = 70
    epoch_num = 1

    reader = Reader(
        '/home/give/homework/cv/dataset/affiNist/training_and_validation_batches',
        '/home/give/homework/cv/dataset/affiNist/test.mat',
        batch_size=batch_size
    )
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        ckpt = tf.train.latest_checkpoint(FLAGS.restore_model_path)
        print('continue training from previous checkpoint from %s' % ckpt)
        start_step = int(os.path.basename(ckpt).split('-')[1])
        variable_restore_op = slim.assign_from_checkpoint_fn(ckpt,
                                                             slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
        variable_restore_op(sess)
        sess.run(tf.assign(global_step, start_step))
        start = 0
        predicted = []
        while start < test_total:
            end = start + batch_size
            if end > test_total:
                end = test_total
            cur_batch_images = reader.test_images[start:end]
            predicted_array = sess.run(tf.argmax(predicted_tensor, axis=1), feed_dict={
                input_image_tensor: np.expand_dims(cur_batch_images, axis=3)
            })
            predicted.extend(predicted_array)
            print 'Batch Accuracy[%d, %d] : %.4f' % (
                start, test_total, np.mean(np.asarray(predicted_array == reader.test_labels[start:end], np.float32)))
            start = end
        predicted = np.array(predicted)
        print 'Total Accuracy: ', np.mean(np.asarray(predicted == reader.test_labels, np.float32))
        calculate_acc_error(predicted, reader.test_labels)
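A note on the restore in test(), again as a sketch of the usual approach rather than the code I actually ran: slim.get_trainable_variables() returns only trainable variables, so the batch-norm moving_mean / moving_variance are not in that list. Restoring every saved variable would look roughly like this:

# sketch: restore all saved variables, including the non-trainable
# batch-norm moving_mean / moving_variance
ckpt = tf.train.latest_checkpoint(FLAGS.restore_model_path)
saver = tf.train.Saver(tf.global_variables())
saver.restore(sess, ckpt)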

0 Answers:

No answers yet.