这是我的代码:
数据集:mnist;
型号:resnet
基于tensorflow 1.15.0
在resnet模型训练期间,我在实验中注意到了这种行为。训练损失减少,并且训练精度在0.99左右增加。但是在测试过程中,测试数据的精度为0.06,与训练精度有很大差异。
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
np.set_printoptions(threshold=np.inf)
np.random.seed(2020)
tf.set_random_seed(2020)
class ModelVGG11(object):
def __init__(self, data_train, data_test, session):
self.data_train = data_train
self.data_test = data_test
self.sess = session
self.num_epochs = 10
self.num_class = 10
self.batch_size = 256
self.learning_rate = 0.01
self.__def_placeholders()
self.__initial_dataset()
self.__def_model()
self.__def_loss()
self.__def_optimizer()
self.__def_metrics()
def __def_placeholders(self):
self.inputs = tf.placeholder(tf.float32, shape=(None, 224, 224, 1), name='model_input')
self.label = tf.placeholder(tf.int64, shape=(None, ), name='model_label')
self.is_training = tf.placeholder(tf.bool, shape=(), name='is_training')
self.handle = tf.placeholder(tf.string, shape=())
self.global_step = tf.Variable(0, trainable=False)
def __initial_dataset(self):
def __image_resize(data):
image = tf.cast(data['image'], tf.float32)
# image = image / 255
image = tf.image.resize_image_with_pad(image, 224, 224)
return image, data['label']
self.data_train = self.data_train.map(__image_resize).shuffle(buffer_size=1000).batch(self.batch_size).prefetch(1)
self.data_test = self.data_test.map(__image_resize).shuffle(buffer_size=1000).batch(self.batch_size).prefetch(1)
self.iterator_train = self.data_train.make_initializable_iterator()
self.iterator_test = self.data_test.make_initializable_iterator()
self.handle_train = self.iterator_train.string_handle()
self.handle_test = self.iterator_test.string_handle()
iterator = tf.data.Iterator.from_string_handle(self.handle, self.data_train.output_types, self.data_train.output_shapes)
self.next_batch = iterator.get_next()
def __res_block(self, inputs, out_channels, conv1x1=False, strides=1, name=None):
with tf.variable_scope(name):
outputs = tf.layers.Conv2D(out_channels, kernel_size=3, strides=strides, padding='same', activation=None, kernel_initializer=tf.glorot_normal_initializer)(inputs)
outputs = tf.layers.BatchNormalization()(outputs, training=self.is_training)
outputs = tf.nn.relu(outputs)
outputs = tf.layers.Conv2D(out_channels, kernel_size=3, padding='same', activation=None, kernel_initializer=tf.glorot_normal_initializer)(outputs)
outputs = tf.layers.BatchNormalization()(outputs, training=self.is_training)
# change input channels equal to out_channels
if conv1x1:
inputs = tf.layers.Conv2D(out_channels, kernel_size=1, strides=strides)(inputs)
return tf.nn.relu(outputs + inputs)
def __def_model(self):
with tf.variable_scope('resnet'):
# [batch, 224, 224, 1]
inputs = tf.layers.Conv2D(filters=64, kernel_size=7, strides=2, padding='same', activation=None, kernel_initializer=tf.glorot_normal_initializer)(self.inputs) # [batch, 112, 112, 64]
inputs = tf.layers.BatchNormalization()(inputs, training=self.is_training)
inputs = tf.nn.relu(inputs)
inputs = tf.layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(inputs) # [batch, 56, 56, 64]
inputs = self.__res_block(inputs, 64, False, 1, 'res_bolock_1') # [batch, 56, 56, 64]
inputs = self.__res_block(inputs, 128, True, 2, 'res_bolock_2') # [batch, 28, 28, 128]
inputs = self.__res_block(inputs, 256, True, 2, 'res_bolock_3') # [batch, 14, 14, 256]
inputs = self.__res_block(inputs, 512, True, 2, 'res_bolock_4') # [batch, 7, 7, 512]
inputs = tf.reduce_mean(inputs, axis=[1, 2])
outputs = tf.layers.Dense(units=10, activation=None)(inputs)
self.outputs = outputs
def __def_loss(self):
y_true = tf.cast(tf.one_hot(self.label, self.num_class), tf.float32)
loss = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=self.outputs)
self.loss = tf.reduce_mean(loss)
tf.summary.scalar('loss', self.loss)
def __def_optimizer(self):
self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=self.global_step)
def __def_metrics(self):
y_pred_label = tf.arg_max(tf.nn.softmax(self.outputs), 1)
# self.acc = tf.metrics.accuracy(labels=self.label, predictions=y_pred_label, name='acc')
self.acc = tf.reduce_mean(tf.cast(tf.equal(y_pred_label, self.label), tf.float32))
tf.summary.scalar('acc', self.acc)
def train_and_evaluate(self):
# merge_summary = tf.summary.merge_all()
# summary_writer = tf.summary.FileWriter(self.summary_file, self.sess.graph)
# summary_writer.add_summary(train_summary, step)
self.sess.run(tf.global_variables_initializer())
self.sess.run(tf.local_variables_initializer())
handle_train, handle_test = self.sess.run([self.handle_train, self.handle_test])
for i in range(self.num_epochs):
epoch_train_metrics = []
try:
self.sess.run(self.iterator_train.initializer)
while True:
batch_x, batch_y = self.sess.run(self.next_batch, feed_dict={self.handle: handle_train})
loss, acc, _ = self.sess.run([self.loss, self.acc, self.optimizer], feed_dict={self.inputs: batch_x, self.label: batch_y, self.is_training: True})
epoch_train_metrics.append((loss, acc))
except tf.errors.OutOfRangeError:
try:
self.sess.run(self.iterator_test.initializer)
while True:
batch_x_test, batch_y_test = self.sess.run(self.next_batch, feed_dict={self.handle: handle_test})
acc = self.sess.run(self.acc, feed_dict={self.inputs: batch_x_test, self.label: batch_y_test, self.is_training: False})
except tf.errors.OutOfRangeError:
print('epoch {} train minibatch loss and acc: {}, test minibatch acc: {}'.format(i + 1, np.mean(epoch_train_metrics, axis=0), acc))
if __name__ == "__main__":
mnist_data = tfds.load('mnist')
mnist_train, mnist_test = mnist_data['train'], mnist_data['test']
with tf.Session() as sess:
model = ModelVGG11(mnist_train, mnist_test, sess)
model.train_and_evaluate()
结果如下:
epoch 1 train minibatch loss and acc: [0.45032835 0.8764905 ], test minibatch acc: 0.0
epoch 2 train minibatch loss and acc: [0.06525008 0.9811669 ], test minibatch acc: 0.0
epoch 3 train minibatch loss and acc: [0.04049642 0.9874501 ], test minibatch acc: 0.125
epoch 4 train minibatch loss and acc: [0.02956291 0.99075246], test minibatch acc: 0.0
epoch 5 train minibatch loss and acc: [0.02403079 0.99252546], test minibatch acc: 0.0625
epoch 6 train minibatch loss and acc: [0.02128655 0.9933344 ], test minibatch acc: 0.0625
epoch 7 train minibatch loss and acc: [0.01614667 0.9947141 ], test minibatch acc: 0.0625
epoch 8 train minibatch loss and acc: [0.01534516 0.99461436], test minibatch acc: 0.0625
epoch 9 train minibatch loss and acc: [0.01119067 0.9964262 ], test minibatch acc: 0.125
epoch 10 train minibatch loss and acc: [0.0108306 0.9965314], test minibatch acc: 0.0625
我只想知道为什么会这样?我的代码有任何错误吗?