Huge gap between train_loss and test_loss

Asked: 2018-07-02 08:59:42

Tags: tensorflow lstm

I wrote a CRNN model with TensorFlow. I use the parameters trainable, is_training, and reuse to put the model into training or testing mode. However, there is a huge gap between my train_loss and test_loss. Here is my model:

import tensorflow as tf
import tensorflow.contrib.slim as slim


class CRNN:
    def __init__(self, is_training, trainable, reuse, nclass):
        self.is_training = is_training
        self.trainable = trainable
        self.reuse = reuse
        self.nclass = nclass

    def build_net(self, images):
        # images = tf.placeholder(dtype=tf.float32, shape=[32, 32, 592, 3], name='input')
        initializer = slim.xavier_initializer()
        reuse = self.reuse
        trainable = self.trainable
        is_training = self.is_training
        nclass = self.nclass

        with tf.variable_scope('CNN', reuse=reuse):
            # conv0 relu0 pooling0
            with tf.variable_scope('layer1', reuse=reuse):
                net = slim.conv2d(images, 64, 3, 1, 'SAME', activation_fn=tf.nn.relu,
                                  weights_initializer=initializer, trainable=trainable)
                net = slim.max_pool2d(net, 2, 2, 'VALID')

            # conv1 relu1 pooling1
            with tf.variable_scope('layer2', reuse=reuse):
                net = slim.conv2d(net, 128, 3, 1, 'SAME', activation_fn=tf.nn.relu,
                                  weights_initializer=initializer, trainable=trainable)
                net = slim.max_pool2d(net, 2, 2, 'VALID')

            # conv2 bn2 relu2
            # NOTE: the conv already applies ReLU before batch norm, and another
            # ReLU follows it; conv (activation_fn=None) -> BN -> ReLU is the
            # more common ordering.
            with tf.variable_scope('layer3', reuse=reuse):
                net = slim.conv2d(net, 256, 3, 1, 'SAME', activation_fn=tf.nn.relu,
                                  weights_initializer=initializer, trainable=trainable)
                net = slim.batch_norm(net, scale=True, trainable=trainable, is_training=is_training)
                net = tf.nn.relu(net)

            # conv3 relu3 pooling2
            with tf.variable_scope('layer4', reuse=reuse):
                net = slim.conv2d(net, 256, 3, 1, 'SAME', activation_fn=tf.nn.relu,
                                  weights_initializer=initializer, trainable=trainable)
                net = slim.max_pool2d(net, 2, 2, 'VALID')

            # conv4 bn4 relu4
            with tf.variable_scope('layer5', reuse=reuse):
                net = slim.conv2d(net, 512, 3, 1, 'SAME', activation_fn=tf.nn.relu,
                                  weights_initializer=initializer, trainable=trainable)
                net = slim.batch_norm(net, scale=True, trainable=trainable, is_training=is_training)
                net = tf.nn.relu(net)

            # conv5 relu5 pooling3
            with tf.variable_scope('layer6', reuse=reuse):
                net = slim.conv2d(net, 512, 3, 1, 'SAME', activation_fn=tf.nn.relu,
                                  weights_initializer=initializer, trainable=trainable)
                # Pad width by 1 on each side; tensor layout is [batch, height, width, channel].
                paddings = tf.constant([[0, 0], [0, 0], [1, 1], [0, 0]])
                net = tf.pad(net, paddings, "CONSTANT")
                net = slim.max_pool2d(net, 2, (2, 1))

            # conv6 bn6 relu6
            with tf.variable_scope('layer7', reuse=reuse):
                net = slim.conv2d(net, 512, 2, 1, 'VALID', activation_fn=tf.nn.relu,
                                  weights_initializer=initializer, trainable=trainable)
                net = slim.batch_norm(net, scale=True, trainable=trainable, is_training=is_training)
                net = tf.nn.relu(net)

        shape = net.get_shape().as_list()
        assert shape[1] == 1  # height of the feature map must equal 1
        net = tf.squeeze(net, axis=1)  # -> [batch, width, 512]

        with tf.variable_scope('RNN', reuse=reuse):
            with tf.variable_scope('BiLSTM1', reuse=reuse):
                # Forward direction cells
                fw_cell_list = [tf.contrib.rnn.BasicLSTMCell(256, forget_bias=1.0, reuse=reuse)]
                # Backward direction cells
                bw_cell_list = [tf.contrib.rnn.BasicLSTMCell(256, forget_bias=1.0, reuse=reuse)]
                net, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(fw_cell_list, bw_cell_list,
                                                                           net, dtype=tf.float32)
            with tf.variable_scope('embeings1', reuse=reuse):
                net = slim.fully_connected(net, 256)

            with tf.variable_scope('BiLSTM2', reuse=reuse):
                # Forward direction cells
                fw_cell_list = [tf.contrib.rnn.BasicLSTMCell(256, forget_bias=1.0, reuse=reuse)]
                # Backward direction cells
                bw_cell_list = [tf.contrib.rnn.BasicLSTMCell(256, forget_bias=1.0, reuse=reuse)]
                net, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(fw_cell_list, bw_cell_list,
                                                                           net, dtype=tf.float32)
            with tf.variable_scope('embeings2', reuse=reuse):
                # NOTE: slim.fully_connected applies ReLU by default; for CTC
                # logits, activation_fn=None is the more common choice.
                net = slim.fully_connected(net, nclass)
        return net
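One detail worth checking with slim.batch_norm: it registers its moving-mean/variance updates in the tf.GraphKeys.UPDATE_OPS collection, and those updates only run if the train op depends on them. If they never run, the statistics used at test time (is_training=False) stay at their initial values, which can produce exactly this kind of train/test gap. A minimal sketch of the usual pattern; total_loss and the optimizer choice are assumptions, not taken from the post:

# Make the train op depend on the batch-norm update ops.
# total_loss is a hypothetical name; the optimizer and learning rate are placeholders.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-4).minimize(total_loss)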

I create the training model and the test model with the following code:

crnn_train = CRNN(is_training=True, trainable=True, reuse=False, nclass=13)
crnn_val = CRNN(is_training=False, trainable=False, reuse=True, nclass=13)
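With reuse=False for the training instance and reuse=True for the validation instance, both build_net calls resolve to the same variables, provided they are made under the same outer scope. A minimal usage sketch; the placeholder shapes follow the commented-out placeholder in the model code above:

train_images = tf.placeholder(tf.float32, shape=[32, 32, 592, 3], name='train_input')
val_images = tf.placeholder(tf.float32, shape=[32, 32, 592, 3], name='val_input')

train_logits = crnn_train.build_net(train_images)  # creates the variables
val_logits = crnn_val.build_net(val_images)        # reuses the same variables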

And here are my train_loss and test_loss values:

steps: 500, train_loss = 39.6001, test_loss = 0.0000
steps: 1000, train_loss = 34.9743, test_loss = 114.3018
steps: 1500, train_loss = 26.7536, test_loss = 114.3018
steps: 2000, train_loss = 12.9428, test_loss = 107.1753
steps: 2500, train_loss = 6.2388, test_loss = 107.1753
steps: 3000, train_loss = 3.3221, test_loss = 98.5681
steps: 3500, train_loss = 1.6190, test_loss = 98.5681
steps: 4000, train_loss = 0.8410, test_loss = 70.5069
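The post does not show how these losses are computed; for a CRNN they are typically CTC losses over the per-timestep outputs of build_net, which have shape [batch, width, nclass]. A minimal sketch, assuming sparse labels and a per-example sequence length (sparse_labels and seq_len are hypothetical names):

# tf.nn.ctc_loss expects time-major inputs: [max_time, batch, nclass].
logits = tf.transpose(train_logits, [1, 0, 2])
ctc = tf.nn.ctc_loss(labels=sparse_labels, inputs=logits, sequence_length=seq_len)
total_loss = tf.reduce_mean(ctc)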

By the way, I would also like to know how to set the trainable parameter for the LSTM cells. Can anyone help?
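On that trainable question: tf.contrib.rnn.BasicLSTMCell does not take a trainable argument in most 1.x releases, so the usual way to freeze the LSTM weights is to restrict the optimizer's var_list instead. A minimal sketch, assuming the CNN/RNN scope names from the model above and the hypothetical total_loss:

# Optimize only the CNN variables; the RNN (LSTM) weights stay frozen.
cnn_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='CNN')
train_op = tf.train.AdamOptimizer(1e-4).minimize(total_loss, var_list=cnn_vars)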

0 Answers:

No answers yet.