TensorFlow CNN 模型在训练集上表现良好,但在验证集上表现很差

时间:2017-12-07 15:12:28

标签: python machine-learning tensorflow neural-network

我使用 TensorFlow 构建了一个 CNN 模型(回归)。它在训练集上表现良好,但在验证/测试集上表现不佳。下面是损失曲线,红线是训练集的损失,蓝线是验证集的损失:

image

这是过拟合吗?我尝试在模型中加入正则化和 dropout,但结果没有改变。

这是我的模型和损失函数:

def weight_variable(shape, reg=False):
    """Create a weight variable initialised from a truncated normal.

    Args:
        shape: Shape of the variable to create.
        reg: When true, the new variable is also added to the graph
            collection named 'reg'.

    Returns:
        The newly created ``tf.Variable``.
    """
    weights = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    if reg:
        # Registered variables can be fetched later via tf.get_collection('reg').
        tf.add_to_collection('reg', weights)
    return weights

def bias_variable(shape):
    """Create a bias variable of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W, stride):
    """Apply a 2-D convolution with VALID padding.

    Args:
        x: Input tensor passed to ``tf.nn.conv2d``.
        W: Convolution filter tensor.
        stride: Step used for both middle entries of the strides list
            (height and width under the default NHWC layout).

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    step = [1, stride, stride, 1]
    return tf.nn.conv2d(x, W, strides=step, padding='VALID')

def cnn_steer():
    """Assemble the regression CNN graph and return its endpoints.

    The network stacks five ReLU convolutions (conv1-conv5) on a
    66 x 200 x 3 input, flattens the result to 1152 features, and feeds
    it through four ReLU dense layers (fc1-fc4), each followed by
    dropout, before a single linear output unit.  The dense weights of
    fc1-fc4 are created with ``reg=True``; the convolution kernels and
    the output weights are not.

    Returns:
        A tuple ``(x, y_, y, keep_prob)``: the image placeholder of
        shape [None, 66, 200, 3], the target placeholder of shape
        [None], the prediction tensor produced by the final [10, 1]
        matmul, and the placeholder fed to every dropout layer.
    """
    # (name scope, kernel size, input channels, output channels, stride)
    conv_layers = (
        ('conv1', 5, 3, 24, 2),
        ('conv2', 5, 24, 36, 2),
        ('conv3', 5, 36, 48, 2),
        ('conv4', 3, 48, 64, 1),
        ('conv5', 3, 64, 64, 1),
    )
    # (name scope, input width, output width)
    # 1152 is the flattened conv5 output: 1 x 18 x 64 for a 66 x 200
    # input pushed through the VALID-padded convolutions above.
    dense_layers = (
        ('fc1', 1152, 1164),
        ('fc2', 1164, 100),
        ('fc3', 100, 50),
        ('fc4', 50, 10),
    )

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, shape=[None, 66, 200, 3])
        y_ = tf.placeholder(tf.float32, shape=[None])

    with tf.name_scope('keep_prob'):
        keep_prob = tf.placeholder(tf.float32)

    features = x

    # Convolutional feature extractor: conv -> bias -> ReLU per layer.
    for scope, ksize, in_ch, out_ch, stride in conv_layers:
        with tf.name_scope(scope):
            kernel = weight_variable([ksize, ksize, in_ch, out_ch])
            bias = bias_variable([out_ch])
            features = tf.nn.relu(conv2d(features, kernel, stride) + bias)

    # Dense head: matmul -> bias -> ReLU -> dropout per layer.
    for position, (scope, n_in, n_out) in enumerate(dense_layers):
        with tf.name_scope(scope):
            weights = weight_variable([n_in, n_out], True)
            bias = bias_variable([n_out])
            if position == 0:
                # Only the first dense layer sees the 4-D conv output.
                features = tf.reshape(features, [-1, n_in])
            activated = tf.nn.relu(tf.matmul(features, weights) + bias)
            features = tf.nn.dropout(activated, keep_prob)

    with tf.name_scope('output'):
        # Linear output unit; its weights are not added to 'reg'.
        out_weights = weight_variable([10, 1])
        out_bias = bias_variable([1])
        y = tf.matmul(features, out_weights) + out_bias

    return x, y_, y, keep_prob

损失函数是:

# L2 penalty (scale 0.02) over every variable registered in the 'reg'
# graph collection.
regularizer = tf.contrib.layers.l2_regularizer(scale=0.02)
reg_var = tf.get_collection('reg')
reg_term = tf.contrib.layers.apply_regularization(
    regularizer, weights_list=reg_var)

# Mean squared error between targets and predictions.
# NOTE(review): the transpose presumably lines a column of predictions
# up with a 1-D target vector -- confirm the shapes of y_actual and
# y_predict at the call site.
squared_error = tf.square(y_actual - tf.transpose(y_predict))
loss = tf.reduce_mean(squared_error)

# Objective to minimise: data term plus the regularisation penalty.
r_loss = loss + reg_term

我使用的keep_prob是0.6

有人可以告诉我我的模型有什么问题吗?

0 个答案:

没有答案