我使用 TensorFlow 构建了一个 CNN 模型(回归)。它在训练集上表现良好,但在验证/测试集上表现不佳。下面是损失曲线,红线是训练集的损失,蓝线是验证集的损失:
这是否是过拟合?我尝试在模型中加入正则化和 dropout,但结果没有变化。
这是我的模型和损失函数:
def weight_variable(shape, reg=False):
    """Create a weight variable initialised from a truncated normal.

    Args:
        shape: Shape of the variable to create.
        reg: When truthy, the new variable is also added to the ``'reg'``
            graph collection.

    Returns:
        The newly created ``tf.Variable``.
    """
    # Initial values are drawn with a standard deviation of 0.1.
    weights = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    if reg:
        tf.add_to_collection('reg', weights)
    return weights
def bias_variable(shape):
    """Create a bias variable of the given shape with every element set to 0.1.

    Args:
        shape: Shape of the variable to create.

    Returns:
        The newly created ``tf.Variable``.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W, stride):
    """Apply a 2-D convolution with ``'VALID'`` padding.

    Args:
        x: Input tensor passed to ``tf.nn.conv2d``.
        W: Filter tensor passed to ``tf.nn.conv2d``.
        stride: Step used for both middle entries of the strides vector
            (height and width under TensorFlow's default NHWC layout).

    Returns:
        The tensor produced by ``tf.nn.conv2d``.
    """
    strides = [1, stride, stride, 1]
    return tf.nn.conv2d(x, W, strides=strides, padding='VALID')
def cnn_steer():
    """Build the graph of a five-conv, four-hidden-dense-layer regression CNN.

    The input placeholder takes batches of 66x200 images with 3 channels and
    the network ends in a single linear output unit per example. Dropout,
    driven by one shared ``keep_prob`` placeholder, follows each of the four
    hidden dense layers. Only those four layers create their weights with
    ``reg=True``; the convolution kernels and the output weights do not.

    Returns:
        A tuple ``(x, y_, y, keep_prob)``:
            x: float32 image placeholder of shape ``[None, 66, 200, 3]``.
            y_: float32 target placeholder of shape ``[None]``.
            y: prediction tensor produced by the final ``[10, 1]`` matmul
                plus bias (no output non-linearity is applied).
            keep_prob: float32 placeholder (no shape given) fed to every
                ``tf.nn.dropout`` call.
    """
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, shape=[None, 66, 200, 3])
        y_ = tf.placeholder(tf.float32, shape=[None])

    with tf.name_scope('keep_prob'):
        keep_prob = tf.placeholder(tf.float32)

    # Convolutional stack: (scope, kernel size, in channels, out channels, stride).
    conv_specs = (
        ('conv1', 5, 3, 24, 2),
        ('conv2', 5, 24, 36, 2),
        ('conv3', 5, 36, 48, 2),
        ('conv4', 3, 48, 64, 1),
        ('conv5', 3, 64, 64, 1),
    )
    features = x
    for scope, ksize, in_ch, out_ch, stride in conv_specs:
        with tf.name_scope(scope):
            kernel = weight_variable([ksize, ksize, in_ch, out_ch])
            offset = bias_variable([out_ch])
            features = tf.nn.relu(conv2d(features, kernel, stride) + offset)

    # First dense layer. It also flattens the conv5 output: with a 66x200
    # input and VALID padding, conv5 yields 1 x 18 x 64 = 1152 values.
    with tf.name_scope('fc1'):
        fc_weights = weight_variable([1152, 1164], True)
        fc_offset = bias_variable([1164])
        flattened = tf.reshape(features, [-1, 1152])
        hidden = tf.nn.relu(tf.matmul(flattened, fc_weights) + fc_offset)
        hidden = tf.nn.dropout(hidden, keep_prob)

    # Remaining hidden dense layers: (scope, input units, output units).
    dense_specs = (
        ('fc2', 1164, 100),
        ('fc3', 100, 50),
        ('fc4', 50, 10),
    )
    for scope, in_units, out_units in dense_specs:
        with tf.name_scope(scope):
            fc_weights = weight_variable([in_units, out_units], True)
            fc_offset = bias_variable([out_units])
            hidden = tf.nn.relu(tf.matmul(hidden, fc_weights) + fc_offset)
            hidden = tf.nn.dropout(hidden, keep_prob)

    # Linear output layer; its weights are created without reg=True.
    with tf.name_scope('output'):
        out_weights = weight_variable([10, 1])
        out_offset = bias_variable([1])
        y = tf.matmul(hidden, out_weights) + out_offset

    return x, y_, y, keep_prob
损失函数是:
# L2 penalty (scale 0.02) over every variable stored in the 'reg' collection.
regularizer = tf.contrib.layers.l2_regularizer(scale=0.02)
reg_var = tf.get_collection('reg')
reg_term = tf.contrib.layers.apply_regularization(
    regularizer,
    weights_list=reg_var,
)

# Mean squared error between targets and predictions.
# NOTE(review): y_predict is transposed before the subtraction, presumably so
# an [N, 1] prediction broadcasts element-wise against an [N] target --
# confirm the shapes where y_actual / y_predict are defined.
prediction_error = y_actual - tf.transpose(y_predict)
loss = tf.reduce_mean(tf.square(prediction_error))

# Training objective: data loss plus the regularisation penalty.
r_loss = loss + reg_term
我使用的keep_prob是0.6
有人可以告诉我我的模型有什么问题吗?