With a large number of training examples (100,000), the accuracy of my convolutional neural network decreases instead of increasing. With a smaller number of training examples (6,000), the accuracy increases up to a point and then starts to decrease.
Example:
nr_training_examples 100000
tb 2500
epoch 0 loss 0.19646 acc 18.52
nr_test_examples 5000
Accuract test set 0.00
nr_training_examples 100000
tb 2500
epoch 1 loss 0.20000 acc 0.00
nr_test_examples 5000
Accuract test set 0.00
nr_training_examples 100000
tb 2500
What am I doing wrong?
I am using photos of faces as training examples (70 x 70 pixels).
The network is inspired by the VGG model:
2 x conv-3
max_pooling
2 x conv-3
max_pooling
2 x conv-3
1 x conv-1
max_pooling
2 x conv-3
1 x conv-1
max_pooling
fully_connected 1024
fully_connected 1024 - output 128
Here is the model:
import tensorflow as tf

def siamese_convnet(x):
    global keep_rate

    # Convolution filters (VGG-style 3x3 and 1x1 kernels), initialized
    # from a standard normal distribution.
    w_conv1_1 = tf.get_variable(name='w_conv1_1', initializer=tf.random_normal([3, 3, 1, 64]))
    w_conv1_2 = tf.get_variable(name='w_conv1_2', initializer=tf.random_normal([3, 3, 64, 64]))
    w_conv2_1 = tf.get_variable(name='w_conv2_1', initializer=tf.random_normal([3, 3, 64, 128]))
    w_conv2_2 = tf.get_variable(name='w_conv2_2', initializer=tf.random_normal([3, 3, 128, 128]))
    w_conv3_1 = tf.get_variable(name='w_conv3_1', initializer=tf.random_normal([3, 3, 128, 256]))
    w_conv3_2 = tf.get_variable(name='w_conv3_2', initializer=tf.random_normal([3, 3, 256, 256]))
    w_conv3_3 = tf.get_variable(name='w_conv3_3', initializer=tf.random_normal([1, 1, 256, 256]))
    w_conv4_1 = tf.get_variable(name='w_conv4_1', initializer=tf.random_normal([3, 3, 256, 512]))
    w_conv4_2 = tf.get_variable(name='w_conv4_2', initializer=tf.random_normal([3, 3, 512, 512]))
    w_conv4_3 = tf.get_variable(name='w_conv4_3', initializer=tf.random_normal([1, 1, 512, 512]))
    w_conv5_1 = tf.get_variable(name='w_conv5_1', initializer=tf.random_normal([3, 3, 512, 512]))
    w_conv5_2 = tf.get_variable(name='w_conv5_2', initializer=tf.random_normal([3, 3, 512, 512]))
    w_conv5_3 = tf.get_variable(name='w_conv5_3', initializer=tf.random_normal([1, 1, 512, 512]))

    # Fully connected weights and the 128-dimensional embedding output.
    w_fc_1 = tf.get_variable(name='fc_1', initializer=tf.random_normal([2*2*512, 1024]))
    w_fc_2 = tf.get_variable(name='fc_2', initializer=tf.random_normal([1024, 1024]))
    fc_layer = tf.get_variable(name='fc_layer', initializer=tf.random_normal([1024, 1024]))
    w_out = tf.get_variable(name='w_out', initializer=tf.random_normal([1024, 128]))

    # Biases, one per layer.
    bias_conv1_1 = tf.get_variable(name='bias_conv1_1', initializer=tf.random_normal([64]))
    bias_conv1_2 = tf.get_variable(name='bias_conv1_2', initializer=tf.random_normal([64]))
    bias_conv2_1 = tf.get_variable(name='bias_conv2_1', initializer=tf.random_normal([128]))
    bias_conv2_2 = tf.get_variable(name='bias_conv2_2', initializer=tf.random_normal([128]))
    bias_conv3_1 = tf.get_variable(name='bias_conv3_1', initializer=tf.random_normal([256]))
    bias_conv3_2 = tf.get_variable(name='bias_conv3_2', initializer=tf.random_normal([256]))
    bias_conv3_3 = tf.get_variable(name='bias_conv3_3', initializer=tf.random_normal([256]))
    bias_conv4_1 = tf.get_variable(name='bias_conv4_1', initializer=tf.random_normal([512]))
    bias_conv4_2 = tf.get_variable(name='bias_conv4_2', initializer=tf.random_normal([512]))
    bias_conv4_3 = tf.get_variable(name='bias_conv4_3', initializer=tf.random_normal([512]))
    bias_conv5_1 = tf.get_variable(name='bias_conv5_1', initializer=tf.random_normal([512]))
    bias_conv5_2 = tf.get_variable(name='bias_conv5_2', initializer=tf.random_normal([512]))
    bias_conv5_3 = tf.get_variable(name='bias_conv5_3', initializer=tf.random_normal([512]))
    bias_fc_1 = tf.get_variable(name='bias_fc_1', initializer=tf.random_normal([1024]))
    bias_fc_2 = tf.get_variable(name='bias_fc_2', initializer=tf.random_normal([1024]))
    bias_fc = tf.get_variable(name='bias_fc', initializer=tf.random_normal([1024]))
    out = tf.get_variable(name='out', initializer=tf.random_normal([128]))

    # Reshape the flat input into 70x70 grayscale images.
    x = tf.reshape(x, [-1, 70, 70, 1])

    # conv2d and max_pool are helper functions defined elsewhere in the script.
    conv1_1 = tf.nn.relu(conv2d(x, w_conv1_1) + bias_conv1_1)
    conv1_2 = tf.nn.relu(conv2d(conv1_1, w_conv1_2) + bias_conv1_2)
    max_pool1 = max_pool(conv1_2)

    conv2_1 = tf.nn.relu(conv2d(max_pool1, w_conv2_1) + bias_conv2_1)
    conv2_2 = tf.nn.relu(conv2d(conv2_1, w_conv2_2) + bias_conv2_2)
    max_pool2 = max_pool(conv2_2)

    conv3_1 = tf.nn.relu(conv2d(max_pool2, w_conv3_1) + bias_conv3_1)
    conv3_2 = tf.nn.relu(conv2d(conv3_1, w_conv3_2) + bias_conv3_2)
    conv3_3 = tf.nn.relu(conv2d(conv3_2, w_conv3_3) + bias_conv3_3)
    max_pool3 = max_pool(conv3_3)

    conv4_1 = tf.nn.relu(conv2d(max_pool3, w_conv4_1) + bias_conv4_1)
    conv4_2 = tf.nn.relu(conv2d(conv4_1, w_conv4_2) + bias_conv4_2)
    conv4_3 = tf.nn.relu(conv2d(conv4_2, w_conv4_3) + bias_conv4_3)
    max_pool4 = max_pool(conv4_3)

    conv5_1 = tf.nn.relu(conv2d(max_pool4, w_conv5_1) + bias_conv5_1)
    conv5_2 = tf.nn.relu(conv2d(conv5_1, w_conv5_2) + bias_conv5_2)
    conv5_3 = tf.nn.relu(conv2d(conv5_2, w_conv5_3) + bias_conv5_3)
    max_pool5 = max_pool(conv5_3)

    # Note: this flattens max_pool4, so the conv5_* block and max_pool5
    # computed above are never used by the rest of the network.
    fc_helper = tf.reshape(max_pool4, [-1, 2*2*512])
    fc_1 = tf.nn.relu(tf.matmul(fc_helper, w_fc_1) + bias_fc_1)
    #fc_2 = tf.nn.relu(tf.matmul(fc_1, w_fc_2) + bias_fc_1)
    fc = tf.nn.relu(tf.matmul(fc_1, fc_layer) + bias_fc)

    output = tf.matmul(fc, w_out) + out
    # l2-normalizes along axis 0, i.e. across the batch dimension.
    output = tf.nn.l2_normalize(output, 0)

    return output
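The snippet relies on conv2d and max_pool helpers that are not shown. Below is a minimal sketch of what they presumably look like: stride-1 SAME convolutions as in VGG, and 2x2 max pooling. The VALID padding on the pool is an assumption, chosen because five such pools take 70 → 35 → 17 → 8 → 4 → 2, which is the only way the 2*2*512 flatten size matches (and only for max_pool5):

def conv2d(x, w):
    # Assumed: stride-1 convolution with SAME padding, as in VGG.
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')

def max_pool(x):
    # Assumed: 2x2 max pooling with stride 2; VALID padding brings a
    # 70x70 input down to 2x2 after five pools.
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')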
Answer 0 (score: 2)
The accuracy increases to a point and then starts to decrease.
This suggests that your NN is overfitting. If you still have doubts, check your cost function's value: if it starts to increase at some point, the network is overfitting.
There are several common ways to deal with overfitting:
- get more training data, or artificially expand the data you have (e.g. with augmentation)
- add L1 or L2 regularization (weight decay)
- use dropout
- stop training earlier (early stopping)
You can find details about these solutions here:
http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization
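As an illustration of the regularization option, here is a minimal sketch of adding L2 weight decay to the model above; the existing loss tensor and the 5e-4 coefficient are assumptions, not code from the question:

# Hypothetical: sum L2 penalties over the weight matrices (not the biases)
# and add them to an existing loss tensor before building the optimizer.
l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()
               if v.name.startswith('w_') or v.name.startswith('fc')])
loss = loss + 5e-4 * l2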
Answer 1 (score: 1)
Your network is probably overfitting. Try adding dropout (with a keep probability of around 0.5) to the fully connected layers.
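For the model above, that change could look like the sketch below. It reuses the keep_rate global the function already declares; treating it as the dropout keep probability is an assumption:

fc_1 = tf.nn.relu(tf.matmul(fc_helper, w_fc_1) + bias_fc_1)
fc_1 = tf.nn.dropout(fc_1, keep_prob=keep_rate)  # e.g. keep_rate = 0.5 during training

fc = tf.nn.relu(tf.matmul(fc_1, fc_layer) + bias_fc)
fc = tf.nn.dropout(fc, keep_prob=keep_rate)

Remember to set keep_rate to 1.0 when evaluating on the test set, so that dropout is only active during training.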