I built a neural network in TensorFlow and regularized it with dropout. After training, I evaluated it on the validation set and got an accuracy of 0.37. Then, purely as a test, I enabled dropout again during evaluation, and my validation accuracy jumped to 0.87. How is this possible? Enabling dropout at inference time should make the model worse, not better (a minimal check of dropout's inference behavior is sketched after the architecture code below). Here is the code where I define the network architecture:
with tf.name_scope('train_data'):
    # Batches of 28x28 grayscale images and one-hot labels over 36 classes
    X = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    Y = tf.placeholder(tf.int32, shape=(None, 36))
    # train toggles dropout; it defaults to False (inference mode)
    train = tf.placeholder_with_default(False, shape=())
    batch_size = tf.placeholder_with_default(1, shape=())

def layer(inp, k, l, train, padding='SAME'):
    # One block: conv -> batch norm -> leaky ReLU -> dropout
    with tf.name_scope('layer'):
        z = tf.layers.conv2d(inp, k, kernel_size=(3, 3), strides=(1, 1), padding=padding,
                             kernel_initializer=tf.initializers.he_normal(), name='z' + l)
        bn = tf.layers.batch_normalization(z, epsilon=1e-7, name='bn' + l)
        a = tf.nn.leaky_relu(bn, 0.2)
        # Dropout is only active when train is True
        d = tf.layers.dropout(a, rate=0.5, training=train, name='d' + l)
        return (z, bn, a, d)

with tf.name_scope('network'):
    d0 = tf.layers.dropout(X, rate=0.2, training=train, name='d0')
    z1, bn1, a1, d1 = layer(d0, 32, '1', train)
    p1 = tf.nn.max_pool(d1, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), padding='VALID', name='p1')
    z2, bn2, a2, d2 = layer(p1, 64, '2', train)
    p2 = tf.nn.max_pool(d2, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), padding='VALID', name='p2')
    fc2 = tf.layers.flatten(p2, name='fc2')
    enc = tf.layers.dense(fc2, 36, name='fc3')

with tf.name_scope('encodings'):
    # The first batch_size rows of enc are the target encodings; the next
    # 36 * batch_size rows are the candidate encodings, one block per class
    target = tf.slice(enc, [0, 0], [batch_size, -1])
    for i in range(36):
        globals()['enc' + str(i)] = tf.slice(enc, [(1 + i) * batch_size, 0], [batch_size, -1])

with tf.name_scope('prediction'):
    # The logit for class i is the negative RMS distance between its encoding and the target
    logits = tf.concat([-tf.sqrt(tf.reduce_mean((globals()['enc' + str(i)] - target) ** 2,
                                                axis=1, keepdims=True)) for i in range(36)], 1)
    out = tf.nn.softmax(logits)

with tf.name_scope('cost'):
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits))
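For context on why the result is surprising: tf.layers.dropout implements inverted dropout, so with training=True it zeroes units and scales the survivors by 1/(1 - rate), and with training=False it is an identity op. Enabling it at test time should therefore only add noise. A minimal standalone check of this behavior (toy values, not the network above):

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(1, 4))
training = tf.placeholder_with_default(False, shape=())
d = tf.layers.dropout(x, rate=0.5, training=training)

with tf.Session() as sess:
    v = np.ones((1, 4), dtype=np.float32)
    print(sess.run(d, feed_dict={x: v}))                  # identity: [[1. 1. 1. 1.]]
    print(sess.run(d, feed_dict={x: v, training: True}))  # random mask scaled by 1/(1-0.5), e.g. [[2. 0. 2. 0.]]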
I then trained the model with the Adam optimizer, setting train to True for the training runs:
_, loss, acc = sess.run([training_op, cost, accuracy], feed_dict={X: x_batch, Y: y_batch, train: True, batch_size: bs})
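The definitions of training_op and accuracy are not shown above; they are along these lines (the optimizer settings and the accuracy formulation are assumptions, not part of the original snippet):

# Assumed definitions of the ops referenced above (hyperparameters are guesses)
with tf.name_scope('optimizer'):
    training_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)
with tf.name_scope('accuracy'):
    correct = tf.equal(tf.argmax(out, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))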
Finally, for evaluation, I computed the accuracy on the validation set, leaving the train parameter at its default of False:
loss, acc = sess.run([cost, accuracy], feed_dict={X: x_batch, Y: y_batch, batch_size: bs})
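The surprising run was identical except that it re-enabled dropout at evaluation time:

# Same evaluation, but with dropout active -- this is the run that scored 0.87
loss, acc = sess.run([cost, accuracy], feed_dict={X: x_batch, Y: y_batch, train: True, batch_size: bs})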