我尝试使用tf.layers.batch_normalization在完全连接的网络中使用批处理规范化。我创建了一个布尔占位符以指示训练模式。在每次训练之后使用training=False
将验证数据输入模型中时,验证准确性保持在“随机选择”级别。它似乎没有使用经过训练的模型权重。我相信我正确使用了tf.GraphKeys.UPDATE_OPS
。代码显示在这里。
当我在验证时间内更改training=True
时,结果更有意义。但这从根本上是不正确的。我的代码中可能存在一些逻辑错误,请提供帮助。谢谢!
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
def net(X,train_mode):
fc1 = tf.layers.dense(inputs=X, units=100,activation = None)
fc1 = tf.layers.batch_normalization(inputs=fc1, training=train_mode)
fc1 = tf.nn.relu(fc1)
#fc1 = tf.layers.dropout(inputs=fc1, rate=0.2,training=True)
fc2=tf.layers.dense(inputs=fc1, units=80, activation = None)
fc2 = tf.layers.batch_normalization(inputs=fc2, training=train_mode)
fc2 = tf.nn.relu(fc2)
fc2 = tf.layers.dropout(inputs=fc2, rate=0.4,training=train_mode)
fc3=tf.layers.dense(inputs=fc2, units=60, activation = None)
fc3 = tf.layers.batch_normalization(inputs=fc3,training=train_mode)
fc3 = tf.nn.relu(fc3)
fc3 = tf.layers.dropout(inputs=fc3, rate=0.4,training=train_mode)
output = tf.layers.dense(inputs=fc3, units=2,activation=None)
return output
def training(X_train,Y_train,X_test,Y_test):
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.name_scope('input'):
X = tf.placeholder(tf.float32, shape=[None, 3])
y = tf.placeholder(tf.int32, shape=[None,])
train_mode = tf.placeholder(tf.bool)
with tf.name_scope('loss'):
output = net(X,train_mode)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output)
loss=tf.reduce_mean(xentropy)
with tf.name_scope('optimization'):
with tf.control_dependencies(update_ops):
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
with tf.name_scope('accuracy'):
correct = tf.nn.in_top_k(output, y, 1)
acc = tf.reduce_mean(tf.cast(correct,tf.float32))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
numIter = int(len(X_train)/batchSize)
for epoch in range(NumEpoch):
trainacc = 0
for itr in range(numIter):
x_batch = X_train[itr*batchSize:(itr+1)*batchSize]
y_batch = Y_train[itr*batchSize:(itr+1)*batchSize]
_optimizer, _acc = sess.run([optimizer,acc], feed_dict = {X:x_batch,y:y_batch, train_mode:True})
trainacc+=_acc
acc_train=trainacc/numIter
val_acc = sess.run(acc, feed_dict = {X:X_test, y:Y_test, train_mode:False})
print('Epoch: {}, train_acc {:.4f}, val_acc {:.4f}'.format(epoch +1, acc_train, val_acc))
if __name__ == "__main__":
training(X_train,Y_train,X_test,Y_test)