有没有人有一个使用tf.train.AdadeltaOptimizer且效果很好的代码示例?
我有一个TF图,最初是用tf.train.AdamOptimizer设置的,并且效果很好。当我用AdadeltaOptimizer替换它时,使用默认参数,它会产生糟糕的结果。
我使用了Cuda 7.5。
答案 0 :(得分:0)
以下是一段可以与“AdadeltaOptimizer”优化器配合使用的示例代码,它同样适用于“Adam”。两者唯一的区别在于:Adam 对“学习率”不敏感,而 Adadelta 对“学习率”很敏感。 我建议您阅读更多有关优化算法的资料(例如 here 链接中的内容)。 在您自己的示例中,只需尝试把“学习率”调小或调大即可(这一过程称为“超参数优化”)。
注意: 根据我的经验,'Adam'是RNN的一个非常好的优化器,比'AdaDelta'更好(使用示例代码,'Adam'可以更快地获得更好的分数)。另一方面,对于CNN,SGD + Momentum效果最佳。
代码,使用Bi-LSTM学习MNIST分类:
# Mnist classification using Bi-LSTM
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
# Load the MNIST dataset through the TensorFlow tutorial helper. Labels are
# requested one-hot encoded so they match the `target` placeholder below.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# --- Hyperparameters -------------------------------------------------------
learning_rate = 0.01  # handed to the optimizer when the training op is built
training_epochs = 100  # number of outer training-loop iterations
batch_size = 64  # examples requested per call to mnist.train.next_batch
# Each flattened image is reshaped to (seq_length, heigh_image) before being
# fed to the RNN, i.e. it is treated as a sequence of seq_length steps with
# heigh_image features per step.
seq_length = 28
heigh_image = 28
hidden_size = 128  # number of units in each LSTM cell
class_numer = 10  # width of the label vectors and of the final layer

# --- Graph inputs ----------------------------------------------------------
# NOTE: `input` shadows the Python builtin of the same name; it is left as-is
# because the rest of the script refers to the placeholder by this name.
# Fed with arrays reshaped to (batch, seq_length, heigh_image).
input = tf.placeholder(tf.float32, [None, None, heigh_image])
# One label row of width class_numer per example.
target = tf.placeholder(tf.float32, [None, class_numer])
# One sequence length per example, passed to the RNN as sequence_length.
seq_len = tf.placeholder(tf.int32, [None])
def fulconn_layer(input_data, output_dim, activation_func=None):
    """Build a fully connected layer on top of ``input_data``.

    A weight matrix and a bias vector are created as new variables, both
    initialised with ``tf.random_normal``, and the affine transform
    ``matmul(input_data, W) + b`` is added to the graph.

    Args:
        input_data: Tensor whose dimension at index 1 is statically known;
            that dimension is used as the input width of the layer.
        output_dim: Number of output units.
        activation_func: Optional callable applied to the affine result.
            When it is falsy the raw affine result is returned.

    Returns:
        The (optionally activated) output tensor of the layer.
    """
    in_features = int(input_data.get_shape()[1])
    weights = tf.Variable(tf.random_normal([in_features, output_dim]))
    bias = tf.Variable(tf.random_normal([output_dim]))

    affine = tf.matmul(input_data, weights) + bias
    if not activation_func:
        return affine
    return activation_func(affine)
# Bidirectional LSTM encoder: one cell reads the sequence forwards, the other
# backwards. `seq_len` tells the RNN how many steps of each example are valid.
with tf.name_scope("BiLSTM"):
    with tf.variable_scope('forward'):
        lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    with tf.variable_scope('backward'):
        lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=lstm_fw_cell,
        cell_bw=lstm_bw_cell,
        inputs=input,
        sequence_length=seq_len,
        dtype=tf.float32,
        scope="BiLSTM")

# The RNN returns the forward and backward outputs as two separate tensors;
# merge them along the feature axis (TF 0.x argument order: axis first).
outputs = tf.concat(2, outputs)

# For classification we need a summary of the WHOLE sequence from both
# directions. Slicing `outputs` at a single time step cannot provide that:
# at step 0 the forward cell has seen only the first row, and at the last
# step the backward cell has seen only the last row. The final states hold
# each direction's hidden state after it has consumed the full sequence, so
# concatenate those instead. The result is (batch, 2 * hidden_size).
state_fw, state_bw = states
last_output = tf.concat(1, [state_fw.h, state_bw.h])

# Final classification layer producing one unnormalised score per class.
yhat = fulconn_layer(last_output, class_numer)

# Keyword arguments keep the logits/labels roles explicit and work on both
# TF 0.x and TF 1.x (the latter rejects positional arguments here).
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=yhat, labels=target))

# Training op. Swap in tf.train.AdamOptimizer here to compare optimizers.
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost)

# Fraction of examples whose highest-scoring class matches the label.
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(target, 1), tf.argmax(yhat, 1)), tf.float32))
# Limit this process to 30% of the GPU memory.
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)

# Everything below is identical for every step/epoch, so build it once
# instead of inside the loops. Sequence lengths are created as int32 to match
# the dtype of the `seq_len` placeholder.
train_seq_len = np.full(batch_size, seq_length, dtype=np.int32)
x_test = mnist.test.images.reshape([-1, seq_length, heigh_image])
y_test = mnist.test.labels
test_seq_len = np.full(x_test.shape[0], seq_length, dtype=np.int32)
test_feed = {input: x_test, target: y_test, seq_len: test_seq_len}
batches_per_epoch = mnist.train.num_examples // batch_size

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) as session:
    session.run(tf.initialize_all_variables())
    print("Start Learing")
    for epoch in range(training_epochs):
        for _ in range(batches_per_epoch):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            # Turn each flat image into a sequence of rows for the RNN.
            x_batch = x_batch.reshape([batch_size, seq_length, heigh_image])
            train_feed = {input: x_batch, target: y_batch, seq_len: train_seq_len}
            session.run([optimizer], feed_dict=train_feed)

        # Training accuracy is measured on the last mini-batch of the epoch
        # only; test accuracy is measured on the full test set.
        train_accuracy = session.run(accuracy, feed_dict=train_feed)
        test_accuracy = session.run(accuracy, feed_dict=test_feed)
        print("epoch: %d, train_accuracy: %3f, test_accuracy: %3f" % (epoch, train_accuracy, test_accuracy))