Question

有没有人有一个使用tf.train.AdadeltaOptimizer且效果很好的代码示例？

我有一个TF图，最初是用tf.train.AdamOptimizer设置的，并且效果很好。当我用AdadeltaOptimizer替换它时，使用默认参数，它会产生糟糕的结果。

我使用了Cuda 7.5。

Answer 1

以下是使用“AdadeltaOptimizer”优化器的示例代码，它同样适用于“Adam”。两者唯一的区别是：Adam 对“学习率”不敏感，而 Adadelta 对“学习率”敏感。我建议您阅读更多有关优化算法的资料（如 here）。在您自己的示例中，只需尝试把“学习率”调小或调大即可（这一过程称为“超参数优化”）。

注意：根据我的经验，'Adam'是RNN的一个非常好的优化器，比'AdaDelta'更好（使用示例代码，'Adam'可以更快地获得更好的分数）。另一方面，对于CNN，SGD + Momentum效果最佳。

代码，使用Bi-LSTM学习MNIST分类：

# Mnist classification using Bi-LSTM
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

# --- Data ---------------------------------------------------------------
# MNIST data sets read from the "MNIST_data" directory, labels one-hot encoded.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# --- Optimisation hyper-parameters ---------------------------------------
learning_rate = 0.01
training_epochs = 100
batch_size = 64

# --- Model dimensions ------------------------------------------------------
# The training loop reshapes each image to (seq_length, heigh_image), i.e. the
# network reads an image as a sequence of rows.
seq_length = 28
heigh_image = 28
hidden_size = 128
class_numer = 10

# --- Graph inputs ----------------------------------------------------------
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# the rest of the script refers to it.
input = tf.placeholder(tf.float32, shape=[None, None, heigh_image])
target = tf.placeholder(tf.float32, shape=[None, class_numer])
seq_len = tf.placeholder(tf.int32, shape=[None])

def fulconn_layer(input_data, output_dim, activation_func=None):
    """Build a fully connected layer on top of a 2-D tensor.

    Creates a weight matrix and a bias vector, both initialised from
    ``tf.random_normal``, and returns ``input_data @ W + b``, optionally
    passed through ``activation_func``.

    Args:
        input_data: Tensor whose second dimension is statically known; that
            dimension is used as the layer's input width. (Inside this
            function the name hides the imported ``input_data`` module.)
        output_dim: Number of output units.
        activation_func: Optional callable applied to the affine output.
            When falsy, the raw affine output is returned.

    Returns:
        The layer's output tensor.
    """
    n_inputs = int(input_data.get_shape()[1])
    weights = tf.Variable(tf.random_normal([n_inputs, output_dim]))
    bias = tf.Variable(tf.random_normal([output_dim]))

    affine = tf.matmul(input_data, weights) + bias
    if not activation_func:
        return affine
    return activation_func(affine)

# Bidirectional LSTM: one cell per direction, unrolled dynamically over the
# time axis of `input`, with per-example lengths taken from `seq_len`.
with tf.name_scope("BiLSTM"):
    with tf.variable_scope('forward'):
        lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)
    with tf.variable_scope('backward'):
        lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(
            hidden_size, forget_bias=1.0, state_is_tuple=True)

    # `outputs` and `states` are each a (forward, backward) pair.
    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=lstm_fw_cell,
        cell_bw=lstm_bw_cell,
        inputs=input,
        sequence_length=seq_len,
        dtype=tf.float32,
        scope="BiLSTM")

# For classification we want one summary vector per direction that has seen
# the whole sequence. The previous code took `outputs[:, 0, :]`, i.e. time
# step 0, where the forward LSTM has only consumed the first image row.
# The final states returned by bidirectional_dynamic_rnn are the states after
# each direction finished its pass, so use their hidden parts (`.h`, available
# because the cells were built with state_is_tuple=True).
fw_state, bw_state = states
# Concatenate along the feature axis -> [batch, 2 * hidden_size].
# (TF 0.x argument order: concat_dim first, matching the rest of this script.)
last_output = tf.concat(1, [fw_state.h, bw_state.h])

# Final classification layer: unnormalised class scores (logits).
yhat = fulconn_layer(last_output, class_numer)

# Keyword arguments make it impossible to swap logits and labels by accident.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=yhat, labels=target))
# tf.train.AdamOptimizer can be substituted here to compare optimizers.
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost)
# Fraction of examples whose arg-max prediction equals the arg-max label.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(target, 1), tf.argmax(yhat, 1)), tf.float32))
# Limit this process to 30% of the GPU memory.
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)

# The test set does not change between epochs, so build its feed once instead
# of reshaping it on every epoch.
x_test = mnist.test.images.reshape([-1, seq_length, heigh_image])
y_test = mnist.test.labels
# `seq_len` is an int32 placeholder: build integer arrays explicitly rather
# than relying on a float64 -> int32 conversion at feed time.
test_seq_len = np.full(x_test.shape[0], seq_length, dtype=np.int32)
test_feed = {input: x_test, target: y_test, seq_len: test_seq_len}

# Every training batch has the same size, so its length vector is constant too.
train_seq_len = np.full(batch_size, seq_length, dtype=np.int32)
batches_per_epoch = mnist.train.num_examples // batch_size

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) as session:
    session.run(tf.initialize_all_variables())
    print("Start Learing")
    for epoch in range(training_epochs):
        for _ in range(batches_per_epoch):
            x_batch, y_batch = mnist.train.next_batch(batch_size)
            x_batch = x_batch.reshape([batch_size, seq_length, heigh_image])
            train_feed = {input: x_batch, target: y_batch, seq_len: train_seq_len}
            session.run([optimizer], feed_dict=train_feed)

        # Training accuracy is measured on the last mini-batch of the epoch
        # only; test accuracy is measured on the full test set.
        train_accuracy = session.run(accuracy, feed_dict=train_feed)
        test_accuracy = session.run(accuracy, feed_dict=test_feed)
        print("epoch: %d, train_accuracy: %3f, test_accuracy: %3f" % (epoch, train_accuracy, test_accuracy))

AdadeltaOptimizer示例代码

1 个答案: