AdadeltaOptimizer example code

Asked: 2016-10-27 11:36:12

Tags: python tensorflow deep-learning

Does anyone have an example of code that uses tf.train.AdadeltaOptimizer and works well?

I have a TF graph that was originally set up with tf.train.AdamOptimizer and works well. When I replace it with AdadeltaOptimizer, using the default parameters, it produces terrible results.

I am using Cuda 7.5.

1 Answer:

Answer 0 (score: 0):

Here is example code that works with the 'AdadeltaOptimizer' optimizer. It also works with 'Adam'. The only difference between them is that 'Adam' is not sensitive to the 'learning rate', while 'Adadelta' is. I advise you to read more about optimization algorithms (like here). In your own example, just try changing the 'learning rate' to be smaller or bigger (this is called 'hyperparameter optimization').
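To make that concrete, here is a minimal sketch of such a learning-rate sweep for 'Adadelta' (the candidate values are illustrative assumptions, not tuned recommendations; tf.train.AdadeltaOptimizer's default learning rate of 0.001 is quite small, so trying larger values is a reasonable first step):

# Hypothetical learning-rate sweep; 'cost' is the model's loss tensor,
# as in the full example below.
for lr in [0.001, 0.01, 0.1, 1.0]:
    train_step = tf.train.AdadeltaOptimizer(learning_rate=lr).minimize(cost)
    # ... rerun the training loop with this train_step and keep the value
    # of lr that gives the best validation score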

Note: In my experience, 'Adam' is a very good optimizer for RNNs, better than 'AdaDelta' (with the example code, 'Adam' reaches better scores much faster). On the other hand, for CNNs, SGD + Momentum works best.
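For reference, a minimal sketch of how each of these optimizers is constructed in this API (the learning rates and the momentum value are illustrative assumptions, not tuned settings):

# 'cost' is the model's loss tensor, as in the full example below.
adam_step     = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
adadelta_step = tf.train.AdadeltaOptimizer(learning_rate=0.1).minimize(cost)
# SGD with momentum, the combination that works best for CNNs in my experience:
momentum_step = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9).minimize(cost)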

Code for learning MNIST classification with a Bi-LSTM:

# Mnist classification using Bi-LSTM
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
learning_rate   = 0.01
training_epochs = 100
batch_size  = 64
seq_length  = 28
height_image = 28
hidden_size = 128
class_number = 10
input   = tf.placeholder(tf.float32, [None, None, height_image])
target  = tf.placeholder(tf.float32, [None, class_number])
seq_len = tf.placeholder(tf.int32, [None])

# Fully connected layer with optional activation function.
def fulconn_layer(input_data, output_dim, activation_func=None):
    input_dim = int(input_data.get_shape()[1])
    W = tf.Variable(tf.random_normal([input_dim, output_dim]))
    b = tf.Variable(tf.random_normal([output_dim]))
    if activation_func:
        return activation_func(tf.matmul(input_data, W) + b)
    else:
        return tf.matmul(input_data, W) + b       

with tf.name_scope("BiLSTM"):
  with tf.variable_scope('forward'):
    lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
  with tf.variable_scope('backward'):
    lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0, state_is_tuple=True)
  outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell, inputs=input, sequence_length=seq_len, dtype=tf.float32, scope="BiLSTM")

# A Bi-LSTM produces separate forward and backward outputs, so merge them
# along the feature axis. (tf.concat(axis, values) is the pre-TF-1.0 argument order.)
outputs = tf.concat(2, outputs)
# For classification we only need a single time step; at step 0 the
# backward LSTM's output has already seen the entire sequence.
last_output = outputs[:,0,:]
# Create the final classification layer
yhat = fulconn_layer(last_output, class_number)

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=yhat, labels=target))
optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate).minimize(cost) # swap in tf.train.AdamOptimizer here to compare
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(target, 1), tf.argmax(yhat, 1)), tf.float32))
gpu_opts = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts)) as session:
  session.run(tf.initialize_all_variables()) # renamed tf.global_variables_initializer() in TF >= 0.12
  print("Start Learning")
  for epoch in range(training_epochs):
      for i in range(int(mnist.train.num_examples/batch_size)):
          x_batch, y_batch = mnist.train.next_batch(batch_size)
          x_batch = x_batch.reshape([batch_size, seq_length, height_image])
          train_seq_len = np.ones(batch_size) * seq_length
          session.run([optimizer], feed_dict={input: x_batch, target: y_batch, seq_len: train_seq_len})

      train_accuracy = session.run(accuracy, feed_dict={input: x_batch, target: y_batch, seq_len: train_seq_len})
      x_test = mnist.test.images.reshape([-1, seq_length, height_image])
      y_test = mnist.test.labels
      test_seq_len = np.ones(x_test.shape[0]) * seq_length
      test_accuracy = session.run(accuracy, feed_dict={input: x_test, target: y_test, seq_len: test_seq_len})
      print("epoch: %d, train_accuracy: %3f, test_accuracy: %3f" % (epoch, train_accuracy, test_accuracy))