Tensorflow:如何实现Multilayered dynamic_rnn?

时间:2017-07-13 04:56:40

标签: python tensorflow

我构建了一个单层LSTM。它有效。

以下代码侧重于权重和偏差的定义以及RNN结构:

# Trainable parameters of the two dense projections around the LSTM:
# 'in' maps an n_inputs-wide row to n_hidden_units, and 'out' maps an
# n_hidden_units-wide row to n_classes. Both start from random normal values.
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
# Matching bias vectors, every element initialised to the constant 0.1.
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}


def RNN(X, weights, biases):
    """Build the graph of a single-layer LSTM followed by a linear read-out.

    Args:
        X: input tensor; it is reshaped to rows of ``n_inputs`` values, so
            its last axis is expected to be ``n_inputs`` wide.
        weights: dict with the projection matrices under ``'in'`` and ``'out'``.
        biases: dict with the matching bias vectors under ``'in'`` and ``'out'``.

    Returns:
        The output projection applied to the LSTM output of the last time
        step.
    """
    # Fold the time axis into the batch axis so that a single matmul
    # projects every time step.
    X = tf.reshape(X, [-1, n_inputs])

    X_in = tf.matmul(X, weights['in']) + biases['in']
    # Back to (batch, n_steps, n_hidden_units), the batch-major layout used
    # with time_major=False below.
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)

    # The batch size is fed at run time through the batch_size_holder placeholder.
    init_state = lstm_cell.zero_state(batch_size_holder, dtype=tf.float32)

    outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False)

    # Put time on the leading axis and split along it, so outputs[-1] is the
    # output of the last time step.
    outputs = tf.unstack(tf.transpose(outputs, [1,0,2]))
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']

    return results

pred = RNN(x, weights, biases) # prediction tensor produced by the single-layer model

现在,我想再添加一层 LSTM 单元。我查看了 Tensorflow 官方网站上的示例:https://www.tensorflow.org/tutorials/recurrent

但我很难弄清楚如何使用 MultiRNNCell。我尝试沿用普通神经网络的思路:将第一层的输出乘以权重并加上偏置,然后送入第二层。以下代码实现了这一点:

# Parameters of the first LSTM layer: 'in' projects the raw input
# (n_inputs -> n_hidden_units); 'out' keeps the width at n_hidden_units so
# the result can be handed to the second layer.
weights1 = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_hidden_units]))
}
biases1 = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ]))
}

# Parameters of the second LSTM layer. Its 'in' matrix multiplies the output
# of layer 1, which is n_hidden_units wide, so its first dimension must be
# n_hidden_units (not n_inputs); 'out' maps n_hidden_units -> n_classes.
weights2 = {
    'in': tf.Variable(tf.random_normal([n_hidden_units, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases2 = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}



def RNN(X, weights1, biases1, weights2, biases2):
    """Attempt at a two-layer LSTM built from two separate dynamic_rnn calls.

    Args:
        X: input tensor; it is reshaped to rows of ``n_inputs`` values.
        weights1, biases1: ``'in'``/``'out'`` projection parameters of layer 1.
        weights2, biases2: ``'in'``/``'out'`` projection parameters of layer 2.

    Returns:
        The ``'out'`` projection of layer 2 applied to the last time step of
        the second LSTM.

    NOTE(review): this is not a working two-layer network yet.
      * Both ``tf.nn.dynamic_rnn`` calls are made without a ``scope``
        argument, so presumably they create their variables under the same
        default scope; that matches the reported
        "Variable rnn/basic_lstm_cell/weights already exists" ValueError.
      * Only the last time step of layer 1 (``outputs1[-1]``) is forwarded,
        so the tensor reshaped to ``[-1, n_steps, n_hidden_units]`` below no
        longer contains a full sequence per sample.
    """
    # Use the argument X rather than the module-level placeholder x, so the
    # function really operates on the tensor it was given.
    X = tf.reshape(X, [-1, n_inputs])
    X_in = tf.matmul(X, weights1['in']) + biases1['in']
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    lstm_cell1 = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    lstm_cell2 = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)

    init_state1 = lstm_cell1.zero_state(batch_size_holder, dtype=tf.float32)
    init_state2 = lstm_cell2.zero_state(batch_size_holder, dtype=tf.float32)

    outputs1, final_state1 = tf.nn.dynamic_rnn(lstm_cell1, X_in, initial_state=init_state1, time_major=False)

    # Time-major list of per-step outputs; [-1] picks the last time step.
    outputs1 = tf.unstack(tf.transpose(outputs1, [1,0,2]))
    results1 = tf.matmul(outputs1[-1], weights1['out']) + biases1['out']

    # Named layer2_in instead of `input` to avoid shadowing the builtin.
    layer2_in = tf.matmul(results1, weights2['in']) + biases2['in']
    layer2_in = tf.reshape(layer2_in, [-1, n_steps, n_hidden_units])
    outputs2, final_state2 = tf.nn.dynamic_rnn(lstm_cell2, layer2_in, initial_state=init_state2, time_major=False)

    outputs2 = tf.unstack(tf.transpose(outputs2, [1,0,2]))
    results2 = tf.matmul(outputs2[-1], weights2['out']) + biases2['out']

    return results2

我只是创建了两个大小相同的 LSTM 单元(lstm_cell1 和 lstm_cell2),并调用了两次 dynamic_rnn。

我的第一个问题是,这段代码是否符合我的要求?

运行时,我收到了错误:

ValueError: Variable rnn/basic_lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope?

根据 Tensorflow 官方教程(https://www.tensorflow.org/tutorials/recurrent),这是一个版本问题,应该通过给 BasicLSTMCell() 添加参数 reuse=tf.get_variable_scope().reuse 来解决。

但是,我的 BasicLSTMCell() 函数甚至没有 reuse 这个参数。

你们知道如何让它发挥作用吗?任何建议和帮助表示赞赏。

完整代码如下:

import tensorflow as tf


lr = 0.005  # learning rate handed to AdamOptimizer

n_inputs = 128  # size of the last axis of the input placeholder
n_steps = 255  # size of the time axis of the input placeholder
n_hidden_units = 128  # units of the BasicLSTMCell and width of the input projection
number_of_layers = 2  # NOTE(review): not referenced anywhere in this script
n_classes = 1  # width of the target placeholder y
batch_size = 100  # samples per training batch
gradient = 0.1  # gradients are clipped element-wise to [-gradient, gradient]

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# Scalar fed at run time; it sizes the LSTM's zero initial state.
batch_size_holder = tf.placeholder(tf.int32, [], name='batch_size_holder')


# Trainable parameters of the two dense projections around the LSTM:
# 'in' maps an n_inputs-wide row to n_hidden_units, and 'out' maps an
# n_hidden_units-wide row to n_classes. Both start from random normal values.
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
# Matching bias vectors, every element initialised to the constant 0.1.
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}


def RNN(X, weights, biases):
    """Assemble a single-layer LSTM with dense input and output projections.

    Args:
        X: input tensor whose last axis is ``n_inputs`` wide.
        weights: dict holding the ``'in'`` and ``'out'`` projection matrices.
        biases: dict holding the ``'in'`` and ``'out'`` bias vectors.

    Returns:
        The ``'out'`` projection of the LSTM output at the last time step.
    """
    # Project all time steps at once by folding time into the batch axis,
    # then restore the (batch, n_steps, n_hidden_units) layout.
    flat_inputs = tf.reshape(X, [-1, n_inputs])
    projected = tf.matmul(flat_inputs, weights['in']) + biases['in']
    sequence = tf.reshape(projected, [-1, n_steps, n_hidden_units])

    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    # Zero initial state sized by the batch size fed at run time.
    zero_state = cell.zero_state(batch_size_holder, dtype=tf.float32)
    rnn_out, _ = tf.nn.dynamic_rnn(
        cell,
        sequence,
        initial_state=zero_state,
        time_major=False,
    )

    # Time-major list of per-step outputs; the final entry is the last step.
    per_step = tf.unstack(tf.transpose(rnn_out, [1, 0, 2]))
    last_step = per_step[-1]
    return tf.matmul(last_step, weights['out']) + biases['out']


# Mean-squared-error loss on the model output.
pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(lr)
gvs = optimizer.compute_gradients(cost)
# Clip every gradient element-wise to [-gradient, gradient] before applying it.
capped_gvs = [(tf.clip_by_value(grad, -gradient, gradient), var) for grad, var in gvs]
train_step = optimizer.apply_gradients(capped_gvs)

sess = tf.Session()

init = tf.global_variables_initializer()
sess.run(init)

mydata = data(batch = batch_size, s = 10000, per = 0.95)
step = 0
train_loss = []
test_loss = []
while mydata.hasNext():
    batch_xs, batch_ys = mydata.next()
    batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
    batch_ys = batch_ys.reshape([batch_size, 1])

    # Feed the real batch size instead of a hard-coded 100 so that changing
    # batch_size cannot desynchronise the LSTM's initial state from the data.
    train_feed = {x: batch_xs, y: batch_ys, batch_size_holder: batch_size}
    sess.run(train_step, feed_dict=train_feed)
    if step % 10 == 0:
        test_x, test_y = mydata.test()
        test_x = test_x.reshape([-1, n_steps, n_inputs])
        test_y = test_y.reshape([-1, 1])
        # Derive the test batch size from the data rather than hard-coding it
        # (assumes test_x is a NumPy-like array exposing .shape).
        test_feed = {x: test_x, y: test_y, batch_size_holder: test_x.shape[0]}
        loss1 = sess.run(cost, feed_dict=train_feed)
        loss2 = sess.run(cost, feed_dict=test_feed)
        train_loss.append(loss1)
        test_loss.append(loss2)

        print("training cost: ", loss1)
        print("testing cost: ", loss2)
    step += 1

sess.close()
import matplotlib.pyplot as plt
# Loss curves, one point per 10 training steps.
plt.plot(train_loss)
plt.plot(test_loss)

-------更新---------

感谢vijay的回答,更新的代码如下:

请注意,在输出结果之前,网络有 2 个(n_layers)LSTM 层和 1 个全连接(dense)层。

import tensorflow as tf


lr = 0.01  # learning rate handed to AdamOptimizer
n_inputs = 128  # size of the last axis of the input placeholder
n_steps = 255  # size of the time axis of the input placeholder
n_hidden_units = 200  # units per BasicLSTMCell
n_layers = 2  # number of cells stacked in the MultiRNNCell
number_of_layers = 2  # NOTE(review): not referenced in this script; n_layers is used instead
n_classes = 1  # width of the target placeholder y
batch_size = 100  # samples per training batch
gradient = 0.5  # gradients are clipped element-wise to [-gradient, gradient]


# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
# Scalar batch size fed at run time.
batch_size_holder = tf.placeholder(tf.int32, [], name='batch_size_holder')


def lstm_cell():
    """Return a new BasicLSTMCell with ``n_hidden_units`` units."""
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    return cell

def RNN(X):
    """Build a stacked LSTM (``n_layers`` cells) followed by a dense read-out.

    Args:
        X: input tensor passed straight to ``tf.nn.dynamic_rnn``; the output
            is indexed as (batch, time, features).

    Returns:
        A dense projection, ``n_classes`` wide, of the top layer's output at
        the last time step.
    """
    # One independent cell per layer; MultiRNNCell chains them.
    lstm_stacked = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(n_layers)])
    # No explicit initial state is passed: with dtype given, dynamic_rnn
    # builds the zero state itself, so the previously unused init_state is gone.
    outputs, final_state = tf.nn.dynamic_rnn(lstm_stacked, X, dtype=tf.float32)

    # Last time step of every sequence, projected to the target width
    # (n_classes rather than a hard-coded 1, to stay in sync with y).
    output = tf.layers.dense(outputs[:, -1, :], n_classes)

    return output


# Mean-squared-error loss on the model output.
pred = RNN(x)
cost = tf.losses.mean_squared_error(y, pred)
optimizer = tf.train.AdamOptimizer(lr)
gvs = optimizer.compute_gradients(cost)
# Clip every gradient element-wise to [-gradient, gradient] before applying it.
capped_gvs = [(tf.clip_by_value(grad, -gradient, gradient), var) for grad, var in gvs]
train_step = optimizer.apply_gradients(capped_gvs)

sess = tf.Session()

init = tf.global_variables_initializer()
sess.run(init)

mydata = data(batch = batch_size, s = 30000, per = 0.95)
step = 0
train_loss = []
test_loss = []
while mydata.hasNext():
    batch_xs, batch_ys = mydata.next()
    batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
    batch_ys = batch_ys.reshape([batch_size, 1])

    # The same feed is used for the training step and the training-loss probe.
    train_feed = {x: batch_xs, y: batch_ys, batch_size_holder: batch_size}
    sess.run(train_step, feed_dict=train_feed)
    if step % 10 == 0:
        test_x, test_y = mydata.test()
        test_x = test_x.reshape([-1, n_steps, n_inputs])
        test_y = test_y.reshape([-1, 1])
        # Derive the test batch size from the data rather than hard-coding
        # 1500 (assumes test_x is a NumPy-like array exposing .shape).
        test_feed = {x: test_x, y: test_y, batch_size_holder: test_x.shape[0]}
        loss1 = sess.run(cost, feed_dict=train_feed)
        loss2 = sess.run(cost, feed_dict=test_feed)
        train_loss.append(loss1)
        test_loss.append(loss2)

        print("training cost: ", loss1, "testing cost: ", loss2)

    step += 1

# Release the session's resources once training is finished.
sess.close()

1 个答案:

答案 0 :(得分:2)

如果您想要multi-layer LSTM,可以使用tf.contrib.rnn.MultiRNNCell。所以对于两层:

def lstm_cell():
    # Single RNN cell; called once per layer so each layer gets its own cell.
    return tf.contrib.rnn.BasicLSTMCell(n_hidden_units)

# lstm_cell must be defined before it is called here.
n_layers = 2
lstm_stacked = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(n_layers)])
outputs, final_state = tf.nn.dynamic_rnn(lstm_stacked, X_in, dtype=tf.float32)