I have built a single-layer LSTM, and it works.
The following code focuses on the definition of the weights and biases and on the RNN structure:
# Define weights
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}

def RNN(X, weights, biases):
    # Project the inputs into the hidden dimension before the LSTM
    X = tf.reshape(X, [-1, n_inputs])
    X_in = tf.matmul(X, weights['in']) + biases['in']
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    init_state = lstm_cell.zero_state(batch_size_holder, dtype=tf.float32)
    outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False)

    # Take the output at the last time step and project it to n_classes
    outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']
    return results
pred = RNN(x, weights, biases) # prediction
Now I want to add one more layer of LSTM cells. I looked at the example on the official TensorFlow website: https://www.tensorflow.org/tutorials/recurrent
But I had a hard time figuring out how to use MultiRNNCell. I tried the same logic as for an ordinary feed-forward network: multiply the output of the first layer by a weight matrix, add a bias, and feed the result into the second layer. The following code implements that:
# Define weights
weights1 = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_hidden_units]))
}
biases1 = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ]))
}

weights2 = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases2 = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}

def RNN(X, weights1, biases1, weights2, biases2):
    X = tf.reshape(X, [-1, n_inputs])
    X_in = tf.matmul(X, weights1['in']) + biases1['in']
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])

    lstm_cell1 = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    lstm_cell2 = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    init_state1 = lstm_cell1.zero_state(batch_size_holder, dtype=tf.float32)
    init_state2 = lstm_cell2.zero_state(batch_size_holder, dtype=tf.float32)

    # First LSTM layer
    outputs1, final_state1 = tf.nn.dynamic_rnn(lstm_cell1, X_in, initial_state=init_state1, time_major=False)
    outputs1 = tf.unstack(tf.transpose(outputs1, [1, 0, 2]))
    results1 = tf.matmul(outputs1[-1], weights1['out']) + biases1['out']

    # Project the first layer's last output and feed it to the second LSTM layer
    input = tf.matmul(results1, weights2['in']) + biases2['in']
    input = tf.reshape(input, [-1, n_steps, n_hidden_units])
    outputs2, final_state2 = tf.nn.dynamic_rnn(lstm_cell2, input, initial_state=init_state2, time_major=False)
    outputs2 = tf.unstack(tf.transpose(outputs2, [1, 0, 2]))
    results2 = tf.matmul(outputs2[-1], weights2['out']) + biases2['out']
    return results2
I simply made two lstm_cells of the same size and called dynamic_rnn twice.
My first question is: does this code do what I intend?
When I run it, I get this error:

ValueError: Variable rnn/basic_lstm_cell/weights already exists, disallowed. Did you mean to set reuse=True in VarScope?

According to TensorFlow (https://www.tensorflow.org/tutorials/recurrent), this is a version issue and should be resolved by adding the argument reuse=tf.get_variable_scope().reuse to BasicLSTMCell().
However, my BasicLSTMCell() does not even have a 'reuse' parameter.
Does anyone know how to make this work? Any suggestions and help are appreciated.
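(For reference, a minimal sketch of one common workaround in TF 1.x, assuming lstm_cell1, lstm_cell2, X_in, input, and the two initial states from the code above: give each dynamic_rnn call its own variable scope so the two cells create their weights under distinct names instead of both trying to create rnn/basic_lstm_cell/weights.)

# Sketch only, not the original code: distinct variable scopes for the two layers
with tf.variable_scope('lstm_layer_1'):
    outputs1, final_state1 = tf.nn.dynamic_rnn(
        lstm_cell1, X_in, initial_state=init_state1, time_major=False)

# ... compute `input` from outputs1 exactly as in the code above ...

with tf.variable_scope('lstm_layer_2'):
    outputs2, final_state2 = tf.nn.dynamic_rnn(
        lstm_cell2, input, initial_state=init_state2, time_major=False)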
The complete code is as follows:
import tensorflow as tf
lr = 0.005
n_inputs = 128
n_steps = 255
n_hidden_units = 128
number_of_layers = 2
n_classes = 1
batch_size = 100
gradient = 0.1
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
batch_size_holder = tf.placeholder(tf.int32, [], name='batch_size_holder')
# Define weights
weights = {
    'in': tf.Variable(tf.random_normal([n_inputs, n_hidden_units])),
    'out': tf.Variable(tf.random_normal([n_hidden_units, n_classes]))
}
biases = {
    'in': tf.Variable(tf.constant(0.1, shape=[n_hidden_units, ])),
    'out': tf.Variable(tf.constant(0.1, shape=[n_classes, ]))
}

def RNN(X, weights, biases):
    X = tf.reshape(X, [-1, n_inputs])
    X_in = tf.matmul(X, weights['in']) + biases['in']
    X_in = tf.reshape(X_in, [-1, n_steps, n_hidden_units])
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
    init_state = lstm_cell.zero_state(batch_size_holder, dtype=tf.float32)
    outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False)
    outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2]))
    results = tf.matmul(outputs[-1], weights['out']) + biases['out']  # shape = (batch_size, n_classes)
    return results
pred = RNN(x, weights, biases)
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(lr)
gvs = optimizer.compute_gradients(cost)
capped_gvs = [(tf.clip_by_value(grad, -gradient, gradient), var) for grad, var in gvs]
train_step = optimizer.apply_gradients(capped_gvs)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
mydata = data(batch = batch_size, s = 10000, per = 0.95)  # data() is my own data loader (definition not included here)
step = 0
train_loss = []
test_loss = []
while mydata.hasNext():
    batch_xs, batch_ys = mydata.next()
    batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
    batch_ys = batch_ys.reshape([batch_size, 1])
    sess.run(train_step, feed_dict={
        x: batch_xs,
        y: batch_ys,
        batch_size_holder: 100
    })
    if step % 10 == 0:
        test_x, test_y = mydata.test()
        test_x = test_x.reshape([-1, n_steps, n_inputs])
        test_y = test_y.reshape([-1, 1])
        loss1 = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, batch_size_holder: 100})
        loss2 = sess.run(cost, feed_dict={x: test_x, y: test_y, batch_size_holder: 500})
        train_loss.append(loss1)
        test_loss.append(loss2)
        print("training cost: ", loss1)
        print("testing cost: ", loss2)
    step += 1
sess.close()
import matplotlib.pyplot as plt
plt.plot(train_loss)
plt.plot(test_loss)
plt.show()
------- UPDATE ---------
Thanks to vijay's answer, the updated code is below.
Note that the network has 2 (n_layers) LSTM layers and one dense layer before producing the output.
import tensorflow as tf
lr = 0.01
n_inputs = 128
n_steps = 255
n_hidden_units = 200
n_layers = 2
number_of_layers = 2
n_classes = 1
batch_size = 100
gradient = 0.5
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_classes])
batch_size_holder = tf.placeholder(tf.int32, [], name='batch_size_holder')
def lstm_cell():
    return tf.contrib.rnn.BasicLSTMCell(n_hidden_units)

def RNN(X):
    lstm_stacked = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(n_layers)])
    init_state = lstm_stacked.zero_state(batch_size_holder, dtype=tf.float32)
    outputs, final_state = tf.nn.dynamic_rnn(lstm_stacked, X, dtype=tf.float32)
    output = tf.layers.dense(outputs[:, -1, :], 1)
    return output
pred = RNN(x)
cost = tf.losses.mean_squared_error(y, pred)
optimizer = tf.train.AdamOptimizer(lr)
gvs = optimizer.compute_gradients(cost)
capped_gvs = [(tf.clip_by_value(grad, -gradient, gradient), var) for grad, var in gvs]
train_step = optimizer.apply_gradients(capped_gvs)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
mydata = data(batch = batch_size, s = 30000, per = 0.95)
step = 0
train_loss = []
test_loss = []
while mydata.hasNext():
    batch_xs, batch_ys = mydata.next()
    batch_xs = batch_xs.reshape([batch_size, n_steps, n_inputs])
    batch_ys = batch_ys.reshape([batch_size, 1])
    sess.run(train_step, feed_dict={
        x: batch_xs,
        y: batch_ys,
        batch_size_holder: batch_size
    })
    if step % 10 == 0:
        test_x, test_y = mydata.test()
        test_x = test_x.reshape([-1, n_steps, n_inputs])
        test_y = test_y.reshape([-1, 1])
        loss1 = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, batch_size_holder: batch_size})
        loss2 = sess.run(cost, feed_dict={x: test_x, y: test_y, batch_size_holder: 1500})
        train_loss.append(loss1)
        test_loss.append(loss2)
        print("training cost: ", loss1, "testing cost: ", loss2)
    step += 1
Answer (score: 2):
If you want a multi-layer LSTM, you can use tf.contrib.rnn.MultiRNNCell. So for two layers:
n_layers = 2
lstm_stacked = tf.contrib.rnn.MultiRNNCell([lstm_cell() for _ in range(n_layers)])
outputs, final_state = tf.nn.dynamic_rnn(lstm_stacked, X_in, dtype=tf.float32)

# where lstm_cell() builds a single cell:
def lstm_cell():
    # Single RNN cell
    return tf.contrib.rnn.BasicLSTMCell(n_hidden_units)
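(Side note, not from the original answer: with a MultiRNNCell, zero_state returns one state per layer, so an explicit initial state can still be threaded through dynamic_rnn if needed. A minimal sketch, assuming batch_size_holder, X_in, and n_classes from the question:)

# Sketch: explicit zero state for the stacked cell.
# For a 2-layer stack of BasicLSTMCells, init_state is a tuple of two LSTMStateTuples.
init_state = lstm_stacked.zero_state(batch_size_holder, dtype=tf.float32)
outputs, final_state = tf.nn.dynamic_rnn(
    lstm_stacked, X_in, initial_state=init_state, time_major=False)

# outputs has shape [batch, n_steps, n_hidden_units]; the last time step
# can be fed to a dense layer for the final prediction, as in the updated code above.
pred = tf.layers.dense(outputs[:, -1, :], n_classes)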