I am trying to train an RNN classifier on a training set with variable-length sequences. The suggested solution I've read about is to bucket the training data by length, draw batches from each bucket, and exit the RNN at a given sequence_length. However, the results of this approach are much worse than simply trimming every input sequence to length 10: my loss and accuracy jump up and down erratically.
More information:
My batch size is variable, because sometimes there are not enough samples of a given sequence length in a bucket.
The sequences are series of words converted to word embeddings.
If I have a bucket of sequences of length 4, the batch is still padded out to max_seq_length, with everything to the right of the real tokens filled with zero vectors (see the padding sketch after the shapes below).
Each batch has shape (batch_size, number_steps, embedding_dimensions), for example:
batch (128, 48, 100)   sequence_length = 4    classes (128, 188)
batch (128, 48, 100)   sequence_length = 8    classes (128, 188)
...
batch (30, 48, 100)    sequence_length = 40   classes (30, 188)
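To make the padding concrete, here is a minimal sketch of what I mean (pad_bucket is just an illustrative stand-in with made-up numbers, not my actual preprocessing code):

import numpy as np

def pad_bucket(sequences, max_seq_length, embedding_dim):
    # Right-pad a bucket of equal-length embedded sequences with zero vectors
    batch = np.zeros((len(sequences), max_seq_length, embedding_dim), dtype=np.float32)
    for i, seq in enumerate(sequences):
        batch[i, :len(seq), :] = seq   # real embeddings on the left, zeros on the right
    return batch

# e.g. a bucket of 128 sequences of length 4 with 100-dim embeddings
bucket = [np.random.rand(4, 100) for _ in range(128)]
batch_xs = pad_bucket(bucket, 48, 100)   # -> shape (128, 48, 100)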
What is the best way to mix variable sequence lengths with variable batch sizes?
My graph:
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell  # pre-1.0 TF RNN ops used below

# Network Parameters
n_input = embeddings_dim    # word embeddings dimensions : 100
n_steps = max_seq_len       # timesteps = maximum sequence length in my training = 47
n_classes = total_labels    # total classes = 188

graph = tf.Graph()
with graph.as_default():
    # tf Graph input
    x = tf.placeholder("float", [None, n_steps, n_input])
    # Tensorflow LSTM cell requires 2x n_hidden length (state & cell)
    istate = tf.placeholder("float", [None, 2*n_hidden])
    y = tf.placeholder("float", [None, n_classes])
    # at what step we should read out the value of the RNN
    early_stop = tf.placeholder(tf.int32)
    tf.scalar_summary('early_stop', early_stop)

    # Define weights
    weights = {
        'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])),  # Hidden layer weights
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([n_hidden])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    def RNN(_X, _istate, _weights, _biases):
        # input shape: (batch_size, n_steps, n_input)
        _X = tf.transpose(_X, [1, 0, 2])    # permute n_steps and batch_size
        # Reshape to prepare input to hidden activation
        _X = tf.reshape(_X, [-1, n_input])  # (n_steps*batch_size, n_input)
        # Linear activation
        _X = tf.matmul(_X, _weights['hidden']) + _biases['hidden']
        # Define a lstm cell with tensorflow
        lstm_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=0.25)
        # Split data because rnn cell needs a list of inputs for the RNN inner loop
        _X = tf.split(0, n_steps, _X)       # n_steps * (batch_size, n_hidden)
        # Get lstm cell output
        outputs, states = rnn.rnn(lstm_cell, _X,
                                  initial_state=_istate,
                                  sequence_length=early_stop)
        # Linear activation
        # Get inner loop last output
        return tf.matmul(outputs[-1], _weights['out']) + _biases['out']

    pred = RNN(x, istate, weights, biases)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))  # Softmax loss
    global_step = tf.Variable(0, name='global_step', trainable=False)
    learning_rate_2 = tf.train.exponential_decay(learning_rate, global_step, 300, 0.96, staircase=True)
    # Pass global_step to minimize() so the decay schedule actually advances
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate_2).minimize(cost, global_step=global_step)  # Adam Optimizer

    # Evaluate model
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Merge summaries for the train_writer used in the training loop
    merged = tf.merge_all_summaries()
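For reference, this is the kind of single-bucket sanity check I can feed the graph above (dummy data only, using the names defined in the graph; illustrative, not part of my real pipeline):

with tf.Session(graph=graph) as sess:
    sess.run(tf.initialize_all_variables())
    dummy_x = np.zeros((128, n_steps, n_input), dtype=np.float32)   # one zero-padded bucket
    dummy_y = np.zeros((128, n_classes), dtype=np.float32)
    dummy_y[:, 0] = 1.0                                             # fake one-hot labels
    c = sess.run(cost, feed_dict={x: dummy_x, y: dummy_y,
                                  istate: np.zeros((128, 2*n_hidden)),
                                  early_stop: 4})                   # this bucket's sequence_length
    print("cost on a dummy bucket:", c)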
My training loop:
with tf.Session(graph=graph) as sess:
    init = tf.initialize_all_variables()
    sess.run(init)
    step = 1
    while step * batch_size < training_iters:
        # batch_xs = batch_xs.reshape((batch_size, n_steps, n_input))
        batch_xs, batch_ys, batch_sl = train_batches.next()
        ins_batch_size = len(batch_xs)
        # Fit training using batch data
        summ, _ = sess.run([merged, optimizer], feed_dict={x: batch_xs, y: batch_ys,
                                                           istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                                           early_stop: batch_sl})
        train_writer.add_summary(summ, step)
        if step % display_step == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys,
                                                istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                                early_stop: batch_sl})
            # Calculate batch loss
            loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys,
                                             istate: np.zeros((ins_batch_size, 2*n_hidden)),
                                             early_stop: batch_sl})
            print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + "{:.6f}".format(loss) +
                  ", Training Accuracy= " + "{:.5f}".format(acc))
        step += 1

    print("Optimization Finished!")
    test_data, test_label, test_sl = test_batches.next()
    test_len = len(test_data)
    print("Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_data, y: test_label,
                                                             istate: np.zeros((test_len, 2*n_hidden)),
                                                             early_stop: test_sl}))