I am training a sentence classifier in batches with an RNN (tf.nn.bidirectional_dynamic_rnn), using TensorFlow 1.9.0 on Windows 10, and I pass the exact sequence length of each batch element through the sequence_length argument of bidirectional_dynamic_rnn.

My understanding is that specifying sequence_length makes the padding get ignored. However, if I build a batch containing a single sentence repeated batch_size times, each copy with a different amount of padding inserted, I get different predicted label probabilities (for example, for a sample sentence I examined, the probability of the correct label decreases as the amount of padding grows).

What could be causing this? Is this expected behaviour?

I have included the parts of my code that seem relevant.

Thanks for your help!
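For concreteness, this is a minimal sketch of the padding experiment described above (not the exact code I run): one sentence is repeated batch_size times with a different amount of trailing padding per row. The token ids are made up for illustration; PAD_ID = 1 matches the _PAD_ convention in seq_len() below.

import numpy as np

PAD_ID = 1  # same convention as `1 = _PAD_` in seq_len() below

def make_padding_batch(token_ids, batch_size, max_extra_pad):
    """Repeat one sentence batch_size times, each copy followed by a
    different amount of padding, then pad all rows to the same width."""
    rows = []
    for i in range(batch_size):
        extra = max_extra_pad * i // max(batch_size - 1, 1)
        rows.append(list(token_ids) + [PAD_ID] * extra)
    width = max(len(r) for r in rows)
    return np.array([r + [PAD_ID] * (width - len(r)) for r in rows])

sentence = [12, 7, 431, 9, 56]  # hypothetical token ids, none equal to PAD_ID
batch = make_padding_batch(sentence, batch_size=8, max_extra_pad=20)
# Every row has true length 5, so seq_len(batch, 8) returns [5] * 8 and,
# if sequence_length is honoured, each row should get identical probs.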
def evaluate(data_set, sess, data_index, batch_size, self):
    steps = len(data_set) // batch_size
    sentence_ls = []
    for step in range(steps):
        prior_index = data_index
        batch_inputs, batch_labels, data_index = generate_batch(batch_size,
                                                                data_set,
                                                                data_index)
        train_seq_len = seq_len(batch_inputs, self.batch_size)
        feed_dict = {self._train_inputs: batch_inputs, self._train_labels: batch_labels,
                     self._seq_len: train_seq_len, self._keep_prob_ph: 1}  # no dropout during evaluation
        correct_pred, probs = sess.run([self._correct_prediction, self._probs], feed_dict=feed_dict)
        # probs holds one probability distribution over the labels per batch element
        for idx in range(batch_inputs.shape[0]):
            label = batch_labels[idx]
            sentence = " ".join(data_set[prior_index + idx].words_parsed_text)
            prob = probs[idx]
            sentence_ls.append([label, sentence, correct_pred[idx], prob])
    return sentence_ls
def seq_len(batch, batch_size):
    seq_len_ls = []
    for i in range(batch_size):
        seq_len = 0
        macro_seq_len = batch.shape[1]  # the max sequence length of the batch
        for j in range(macro_seq_len):
            if batch[i][j] != 1:  # 1 = _PAD_
                seq_len += 1
        seq_len_ls.append(seq_len)
    return seq_len_ls
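(As an aside, the same lengths can be computed in one vectorized step; this sketch is equivalent to seq_len() above, assuming batch is a numpy array and PAD tokens never occur inside a sentence.)

import numpy as np

def seq_len_vectorized(batch, pad_id=1):
    # Count the non-PAD tokens in each row: gives the same list of
    # per-row lengths as the loop version above.
    return np.sum(batch != pad_id, axis=1).tolist()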
##########
#some of the code that creates the RNN
##########
seq_len = tf.placeholder(tf.int32, [None], name='seq_len')

lstm_fw_cell1 = tf.nn.rnn_cell.LSTMCell(self.hidden_size, forget_bias=1.0, state_is_tuple=True,
                                        initializer=self.initializer)
lstm_fw_cell2 = tf.nn.rnn_cell.LSTMCell(self.hidden_size, forget_bias=1.0, state_is_tuple=True,
                                        initializer=self.initializer)
lstm_fw_cell = tf.contrib.rnn.MultiRNNCell([lstm_fw_cell1, lstm_fw_cell2])

lstm_bw_cell1 = tf.nn.rnn_cell.LSTMCell(self.hidden_size, forget_bias=1.0, state_is_tuple=True,
                                        initializer=self.initializer)
lstm_bw_cell2 = tf.nn.rnn_cell.LSTMCell(self.hidden_size, forget_bias=1.0, state_is_tuple=True,
                                        initializer=self.initializer)
lstm_bw_cell = tf.contrib.rnn.MultiRNNCell([lstm_bw_cell1, lstm_bw_cell2])

outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell, cell_bw=lstm_bw_cell,
                                                  inputs=input_layer, sequence_length=seq_len,
                                                  dtype=tf.float32, scope="1_BiLSTM")
# logits and prediction are built from outputs/states in code not shown here
probs = tf.nn.softmax(logits, name='probs')
correct_prediction = tf.equal(prediction, tf.cast(train_labels, tf.float32), name='correct_prediction')
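Since the snippet stops before logits is defined, here is the kind of check I mean when asking whether padding can leak in. With sequence_length set, the final states returned by bidirectional_dynamic_rnn are length-aware, whereas the last time step of outputs is not (steps past seq_len are zeroed out). This is only a sketch of that distinction, not my actual classifier head:

# states is ((fw_layer1, fw_layer2), (bw_layer1, bw_layer2)) of LSTMStateTuples.
# The top-layer final hidden states stop at each row's true length, so they
# should be identical for the same sentence however much padding follows it:
fw_final = states[0][-1].h  # forward state taken at t = seq_len - 1
bw_final = states[1][-1].h  # backward state taken at t = 0
length_aware_repr = tf.concat([fw_final, bw_final], axis=1)

# By contrast, outputs[0][:, -1, :] is NOT length-aware: for a row shorter
# than the batch width it is all zeros, so logits built from the last time
# step of `outputs` would change with the amount of padding.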