I am trying to model sentence similarity with TensorFlow. The idea is to first feed the concatenated pair of sentences to an RNN, then feed the RNN's output to a softmax for binary classification: similar or not similar. I made some minor changes to the PTB language model example, but the cost does not decrease as expected.
cost: 0.694479
cost: 0.695012
cost: 0.6955
...
The code is below. Any help would be greatly appreciated.
class PTBModel(object):
    """The PTB model."""

    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size
        label_size = 2

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps], name="inputs")
        # for each sentence pair we only have one output
        self._targets = tf.placeholder(tf.int64, [batch_size], name="labels")

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        # from tensorflow.models.rnn import rnn
        # inputs = [tf.squeeze(input_, [1])
        #           for input_ in tf.split(1, num_steps, inputs)]
        # outputs, states = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        outputs = []
        states = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)
                states.append(state)

        # use the output of the last word as the input feature to softmax
        output = tf.reshape(tf.concat(1, outputs[-1]), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, label_size])
        softmax_b = tf.get_variable("softmax_b", [label_size])
        self.logits = logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, self.targets)
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        # self._cost = cost = -tf.reduce_sum(tf.reshape(self.targets, [-1, 1])
        #     * tf.log(tf.clip_by_value(tf.log(tf.nn.softmax(logits)), 1e-10, 1.0)))
        self._final_state = states[-1]

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    def step(self, session, inputs, labels, is_training):
        if is_training:
            output_feeds = [self.cost, self._final_state, self._train_op]
        else:
            output_feeds = [self.cost]
        input_feeds = {
            self.input_data: inputs,
            self.targets: labels
        }

        cost, state, logits = session.run(output_feeds, input_feeds)
        return cost, state, logits
Answer 0 (score: 0)
A small suggestion for you.
You could use two encoders (RNNs or CNNs), one for each sentence, to encode the two sentences into two sentence embeddings. Once you have the two sentence vectors, you simply compute the cosine similarity as the output: 1 if the two sentences have the same meaning, 0 if not, for training. At inference time you can feed in any two sentences and take the cosine similarity as their semantic similarity.
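For illustration, here is a minimal, untested sketch of that dual-encoder idea written against the same old-style TensorFlow API as the question (tensorflow.models.rnn's rnn and rnn_cell); the placeholder shapes, sizes, and names are only assumptions:

import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell

batch_size, num_steps, size, vocab_size = 32, 20, 200, 10000

# One placeholder per sentence of the pair, plus a 0/1 similarity label.
sent_a = tf.placeholder(tf.int32, [batch_size, num_steps], name="sent_a")
sent_b = tf.placeholder(tf.int32, [batch_size, num_steps], name="sent_b")
labels = tf.placeholder(tf.float32, [batch_size], name="labels")

embedding = tf.get_variable("embedding", [vocab_size, size])

def encode(word_ids, reuse=None):
    # Encode one sentence with an LSTM and keep the last output.
    # Reusing the "encoder" scope shares the weights between both sentences.
    with tf.variable_scope("encoder", reuse=reuse):
        cell = rnn_cell.BasicLSTMCell(size, forget_bias=1.0)
        inputs = tf.nn.embedding_lookup(embedding, word_ids)
        inputs = [tf.squeeze(x, [1]) for x in tf.split(1, num_steps, inputs)]
        outputs, _ = rnn.rnn(cell, inputs, dtype=tf.float32)
        return outputs[-1]  # [batch_size, size]

enc_a = encode(sent_a)
enc_b = encode(sent_b, reuse=True)

# Cosine similarity between the two sentence vectors, in [-1, 1].
cosine_sim = tf.reduce_sum(
    tf.nn.l2_normalize(enc_a, 1) * tf.nn.l2_normalize(enc_b, 1), 1)

# Push the similarity toward 1 for similar pairs and toward 0 otherwise.
loss = tf.reduce_mean(tf.square(cosine_sim - labels))
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)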
Answer 1 (score: 0)
When you use an RNN, you assume there is a sequential relationship between the inputs. Concatenating the two sentences tells the network that they form a single sequence, which is not the case. To compare two sentences, each sentence should be represented separately.
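For instance, a rough, untested sketch of keeping the question's softmax head but feeding it two separately encoded sentences; here enc_a and enc_b are assumed to be the last LSTM outputs of a shared encoder, each of shape [batch_size, size], and size, label_size, batch_size, and targets are illustrative names, written against the same old-style TF API as the question:

# Combine the two sentence representations into one feature vector.
features = tf.concat(1, [enc_a, enc_b, tf.abs(enc_a - enc_b), enc_a * enc_b])
softmax_w = tf.get_variable("softmax_w", [4 * size, label_size])
softmax_b = tf.get_variable("softmax_b", [label_size])
logits = tf.matmul(features, softmax_w) + softmax_b
# Same binary cross-entropy objective as in the question.
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, targets)
cost = tf.reduce_sum(loss) / batch_size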