
时间:2016-05-03 01:44:57

标签: python machine-learning tensorflow deep-learning

我正在尝试使用 TensorFlow 对句子相似度进行建模。思路是先将拼接后的句子对输入 RNN,再将 RNN 的输出送入 softmax 做二分类(相似或不相似)。我对 PTB language model example 做了一些小改动,但 cost 并没有像预期那样下降。

cost: 0.694479 
cost: 0.695012
cost: 0.6955


class PTBModel(object):
    """LSTM sentence-pair classifier adapted from the PTB language model.

    A concatenated sentence pair is embedded and fed through a stacked LSTM
    that is unrolled for ``num_steps`` steps. The LSTM output at the last
    time step goes through a linear layer producing two logits, and the cost
    is the batch-averaged sparse softmax cross-entropy against one integer
    label per pair.
    """

    def __init__(self, is_training, config):
        """Build the graph.

        Args:
            is_training: When true, dropout is applied (if
                ``config.keep_prob < 1``) and the gradient-descent training
                op is created. When false, only the forward pass and the
                cost are built.
            config: Object providing ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``keep_prob``,
                ``num_layers`` and ``max_grad_norm``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size
        label_size = 2

        self._input_data = tf.placeholder(
            tf.int32, [batch_size, num_steps], name="inputs")
        # For each sentence pair we only have one output label.
        self._targets = tf.placeholder(tf.int64, [batch_size], name="labels")

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to
        # be different than reported in the paper.
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Manually unrolled LSTM. Every step's output and state must be
        # recorded: the classifier reads outputs[-1] and the final state is
        # states[-1], so leaving the lists empty raises IndexError.
        outputs = []
        states = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0:
                    # All time steps share one set of LSTM weights.
                    tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)
                states.append(state)

        # Use the output of the last word as the input feature to softmax.
        output = tf.reshape(outputs[-1], [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, label_size])
        softmax_b = tf.get_variable("softmax_b", [label_size])
        self.logits = logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, self._targets)
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = states[-1]

        if not is_training:
            # Evaluation graphs stop here: no optimizer, no train op.
            return

        # The learning rate starts at 0.0, so the model does not learn until
        # assign_lr() has been called with a positive value.
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        # NOTE(review): the clip threshold was truncated in the original
        # text; config.max_grad_norm is the field the PTB tutorial configs
        # use for it -- confirm the config object provides it.
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    def assign_lr(self, session, lr_value):
        """Set the learning rate variable to ``lr_value`` in ``session``."""
        session.run(tf.assign(self._lr, lr_value))

    @property
    def input_data(self):
        """Placeholder for the int32 token ids, [batch_size, num_steps]."""
        return self._input_data

    @property
    def targets(self):
        """Placeholder for the int64 labels, [batch_size]."""
        return self._targets

    @property
    def cost(self):
        """Scalar tensor: summed cross-entropy divided by batch_size."""
        return self._cost

    @property
    def lr(self):
        """Learning-rate variable (only present on training models)."""
        return self._lr

    def step(self, session, inputs, labels, is_training):
        """Run the model on one batch, optionally applying a training update.

        Args:
            session: Session in which to run the graph.
            inputs: Value fed to the input placeholder.
            labels: Value fed to the label placeholder.
            is_training: When true, the training op is run as well. This
                requires a model that was constructed with
                ``is_training=True``, since only those have a training op.

        Returns:
            A ``(cost, state, logits)`` tuple with the fetched values of the
            cost, the final LSTM state and the logits.
        """
        # Always fetch the three returned values so the result can be
        # unpacked identically in training and evaluation mode.
        output_feeds = [self._cost, self._final_state, self.logits]
        if is_training:
            output_feeds.append(self._train_op)
        input_feeds = {
            self._input_data: inputs,
            self._targets: labels,
        }
        results = session.run(output_feeds, input_feeds)
        cost, state, logits = results[:3]
        return cost, state, logits

2 个答案:

答案 0 :(得分:0)



答案 1 :(得分:0)
