Question

我尝试使用LSTM编写用于将动词标记为B的源代码。我将word映射为数字和一个热编码我认为我的源代码是正确的，但结果与我的期望不同。

import tensorflow as tf
import numpy as np
import os
def get_x():
    with open('/Users/Knight/Desktop/NLP/training_set.txt') as f:
        contents = f.read(1000000).split('\n')
        x_data = []
        word = []
        for sentence in contents:
            sentence = sentence.strip().split(' ')
            x_data.append([])
            word.append([])
            for idx in range(len(sentence)):
                if idx%2 == 0:
                    x_data[-1].append(int(sentence[idx]))
                else:
                    word[-1].append(int(sentence[idx]))
            if len(word[-1]) != 112:
                for i in range(112-len(word[-1])):
                    word[-1].append(0)
            if len(x_data[-1]) != 112:
                for i in range(112-len(x_data[-1])):
                    x_data[-1].append(0)
    m = 0
    for i in x_data:
        m = max(m, len(i))
    print m
    print x_data
    print word
    return word, x_data

def process_x(x_data):
    res = []
    for i in x_data:
        tmp = map(int, list('0'*40))
        tmp[i] = 1
        res.append(tmp)
    res = np.array(res, dtype='f')
    return res

word, x_data = get_x()

rnn_size = 2
time_step_size = std_size
batch_size = 1

rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
state = rnn_cell.zero_state(batch_size, tf.float32)
for i in range(len(x_data)):
    x = process_x(x_data[i])[:std_size]
    y = word[i][:std_size]
    x_split = tf.split(0, time_step_size, x)
    outputs, state = tf.nn.rnn(rnn_cell, x_split, state)

    prediction = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
    real = tf.reshape(y, [-1])
    ratio = tf.ones([time_step_size * batch_size])

    loss = tf.nn.seq2seq.sequence_loss_by_example([prediction], [real], [ratio])
    cost = tf.reduce_mean(loss)/batch_size
    train = tf.train.AdamOptimizer(0.01).minimize(cost)

    # h_loss = tf.summary.scalar('loss', loss)
    h_cost = tf.summary.scalar('cost', cost)
    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for step in range(1000):
            sess.run(train)
            summary = sess.run(merged)
            writer.add_summary(summary, step)
        result = sess.run(tf.arg_max(prediction, 1))
        print result, [t for t in result] == y
    tf.get_variable_scope().reuse_variables()

上面分享了源代码。在研究一些案例后，我预计会降低成本。但是成本不会降低，结果总是返回0 我无法理解为什么会这样。

我想知道为什么这个张量流模型的成本不会降低？

0 个答案: