我尝试使用LSTM编写用于将动词标记为B的源代码。 我将word映射为数字和一个热编码 我认为我的源代码是正确的,但结果与我的期望不同。
import tensorflow as tf
import numpy as np
import os
def get_x():
with open('/Users/Knight/Desktop/NLP/training_set.txt') as f:
contents = f.read(1000000).split('\n')
x_data = []
word = []
for sentence in contents:
sentence = sentence.strip().split(' ')
x_data.append([])
word.append([])
for idx in range(len(sentence)):
if idx%2 == 0:
x_data[-1].append(int(sentence[idx]))
else:
word[-1].append(int(sentence[idx]))
if len(word[-1]) != 112:
for i in range(112-len(word[-1])):
word[-1].append(0)
if len(x_data[-1]) != 112:
for i in range(112-len(x_data[-1])):
x_data[-1].append(0)
m = 0
for i in x_data:
m = max(m, len(i))
print m
print x_data
print word
return word, x_data
def process_x(x_data):
res = []
for i in x_data:
tmp = map(int, list('0'*40))
tmp[i] = 1
res.append(tmp)
res = np.array(res, dtype='f')
return res
word, x_data = get_x()
rnn_size = 2
time_step_size = std_size
batch_size = 1
rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
state = rnn_cell.zero_state(batch_size, tf.float32)
for i in range(len(x_data)):
x = process_x(x_data[i])[:std_size]
y = word[i][:std_size]
x_split = tf.split(0, time_step_size, x)
outputs, state = tf.nn.rnn(rnn_cell, x_split, state)
prediction = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
real = tf.reshape(y, [-1])
ratio = tf.ones([time_step_size * batch_size])
loss = tf.nn.seq2seq.sequence_loss_by_example([prediction], [real], [ratio])
cost = tf.reduce_mean(loss)/batch_size
train = tf.train.AdamOptimizer(0.01).minimize(cost)
# h_loss = tf.summary.scalar('loss', loss)
h_cost = tf.summary.scalar('cost', cost)
merged = tf.summary.merge_all()
with tf.Session() as sess:
tf.global_variables_initializer().run()
for step in range(1000):
sess.run(train)
summary = sess.run(merged)
writer.add_summary(summary, step)
result = sess.run(tf.arg_max(prediction, 1))
print result, [t for t in result] == y
tf.get_variable_scope().reuse_variables()
上面分享了源代码。在研究一些案例后,我预计会降低成本。但是成本不会降低,结果总是返回0 我无法理解为什么会这样。