我想提取一个单词的第一个字母,然后编写以下代码。但是当我运行这段代码时,可以看出成本和准确性确实很差。他们似乎都是一个常数,根本不收敛。当然,我已经调整了学习率,epoch,批处理,num_layers等参数。但是,它不起作用。我的代码怎么了?
import tensorflow as tf
import numpy as np
# 创建字母到数字的映射,以及数字到字母的映射,保存为字典
# create a dict to reflact the number to letter and the letter to number
str_dict = 'abcdefghijklmnopqrstuvwxyz'
num2str = dict(enumerate(str_dict,1))
num2str[0] = '#'
str2num = {i:j for j,i in num2str.items()}
def generate_word(data_length, max_length, min_length):
'''
生成数据
generate some random data
data_length: the number of whole data数据样本大小
max_length: maximum length of the word单词最大长度
min_length: minimum length of the word单词最小长度
>>> generate_word(100, 10, 5)
['asdfghjklp',
'qwert',
...
]
output a list in which the word's maximum length is max_length, minimum length is min_length
输出最长为max_length, 最短为min_length的单词列表
'''
return [''.join([num2str[np.random.randint(1,26)] for i in range(np.random.randint(min_length, max_length))]) for _ in range(data_length)]
def data_process(data):
'''
padding the word to max_length
将单词补齐到列表里最长单词的长度
>>>data_process(['asdfg', 'asd'])
['asdfg', 'asd##']
'''
max_length = max([len(i) for i in data])
tmp_data = [i+num2str[0]*(max_length-len(i)) for i in data]
return [[str2num[i] for i in j] for j in tmp_data]
data_x = [''.join(set(i)) for i in generate_word(10000, 4,3)] # 去重
data_y = [i[0] for i in data_x] # extract the first letter for target data 取首字母
data_x = data_process(data_x) # pad the data 将所有单词补齐为同一长度
data_y = [str2num[i] for i in data_y] # convert the first letter to number 将首字母转换为数字
train_inputs, test_inputs = data_x[:8000], data_x[8000:] # divide into train set and test set 划分训练集和测试集
train_outputs, test_outputs = data_y[:8000], data_y[8000:]
batch_size = 128
vocab_size = len(str2num)
num_layers = 4
embed_size = 20
rnn_size = 20
graph_2 = tf.Graph()
with graph_2.as_default():
with tf.name_scope('placeholder'):
# placeholder 占位符
inputs = tf.placeholder(tf.int32, [batch_size, None])
outputs = tf.placeholder(tf.int32, [batch_size])
output_onehot = tf.one_hot(outputs, len(str2num))
learning_rate = tf.placeholder(tf.float32)
with tf.name_scope('embedding'):
# embedding 词嵌入
lookup_table = tf.truncated_normal([vocab_size, embed_size])
embed_out = tf.nn.embedding_lookup(lookup_table, inputs)
with tf.name_scope('lstm'):
# build a 4-layers networks with lstm cell.
# 构建一个4层的lstm网络,最后用state来计算首字母
lstm = tf.contrib.rnn.LSTMCell(rnn_size, dtype = tf.float32)
# cell = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=0.5)
cell = tf.contrib.rnn.MultiRNNCell([lstm for _ in range(num_layers)])
out, state = tf.nn.dynamic_rnn(cell, embed_out, dtype = tf.float32)
# state size:[num_layers, 2, batch_size, cell.output_size]
# state[-1][1] size: [batch_size, cell.output_size]
logits = tf.matmul(state[-1][1], tf.Variable(tf.truncated_normal([embed_size, len(str2num)])))+tf.Variable(tf.ones(len(str2num))/10)
logits = tf.nn.softmax(logits)
with tf.name_scope('optimizer'):
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=output_onehot, logits=logits))
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output_onehot,1), tf.argmax(logits, 1)), tf.float32))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
def get_batch(data, batch_size):
batch_num = len(data)//batch_size
for i in range(batch_num):
yield data[i*batch_size:(i+1)*batch_size]
batches = len(train_inputs)//batch_size
lr = 0.01
display = 100
epoches = 10
with tf.Session(graph = graph_2) as sess:
sess.run(tf.global_variables_initializer())
for epoch in range(epoches):
ix, iy = next(get_batch(train_inputs, batch_size)), next(get_batch(train_outputs, batch_size))
for batch in range(batches):
_, c = sess.run([optimizer, cost], {inputs:ix, outputs:iy, learning_rate:lr})
if batch%display == 0:
accu = sess.run(accuracy, {inputs:test_inputs[:128], outputs:test_outputs[:128]})
print('cost is {} accuracy is {}'.format(c, accu))
print('done')