通过lstm提取单词的第一个字母

时间:2019-03-08 02:36:49

标签: tensorflow lstm

我想提取一个单词的第一个字母,然后编写以下代码。但是当我运行这段代码时,可以看出成本和准确性确实很差。他们似乎都是一个常数,根本不收敛。当然,我已经调整了学习率,epoch,批处理,num_layers等参数。但是,它不起作用。我的代码怎么了?

import tensorflow as tf
import numpy as np

# 创建字母到数字的映射,以及数字到字母的映射,保存为字典
# create a dict to reflact the number to letter and the letter to number
str_dict = 'abcdefghijklmnopqrstuvwxyz'    
num2str = dict(enumerate(str_dict,1))
num2str[0] = '#'
str2num = {i:j for j,i in num2str.items()}


def generate_word(data_length, max_length, min_length):
    '''
    生成数据
    generate some random data
    data_length: the number of whole data数据样本大小
    max_length: maximum length of the word单词最大长度
    min_length: minimum length of the word单词最小长度

    >>> generate_word(100, 10, 5)
    ['asdfghjklp',  
     'qwert',
     ...
    ]

    output a list in which the word's maximum length is max_length, minimum length is min_length
    输出最长为max_length, 最短为min_length的单词列表
    '''
    return [''.join([num2str[np.random.randint(1,26)] for i in range(np.random.randint(min_length, max_length))]) for _ in range(data_length)]


def data_process(data):
    '''
    padding the word to max_length
    将单词补齐到列表里最长单词的长度
    >>>data_process(['asdfg', 'asd'])

    ['asdfg', 'asd##']

    '''
    max_length = max([len(i) for i in data])
    tmp_data = [i+num2str[0]*(max_length-len(i)) for i in data]
    return [[str2num[i] for i in j] for j in tmp_data]


data_x = [''.join(set(i)) for i in generate_word(10000, 4,3)]    # 去重
data_y = [i[0] for i in data_x]    # extract the first letter for target data 取首字母

data_x = data_process(data_x)    # pad the data 将所有单词补齐为同一长度
data_y = [str2num[i] for i in data_y]    # convert the first letter to number 将首字母转换为数字


train_inputs, test_inputs = data_x[:8000], data_x[8000:]    # divide into train set and test set 划分训练集和测试集
train_outputs, test_outputs = data_y[:8000], data_y[8000:]

batch_size = 128
vocab_size = len(str2num)
num_layers = 4
embed_size = 20
rnn_size = 20
graph_2 = tf.Graph()
with graph_2.as_default():
    with tf.name_scope('placeholder'):
        # placeholder 占位符
        inputs = tf.placeholder(tf.int32, [batch_size, None])
        outputs = tf.placeholder(tf.int32, [batch_size])
        output_onehot = tf.one_hot(outputs, len(str2num))
        learning_rate = tf.placeholder(tf.float32)


    with tf.name_scope('embedding'):
        # embedding 词嵌入
        lookup_table = tf.truncated_normal([vocab_size, embed_size])
        embed_out = tf.nn.embedding_lookup(lookup_table, inputs)


    with tf.name_scope('lstm'):
        # build a 4-layers networks with lstm cell. 
        # 构建一个4层的lstm网络,最后用state来计算首字母
        lstm = tf.contrib.rnn.LSTMCell(rnn_size, dtype = tf.float32)
#         cell = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=0.5)
        cell = tf.contrib.rnn.MultiRNNCell([lstm for _ in range(num_layers)])
        out, state = tf.nn.dynamic_rnn(cell, embed_out, dtype = tf.float32)
        # state size:[num_layers, 2, batch_size, cell.output_size]
        # state[-1][1] size: [batch_size, cell.output_size]
        logits = tf.matmul(state[-1][1], tf.Variable(tf.truncated_normal([embed_size, len(str2num)])))+tf.Variable(tf.ones(len(str2num))/10)
        logits = tf.nn.softmax(logits)


    with tf.name_scope('optimizer'):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=output_onehot, logits=logits))
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output_onehot,1), tf.argmax(logits, 1)), tf.float32))
        optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)


def get_batch(data, batch_size):
    batch_num = len(data)//batch_size
    for i in range(batch_num):
        yield data[i*batch_size:(i+1)*batch_size]


batches = len(train_inputs)//batch_size
lr = 0.01
display = 100
epoches = 10



with tf.Session(graph = graph_2) as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epoches):
        ix, iy = next(get_batch(train_inputs, batch_size)), next(get_batch(train_outputs, batch_size))
        for batch in range(batches):
            _, c = sess.run([optimizer, cost], {inputs:ix, outputs:iy, learning_rate:lr})
            if batch%display == 0:
                accu = sess.run(accuracy, {inputs:test_inputs[:128], outputs:test_outputs[:128]})
                print('cost is {} accuracy is {}'.format(c, accu))
    print('done')

0 个答案:

没有答案