Language modeling with RNN and LSTM cells in Tensorflow

Asked: 2017-02-27 19:59:28

Tags: machine-learning tensorflow neural-network deep-learning recurrent-neural-network

My language-modeling RNN predicts only " ", "and" and "unknown". What is wrong with my code?
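
When a model collapses onto a handful of outputs like this, they are usually just the most frequent tokens in the training labels. A quick sanity check (a minimal sketch, using the indexList and decoder variables from the code below) is to print the label distribution:

from collections import Counter

# how often does each label index occur? if " ", "and" and "unknown"
# dominate the counts, the model may simply be predicting majority classes
label_counts = Counter(indexList)
for index, count in label_counts.most_common(5):
    print(decoder[index], count)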

Here I define the hyperparameters:

num_epochs = 300
total_series_length = len(uniqueSentence) - 4
truncated_backprop_length = 30
state_size = 100
num_classes = NUM_MEANINGFUL + 1
echo_step = 1
batch_size = 32
vocab_length = len(decoder)
num_batches = total_series_length//batch_size//truncated_backprop_length
learning_rate = 0.01
old_perplexity = 0

Here I generate the data (my inputs are word embeddings of length 100, computed with Word2Vec):

def generateData():

    uniqueSent = uniqueSentence[0 : len(uniqueSentence) - 4]
    x_tr = np.array([model_ted[word] for word in uniqueSent])  # one 100-dim Word2Vec embedding per word

    # split the word sequence into batch_size contiguous rows, keeping every
    # 100-dim embedding intact: [batch_size, num_steps, 100]
    x = x_tr.reshape((batch_size, -1, 100))

    print("hi")

    # labels are the word indices shifted one step ahead (next-word targets)
    new_y = indexList[1: len(indexList) - 4]
    new_y.append(indexList[len(indexList) - 3])
    y = np.array(new_y)
    print(len(y))
    y = y.reshape((batch_size, -1))

    return (x, y)
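
A quick shape check after calling generateData can catch reshape mistakes early (a minimal sketch; the expected shapes follow from the placeholders defined next):

x, y = generateData()
print(x.shape, y.shape)  # expect (batch_size, num_steps, 100) and (batch_size, num_steps)
assert x.shape[0] == y.shape[0] == batch_size
assert x.shape[1] == y.shape[1]  # inputs and labels must cover the same number of steps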

Defining the placeholders:

batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length, 100])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])

W = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)

W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
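
As an aside, biases are normally given shape (num_classes,) and broadcast over the batch; shaping them as (batch_size, num_classes) only works because every batch here has exactly batch_size rows. A more conventional setup would be (a sketch, not the original code):

W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((num_classes,)), dtype=tf.float32)  # broadcasts over the batch dimension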

Inputs and desired outputs:

labels_series = tf.unstack(batchY_placeholder, axis=1)  # a list of truncated_backprop_length tensors, each [batch_size]
inputs_series = batchX_placeholder

Forward pass:

from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
print(tf.__version__) 
#cell = tf.contrib.rnn.BasicRNNCell(state_size)
cell = tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple = False)
print(cell.state_size)

init_state = tf.zeros([batch_size, cell.state_size])

outputs, current_state = tf.nn.dynamic_rnn(cell, inputs_series, initial_state = init_state)

iterable_outputs = tf.unstack(outputs, axis = 1)
logits_series = [tf.matmul(state, W2) + b2 for state in iterable_outputs]  # per-step projection to vocabulary logits

predictions_series = [tf.nn.softmax(logits) for logits in logits_series]

losses = [tf.losses.sparse_softmax_cross_entropy(labels, logits) 
          for logits, labels in zip(logits_series, labels_series)]

total_loss = tf.add_n(losses)

train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
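
For reference, the per-step unstack/matmul list can be collapsed into one matmul over the flattened outputs, and tf.contrib.seq2seq.sequence_loss (in the TF 1.x contrib API) computes the mean per-token cross-entropy directly. A sketch, assuming b2 has shape (num_classes,) as in the note above:

flat_outputs = tf.reshape(outputs, [-1, state_size])  # [batch*steps, state_size]
flat_logits = tf.matmul(flat_outputs, W2) + b2        # [batch*steps, num_classes]
logits = tf.reshape(flat_logits, [batch_size, truncated_backprop_length, num_classes])

# mean cross-entropy per token; the all-ones weights count every position equally
loss = tf.contrib.seq2seq.sequence_loss(
    logits, batchY_placeholder,
    weights=tf.ones([batch_size, truncated_backprop_length]))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)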

x,y = generateData()
del(model_ted)

Training:

with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())  # initialize_all_variables is deprecated
    loss_list = []

    print("start")


    _current_state = np.zeros((batch_size, 2*state_size))

    #previously generateData was outside and _current_state was inside
    for epoch_idx in range(num_epochs):

        print("New data, epoch", epoch_idx)


        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length

            batchX = x[:,start_idx:end_idx,:]
            batchY = y[:,start_idx:end_idx]


            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder:batchX,
                    batchY_placeholder:batchY,
                    init_state:_current_state
                })

            loss_list.append(_total_loss)  
            del(batchX)
            del(batchY)


        perplexity = np.exp(_total_loss / truncated_backprop_length)  # the losses are in nats, so exp rather than 2**
        print(perplexity)
        del(perplexity)

        _predictions_series = np.array(_predictions_series)
        pr = _predictions_series.transpose([1, 0, 2])
        pr_ind = []
        for line in pr[0]:
            pr_ind.append(np.argmax(line))

        for index in pr_ind:
            print(decoder[index], end = " " )
        del(pr_ind)
        print("\n learning rate: ", end = " ")
        print(learning_rate)
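
One more thing worth checking: _current_state is initialized once and never reset, so the LSTM state at the end of one epoch leaks into the first batch of the next. A minimal variant that resets it per epoch (a sketch, assuming the state should only be carried across consecutive batches within an epoch):

for epoch_idx in range(num_epochs):
    # start every epoch from a zero state (c and h concatenated, since
    # state_is_tuple=False); within the epoch the state is carried from
    # batch to batch exactly as in the loop above
    _current_state = np.zeros((batch_size, 2 * state_size))
    # ... rest of the epoch loop unchanged ...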

0 Answers

No answers yet.