我的语言建模RNN仅预测""" "和"和"未知"我的代码有什么问题?
这里我定义了超级参数:
num_epochs = 300
total_series_length = len(uniqueSentence) - 4
truncated_backprop_length = 30
state_size = 100
num_classes = NUM_MEANINGFUL + 1
echo_step = 1
batch_size = 32
vocab_length = len(decoder)
num_batches = total_series_length//batch_size//truncated_backprop_length
learning_rate = 0.01
old_perplexity = 0
这里我生成数据(我的输入是通过使用Word2Vec计算的长嵌入100的单词嵌入):
def generateData():
uniqueSent = uniqueSentence[0 : len(uniqueSentence) - 4]
x_tr = np.array([model_ted[word] for words in uniqueSent])
#Roll array elements along a given axis.
#Elements that roll beyond the last position are re-introduced at the first.
x_tr = x_tr.reshape((100, batch_size, -1)) # The first index changing slowest, subseries as rows
x = x_tr.transpose((1, 2, 0))
print("hi")
new_y = indexList[1: len(indexList)- 4]
new_y.append(indexList[len(indexList)-3])
y = np.array(new_y)
print(len(y))
y = y.reshape((batch_size, -1))
return (x, y)
定义占位符:
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length, 100])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])
W = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
输入和所需的输出:
labels_series = tf.transpose(batchY_placeholder)
labels_series = tf.unstack(batchY_placeholder, axis=1)
inputs_series = batchX_placeholder
前传:
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
print(tf.__version__)
#cell = tf.contrib.rnn.BasicRNNCell(state_size)
cell = tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple = False)
print(cell.state_size)
init_state = tf.zeros([batch_size, cell.state_size])
outputs, current_state = tf.nn.dynamic_rnn(cell, inputs_series, initial_state = init_state)
iterable_outputs = tf.unstack(outputs, axis = 1)
logits_series = [tf.matmul(state, W2) + b2 for state in iterable_outputs] #Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]
losses = [tf.losses.sparse_softmax_cross_entropy(labels, logits)
for logits, labels in zip(logits_series, labels_series)]
total_loss = tf.add_n(losses)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
x,y = generateData()
del(model_ted)
训练:
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
sess.run(tf.initialize_all_variables())
loss_list = []
print("start")
_current_state = np.zeros((batch_size, 2*state_size))
#avevo genrateData fuori e -currentstate dentro
for epoch_idx in range(num_epochs):
print("New data, epoch", epoch_idx)
for batch_idx in range(num_batches):
start_idx = batch_idx * truncated_backprop_length
end_idx = start_idx + truncated_backprop_length
batchX = x[:,start_idx:end_idx,:]
batchY = y[:,start_idx:end_idx]
_total_loss, _train_step, _current_state, _predictions_series = sess.run(
[total_loss, train_step, current_state, predictions_series],
feed_dict={
batchX_placeholder:batchX,
batchY_placeholder:batchY,
init_state:_current_state
})
loss_list.append(_total_loss)
del(batchX)
del(batchY)
perplexity = 2 ** (_total_loss/truncated_backprop_length )
print(perplexity)
del(perplexity)
_predictions_series = np.array(_predictions_series)
pr = _predictions_series.transpose([1, 0, 2])
pr_ind = []
for line in pr[0]:
pr_ind.append(np.argmax(line))
for index in pr_ind:
print(decoder[index], end = " " )
del(pr_ind)
print("\n learning rate: ", end = " ")
print(learning_rate)