问题:这是我第一次构建多分类 LSTM 预测模型。我想得到每个类别的预测概率,并将其传入损失函数和随后的优化器。一共有 20 个类别,所以我认为输出层应该有 20 列,并且每一行的和为 1,对吗?但现在我得到的输出虽然是 20 列,每一行的和却是 20。我怀疑问题出在下面的预测代码,或者下面的损失函数上。修复它,我将赚5美元。
# NOTE(review): this layer applies tf.sigmoid, which squashes each of the 20
# outputs into (0, 1) independently, so a row is not normalised to sum to 1.
predictions = tf.contrib.layers.fully_connected(lstm_outputs[:, -1], 20, activation_fn=tf.sigmoid)
# NOTE(review): the sigmoid outputs above are passed here as `logits`. Per the
# TensorFlow docs this op applies softmax internally and expects unscaled
# scores, so the activation above should probably be None -- confirm.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=labels_))
下面的代码...
def model_inputs():
    """Build the placeholders through which data is fed into the graph.

    Returns:
        A 3-tuple ``(inputs_, labels_, keep_prob_)``:
        two rank-2 int32 placeholders named 'inputs' and 'labels' (both
        dimensions left unspecified) and a float32 placeholder named
        'keep_prob' created without a shape.
    """
    placeholders = (
        tf.placeholder(tf.int32, shape=[None, None], name='inputs'),
        tf.placeholder(tf.int32, shape=[None, None], name='labels'),
        tf.placeholder(tf.float32, name='keep_prob'),
    )
    return placeholders
def build_embedding_layer(inputs_, vocab_size, embed_size):
    """Map every id in ``inputs_`` to a row of a trainable embedding table.

    Args:
        inputs_: Tensor of ids used to index the embedding table.
        vocab_size: Number of rows in the embedding table.
        embed_size: Number of columns (width of each embedding vector).

    Returns:
        The result of ``tf.nn.embedding_lookup`` on the new table.
    """
    table_shape = (vocab_size, embed_size)
    # The table starts out uniformly random in [-1, 1).
    initial_values = tf.random_uniform(table_shape, minval=-1, maxval=1)
    embedding_table = tf.Variable(initial_values)
    return tf.nn.embedding_lookup(embedding_table, inputs_)
def build_lstm_layers(lstm_sizes, embed, keep_prob_, batch_size):
    """Stack dropout-wrapped LSTM cells and unroll them over ``embed``.

    Args:
        lstm_sizes: Iterable with the number of units of each LSTM layer,
            in stacking order.
        embed: Input tensor handed to ``tf.nn.dynamic_rnn``.
        keep_prob_: Output keep probability used by every dropout wrapper.
        batch_size: Batch size the all-zero initial state is built for.

    Returns:
        A 4-tuple ``(initial_state, lstm_outputs, cell, final_state)``.
    """
    # One plain LSTM cell per requested layer size.
    plain_cells = []
    for num_units in lstm_sizes:
        plain_cells.append(tf.contrib.rnn.BasicLSTMCell(num_units))

    # Dropout is applied to the output of each cell.
    dropout_cells = []
    for plain_cell in plain_cells:
        dropout_cells.append(
            tf.contrib.rnn.DropoutWrapper(plain_cell, output_keep_prob=keep_prob_)
        )

    # Combine the layers into a single multi-layer cell.
    cell = tf.contrib.rnn.MultiRNNCell(dropout_cells)

    # The network starts from an all-zero state of the given batch size.
    initial_state = cell.zero_state(batch_size, tf.float32)

    lstm_outputs, final_state = tf.nn.dynamic_rnn(
        cell, embed, initial_state=initial_state
    )
    return initial_state, lstm_outputs, cell, final_state
def build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate, num_classes=20):
    """Create the class-probability output, the loss and the optimizer.

    The output layer is linear: it produces raw logits. The softmax
    cross-entropy op applies softmax itself, so feeding it already-squashed
    values (e.g. sigmoid outputs) gives a wrong loss. The probabilities
    returned to the caller are obtained with an explicit ``tf.nn.softmax``,
    so every row sums to 1.

    Args:
        lstm_outputs: Output of the recurrent layers; only the last time step
            (``lstm_outputs[:, -1]``) is fed to the output layer.
        labels_: Integer class ids, one per example. It is flattened before
            use, so ``(batch,)`` and ``(batch, 1)`` are both accepted.
            NOTE(review): assumes ids are 0-based and < ``num_classes`` --
            confirm against the data preparation code.
        learning_rate: Learning rate for the RMSProp optimizer.
        num_classes: Number of output classes (defaults to 20).

    Returns:
        A 3-tuple ``(predictions, loss, optimizer)`` where ``predictions``
        holds one softmax probability per class for each example, ``loss`` is
        the mean cross-entropy and ``optimizer`` is the training op.
    """
    # Linear layer: activation_fn=None keeps the outputs as unscaled logits.
    logits = tf.contrib.layers.fully_connected(
        lstm_outputs[:, -1], num_classes, activation_fn=None
    )

    # Per-class probabilities for the caller; each row sums to 1.
    predictions = tf.nn.softmax(logits)

    # The sparse variant takes integer class ids directly, so no one-hot
    # encoding of the labels is needed.
    class_ids = tf.reshape(labels_, [-1])
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=class_ids, logits=logits
        )
    )

    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss)
    return predictions, loss, optimizer
def build_accuracy(predictions, labels_):
    """Create the multi-class accuracy op.

    The predicted class of an example is the column with the highest score.
    Rounding the scores and comparing them element-wise to the label only
    makes sense for a single-output binary classifier; with one column per
    class it compares every column to the class id.

    Args:
        predictions: Tensor with one score per class for each example
            (one row per example).
        labels_: Integer class ids, one per example. It is flattened before
            use, so ``(batch,)`` and ``(batch, 1)`` are both accepted.
            NOTE(review): assumes the class id equals the column index of
            ``predictions`` -- confirm against the data preparation code.

    Returns:
        Scalar float32 tensor: the fraction of examples whose highest-scoring
        class equals the label.
    """
    predicted_ids = tf.cast(tf.argmax(predictions, 1), tf.int32)
    true_ids = tf.cast(tf.reshape(labels_, [-1]), tf.int32)
    correct_pred = tf.equal(predicted_ids, true_ids)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    return accuracy
def build_and_train_network(lstm_sizes, vocab_size, embed_size, epochs, batch_size,
                            learning_rate, keep_prob, train_x, val_x, train_y, val_y):
    """Build the graph, train it, validate once per epoch and save a checkpoint.

    Args:
        lstm_sizes: Number of units of each stacked LSTM layer.
        vocab_size: Number of rows of the embedding table.
        embed_size: Width of each embedding vector.
        epochs: Number of passes over the training data.
        batch_size: Batch size used for both training and validation.
        learning_rate: Learning rate handed to the optimizer.
        keep_prob: Dropout keep probability fed while training (validation
            always feeds 1).
        train_x, train_y: Training inputs and labels; labels are fed as
            ``y[:, None]``.
        val_x, val_y: Validation inputs and labels, fed the same way.

    Returns:
        ``(lstm_outputs_, pred)``: the LSTM outputs and predictions fetched
        for the last training batch that was run, or ``(None, None)`` when no
        training batch was run at all.
    """
    inputs_, labels_, keep_prob_ = model_inputs()
    embed = build_embedding_layer(inputs_, vocab_size, embed_size)
    initial_state, lstm_outputs, lstm_cell, final_state = build_lstm_layers(
        lstm_sizes, embed, keep_prob_, batch_size
    )
    predictions, loss, optimizer = build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate)
    accuracy = build_accuracy(predictions, labels_)

    # Build the zero-state op once; creating it inside the training loop
    # would add new nodes to the graph on every validation pass.
    val_zero_state = lstm_cell.zero_state(batch_size, tf.float32)

    saver = tf.train.Saver()

    # Defined up front so the return statement is valid even if the batch
    # generator yields nothing.
    lstm_outputs_, pred = None, None

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        n_batches = len(train_x) // batch_size

        for e in range(epochs):
            state = sess.run(initial_state)
            train_acc = []

            # ii is the 1-based number of the current batch.
            for ii, (x, y) in enumerate(utl.get_batches(train_x, train_y, batch_size), 1):
                feed = {inputs_: x,
                        labels_: y[:, None],
                        keep_prob_: keep_prob,
                        initial_state: state}
                loss_, state, _, batch_acc, pred, lstm_outputs_ = sess.run(
                    [loss, final_state, optimizer, accuracy, predictions, lstm_outputs],
                    feed_dict=feed,
                )
                train_acc.append(batch_acc)

                # Validate after the last batch of the epoch. Since ii is
                # already 1-based, testing ii + 1 would fire one batch early.
                if n_batches and ii % n_batches == 0:
                    val_acc = []
                    val_state = sess.run(val_zero_state)
                    for xx, yy in utl.get_batches(val_x, val_y, batch_size):
                        feed = {inputs_: xx,
                                labels_: yy[:, None],
                                keep_prob_: 1,
                                initial_state: val_state}
                        val_batch_acc, val_state = sess.run(
                            [accuracy, final_state], feed_dict=feed
                        )
                        val_acc.append(val_batch_acc)

                    print("Epoch: {}/{}...".format(e+1, epochs),
                          "Batch: {}/{}...".format(ii, n_batches),
                          "Train Loss: {:.3f}...".format(loss_),
                          "Train Accruacy: {:.3f}...".format(np.mean(train_acc)),
                          "Val Accuracy: {:.3f}".format(np.mean(val_acc)))

        saver.save(sess, "checkpoints/sentiment.ckpt")

    return lstm_outputs_, pred
def test_network(model_dir, batch_size, test_x, test_y):
    """Rebuild the graph, restore the latest checkpoint and score the test set.

    The whole test set is fed in a single run, starting from a zero state
    built for ``batch_size``. The graph is built from the module-level names
    ``vocab_size``, ``embed_size``, ``lstm_sizes`` and ``learning_rate``.

    Args:
        model_dir: Directory searched for the latest checkpoint.
        batch_size: Batch size the LSTM state is created for.
        test_x: Test inputs.
        test_y: Test labels; fed as ``test_y[:, None]``.

    Returns:
        The predictions fetched for the test set.
    """
    inputs_, labels_, keep_prob_ = model_inputs()
    embed = build_embedding_layer(inputs_, vocab_size, embed_size)
    initial_state, lstm_outputs, lstm_cell, final_state = build_lstm_layers(
        lstm_sizes, embed, keep_prob_, batch_size
    )
    predictions, loss, optimizer = build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate)
    accuracy = build_accuracy(predictions, labels_)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        checkpoint_path = tf.train.latest_checkpoint(model_dir)
        saver.restore(sess, checkpoint_path)

        zero_state_op = lstm_cell.zero_state(batch_size, tf.float32)
        test_state = sess.run(zero_state_op)

        # Dropout is disabled for evaluation by feeding a keep probability of 1.
        feed = {inputs_: test_x,
                labels_: test_y[:, None],
                keep_prob_: 1,
                initial_state: test_state}
        fetches = [accuracy, final_state, predictions]
        batch_acc, test_state, pred = sess.run(fetches, feed_dict=feed)
        test_acc = [batch_acc]

    print("Test Accuracy: {:.3f}".format(np.mean(test_acc)))
    return pred
###################
# RUN IT
###################
# Inputs and hyperparameters. test_network() reads lstm_sizes, vocab_size,
# embed_size and learning_rate from module scope, so these names must stay.
lstm_sizes = [128, 64]
vocab_size = len(vocab_to_int) + 1  # add one for padding
embed_size = 10
epochs = 1
batch_size = 200
learning_rate = 0.01
keep_prob = 0.25

# Train in a fresh graph; this also writes the checkpoint used below.
with tf.Graph().as_default():
    lstm_outputs, pred = build_and_train_network(lstm_sizes, vocab_size, embed_size, epochs, batch_size,
                                                 learning_rate, keep_prob, train_x, val_x, train_y, val_y)
print(lstm_outputs)

# Evaluate in a second fresh graph. test_network() feeds the whole test set in
# one run, so the batch size must equal the number of test examples; it is
# derived from the data rather than hard-coded.
with tf.Graph().as_default():
    pred = test_network('checkpoints', len(test_x), test_x, test_y)