LSTM模型损坏将支付

时间:2018-12-19 01:21:17

标签: lstm

问题:这是我第一次构建多类LSTM预测模型。我试图获取每个类的预测概率,以输入到损失函数和随后的优化器中。有20个类,所以我相信我需要一个包含20列的输出层,其行总和为1?现在,我得到20列,其行总计为20。我认为它是下面的预测代码,或者是下面的损失函数。修复它,我将赚5美元。

  predictions = tf.contrib.layers.fully_connected(lstm_outputs[:, -1], 20, activation_fn=tf.sigmoid)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=labels_))

下面的代码...

def model_inputs():
    """
    Create the model inputs
    """
    inputs_ = tf.placeholder(tf.int32, [None, None], name='inputs')
    labels_ = tf.placeholder(tf.int32, [None, None], name='labels')
    keep_prob_ = tf.placeholder(tf.float32, name='keep_prob')

    return inputs_, labels_, keep_prob_


def build_embedding_layer(inputs_, vocab_size, embed_size):
    """
    Create the embedding layer
    """
    embedding = tf.Variable(tf.random_uniform((vocab_size, embed_size), -1, 1))
    embed = tf.nn.embedding_lookup(embedding, inputs_)

    return embed


def build_lstm_layers(lstm_sizes, embed, keep_prob_, batch_size):

    """
    Create the LSTM layers
    """

    lstms = [tf.contrib.rnn.BasicLSTMCell(size) for size in lstm_sizes]
    # Add dropout to the cell
    drops = [tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob_) for lstm in lstms]
    # Stack up multiple LSTM layers, for deep learning
    cell = tf.contrib.rnn.MultiRNNCell(drops)
    # Getting an initial state of all zeros
    initial_state = cell.zero_state(batch_size, tf.float32)

    lstm_outputs, final_state = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)

    return initial_state, lstm_outputs, cell, final_state

def build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate):    
    """
    Create the Loss function and Optimizer
    """

    predictions = tf.contrib.layers.fully_connected(lstm_outputs[:, -1], 20, activation_fn=tf.sigmoid)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=labels_))
    #loss = tf.losses.softmax_cross_entropy(labels_, predictions)
    ##loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_, logits=predictions)
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(loss) 
    #optimizer = tf.train.AdadeltaOptimizer(learning_rate).minimize(loss)
    ##optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    return predictions, loss, optimizer

def build_accuracy(predictions, labels_):

    """
    Create accuracy
    """

    correct_pred = tf.equal(tf.cast(tf.round(predictions), tf.int32), labels_)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    return accuracy

def build_and_train_network(lstm_sizes, vocab_size, embed_size, epochs, batch_size,
                            learning_rate, keep_prob, train_x, val_x, train_y, val_y):

    inputs_, labels_, keep_prob_ = model_inputs()
    embed = build_embedding_layer(inputs_, vocab_size, embed_size)
    initial_state, lstm_outputs, lstm_cell, final_state = build_lstm_layers(lstm_sizes, embed, keep_prob_, batch_size)
    predictions, loss, optimizer = build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate)
    accuracy = build_accuracy(predictions, labels_)

    saver = tf.train.Saver()

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        n_batches = len(train_x)//batch_size
        for e in range(epochs):
            state = sess.run(initial_state)

            train_acc = []
            for ii, (x, y) in enumerate(utl.get_batches(train_x, train_y, batch_size), 1):
                feed = {inputs_: x,
                        labels_: y[:, None],
                        keep_prob_: keep_prob,
                        initial_state: state}
                loss_, state, _,  batch_acc, pred, lstm_outputs_ = sess.run([loss, final_state, optimizer, accuracy, predictions,lstm_outputs], feed_dict=feed)
                train_acc.append(batch_acc)

                if (ii + 1) % n_batches == 0:

                    val_acc = []
                    val_state = sess.run(lstm_cell.zero_state(batch_size, tf.float32))
                    for xx, yy in utl.get_batches(val_x, val_y, batch_size):
                        feed = {inputs_ : xx,
                                labels_ : yy[:, None],
                                keep_prob_ : 1,
                                initial_state: val_state}
                        val_batch_acc, val_state = sess.run([accuracy, final_state], feed_dict=feed)
                        val_acc.append(val_batch_acc)


                    print("Epoch: {}/{}...".format(e+1, epochs),
                          "Batch: {}/{}...".format(ii+1, n_batches),
                          "Train Loss: {:.3f}...".format(loss_),
                          "Train Accruacy: {:.3f}...".format(np.mean(train_acc)),
                          "Val Accuracy: {:.3f}".format(np.mean(val_acc)))

        #predictions = predictions.eval(feed_dict = {inputs_:train_x})


        saver.save(sess, "checkpoints/sentiment.ckpt")
        #return lstm_outputs
        return lstm_outputs_, pred



def test_network(model_dir, batch_size, test_x, test_y):

    inputs_, labels_, keep_prob_ = model_inputs()
    embed = build_embedding_layer(inputs_, vocab_size, embed_size)
    initial_state, lstm_outputs, lstm_cell, final_state = build_lstm_layers(lstm_sizes, embed, keep_prob_, batch_size)
    predictions, loss, optimizer = build_cost_fn_and_opt(lstm_outputs, labels_, learning_rate)
    accuracy = build_accuracy(predictions, labels_)

    saver = tf.train.Saver()

    test_acc = []
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(model_dir))
        test_state = sess.run(lstm_cell.zero_state(batch_size, tf.float32))
        feed = {inputs_: test_x,
                labels_: test_y[:, None],
                keep_prob_: 1,
                initial_state: test_state}
        batch_acc, test_state,pred = sess.run([accuracy, final_state, predictions], feed_dict=feed)
        test_acc.append(batch_acc)
        print("Test Accuracy: {:.3f}".format(np.mean(test_acc)))
        return pred 

###################
#RUN IT
###################

# Define Inputs and Hyperparameters
#lstm_sizes = [250, 125]
lstm_sizes = [128, 64]
vocab_size = len(vocab_to_int) + 1 #add one for padding
embed_size = 10
epochs = 1
batch_size = 200
learning_rate = 0.01
keep_prob = 0.25

with tf.Graph().as_default():
    lstm_outputs, pred = build_and_train_network(lstm_sizes, vocab_size, embed_size, epochs, batch_size,
                            learning_rate, keep_prob, train_x, val_x, train_y, val_y)
    print(lstm_outputs)
with tf.Graph().as_default():
    pred = test_network('checkpoints', 315, test_x, test_y)   

0 个答案:

没有答案