I'm having trouble figuring out how to create a loss function for a basic_seq2seq model. My input is a paragraph and the output is the paragraph's section title.
Here is the code I have so far:
import tensorflow as tf
import numpy as np
import pickle
import sys
MAX_NUM_WORDS = 500000
MAX_PAR_LENGTH = 85
CONV_DIM = 128
SECTION_LENGTH = 45
EPOCHS = 100
num_paragraphs = 5200000
BATCH_SIZE = 20
SECTION_VOCAB_SIZE = 213884
weights_lstm = {'out': tf.Variable(tf.random_normal([BATCH_SIZE, 200, SECTION_VOCAB_SIZE]))}
biases_lstm = {'out': tf.Variable(tf.random_normal([BATCH_SIZE, SECTION_VOCAB_SIZE]))}
embedding_matrix = np.zeros((MAX_NUM_WORDS+1, 200))
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv1d(x, W):
    return tf.nn.conv1d(x, W, stride=1, padding='SAME')

def max_pool_1d(x):
    return tf.layers.max_pooling1d(x, pool_size=2, strides=2, padding='same')

def batch_norm(x):
    return tf.layers.batch_normalization(x)
def model(x, y):
    input = x
    with tf.device('/cpu:0'):
        input = tf.nn.embedding_lookup(W_e, x)
        output_y = tf.nn.embedding_lookup(W_e, y)

    # encoder
    bn1 = batch_norm(input)
    an1 = tf.nn.relu(bn1)
    drop1 = tf.layers.dropout(an1, 0.2)

    W_conv1 = weight_variable([3, 200, CONV_DIM])
    b_conv1 = bias_variable([CONV_DIM])
    h_conv1 = tf.nn.relu(conv1d(drop1, W_conv1) + b_conv1)

    bn2 = batch_norm(h_conv1)
    an2 = tf.nn.relu(bn2)
    W_conv2 = weight_variable([3, CONV_DIM, CONV_DIM // 2])
    b_conv2 = bias_variable([CONV_DIM // 2])
    h_conv2 = tf.nn.relu(conv1d(an2, W_conv2) + b_conv2)

    bn3 = batch_norm(h_conv2)
    an3 = tf.nn.relu(bn3)
    W_conv3 = weight_variable([3, CONV_DIM // 2, CONV_DIM // 4])
    b_conv3 = bias_variable([CONV_DIM // 4])
    h_conv3 = tf.nn.relu(conv1d(an3, W_conv3) + b_conv3)

    mp1 = max_pool_1d(h_conv3)

    enc = tf.unstack(mp1, axis=1)
    dec = tf.unstack(output_y, axis=1)
    lstm_cell = tf.contrib.rnn.LSTMCell(200, forget_bias=1.0, activation=tf.nn.softmax)
    outputs, states = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(enc, dec, lstm_cell)

    projected_outputs = []
    with tf.device('/cpu:0'):
        for output in outputs:
            projected_output = (weights_lstm['out'] * output) + biases_lstm['out']
            projected_outputs.append(projected_output)

    stacked_outputs = tf.stack(projected_outputs, 1)  # [? x 45 x V]
    print(stacked_outputs)

    weights = tf.ones_like(y, dtype=tf.float32)
    loss = tf.contrib.seq2seq.sequence_loss(logits=stacked_outputs, targets=y, weights=weights, name='loss')

    # gold_outputs = tf.unstack(output_y, axis=1)
    # cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y, logits=outputs))
    # output_y = [? x 45 x 200]
    # outputs = 45 tensors of [? x 200]
    # stacked_outputs = tf.stack(outputs, 1)  # [? x 45 x 200]
    # correct_prediction = tf.equal(tf.argmax(stacked_outputs, 1), tf.argmax(output_y, 1))
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return outputs, loss  # cross_entropy
# print('Loading Embeddings...')
# with open('embeddings.txt', 'rb') as f:
#     embedding_matrix = pickle.load(f)
print('Creating Placeholders...')
X = tf.placeholder(tf.int32, [None, MAX_PAR_LENGTH])
Y = tf.placeholder(tf.int32, [None, SECTION_LENGTH])
with tf.device('/cpu:0'):
    W_e = tf.Variable(embedding_matrix, dtype=tf.float32, trainable=False)
print('Creating Model...')
preds, loss = model(X, Y)
print('Creating Training Parameters...')
train_step = tf.train.RMSPropOptimizer(1e-4).minimize(loss)
saver = tf.train.Saver()
print('Starting Session...')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(EPOCHS):
        print('Epoch ' + str(i))
        print('Number of batches ', str(num_paragraphs // BATCH_SIZE))
        with open('section_train_data_final.txt', 'rb') as f:
            for j in range(num_paragraphs // BATCH_SIZE):
                # load data
                paragraphs = []
                for k in range(BATCH_SIZE):
                    paragraphs.append(pickle.load(f))
                x = np.array([p for p, s in paragraphs])
                # y = np.array([sess.run(tf.one_hot(s, depth=SECTION_VOCAB_SIZE, on_value=1.0, off_value=0.0)) for p, s in paragraphs])
                y = np.array([s for p, s in paragraphs])
                _, step_loss = sess.run([train_step, loss], feed_dict={X: x, Y: y})
                if j % 100 == 0 and j != 0:
                    # train_acc = sess.run(accuracy, feed_dict={X: x, Y: y})
                    print('Epoch %d: Batch %d: Loss: %g' % (i, j, step_loss))
                    saver.save(sess, '~\data\generation_model')
Any help on how to create this loss function would be appreciated. I'm fairly new to TensorFlow, so I first tried a simple loss function, which is now commented out:
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y, logits=outputs))
But it didn't work, since the loss always came out as 0. My friend then tried to write the loss function that is currently in the code, but I have no idea what he was trying to do.
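For reference, this is the shape contract I believe tf.contrib.seq2seq.sequence_loss expects (a minimal sketch under my assumptions; the shared dense projection is a placeholder of mine, not what my friend wrote, and `outputs` / `Y` refer to the names in the code above):

# Sketch: project each decoder output to vocabulary logits with one shared
# dense layer, then feed integer word ids straight into sequence_loss.
stacked = tf.stack(outputs, axis=1)                    # [batch, 45, 200]
logits = tf.layers.dense(stacked, SECTION_VOCAB_SIZE)  # [batch, 45, vocab]
weights = tf.ones_like(Y, dtype=tf.float32)            # could mask out padding here
loss = tf.contrib.seq2seq.sequence_loss(logits=logits, targets=Y, weights=weights)

The key points, as far as I can tell, are that the targets stay as integer word ids (no embedding lookup, no one-hot), and that the logits come from a plain linear projection, so the softmax activation inside the LSTMCell is probably redundant since sequence_loss applies its own softmax. I also suspect my commented-out softmax_cross_entropy_with_logits attempt always returned 0 because its labels were embedding vectors looked up from W_e, and with the embedding load commented out W_e is all zeros, making the labels all-zero vectors and the cross entropy exactly 0.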