I'm having trouble figuring out how to create a loss function for a basic_seq2seq model. My input is a paragraph and the output is the paragraph's section title.
Here is the code I have so far:
import tensorflow as tf
import numpy as np
import pickle
import sys
MAX_NUM_WORDS = 500000
MAX_PAR_LENGTH = 85
CONV_DIM = 128
SECTION_LENGTH = 45
EPOCHS = 100
num_paragraphs = 5200000
BATCH_SIZE = 20
SECTION_VOCAB_SIZE = 213884
weights_lstm = {'out': tf.Variable(tf.random_normal([BATCH_SIZE, 200, SECTION_VOCAB_SIZE]))}
biases_lstm = {'out': tf.Variable(tf.random_normal([BATCH_SIZE, SECTION_VOCAB_SIZE]))}
embedding_matrix = np.zeros((MAX_NUM_WORDS+1, 200))
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv1d(x, W):
    return tf.nn.conv1d(x, W, stride=1, padding='SAME')

def max_pool_1d(x):
    return tf.layers.max_pooling1d(x, pool_size=2, strides=2, padding='same')

def batch_norm(x):
    return tf.layers.batch_normalization(x)
def model(x, y):
    input = x
    with tf.device('/cpu:0'):
        input = tf.nn.embedding_lookup(W_e, x)
        output_y = tf.nn.embedding_lookup(W_e, y)

    # encoder
    bn1 = batch_norm(input)
    an1 = tf.nn.relu(bn1)
    drop1 = tf.layers.dropout(an1, 0.2)

    W_conv1 = weight_variable([3, 200, CONV_DIM])
    b_conv1 = bias_variable([CONV_DIM])
    h_conv1 = tf.nn.relu(conv1d(drop1, W_conv1) + b_conv1)

    bn2 = batch_norm(h_conv1)
    an2 = tf.nn.relu(bn2)
    W_conv2 = weight_variable([3, CONV_DIM, CONV_DIM // 2])
    b_conv2 = bias_variable([CONV_DIM // 2])
    h_conv2 = tf.nn.relu(conv1d(an2, W_conv2) + b_conv2)

    bn3 = batch_norm(h_conv2)
    an3 = tf.nn.relu(bn3)
    W_conv3 = weight_variable([3, CONV_DIM // 2, CONV_DIM // 4])
    b_conv3 = bias_variable([CONV_DIM // 4])
    h_conv3 = tf.nn.relu(conv1d(an3, W_conv3) + b_conv3)

    mp1 = max_pool_1d(h_conv3)

    enc = tf.unstack(mp1, axis=1)
    dec = tf.unstack(output_y, axis=1)
    lstm_cell = tf.contrib.rnn.LSTMCell(200, forget_bias=1.0, activation=tf.nn.softmax)
    outputs, states = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(enc, dec, lstm_cell)

    projected_outputs = []
    with tf.device('/cpu:0'):
        for output in outputs:
            projected_output = (weights_lstm['out'] * output) + biases_lstm['out']
            projected_outputs.append(projected_output)

    stacked_outputs = tf.stack(projected_outputs, 1)  # [? x 45 x V]
    print(stacked_outputs)

    weights = tf.ones_like(y, dtype=tf.float32)
    loss = tf.contrib.seq2seq.sequence_loss(logits=stacked_outputs, targets=y, weights=weights, name='loss')

    # gold_outputs = tf.unstack(output_y, axis=1)
    # cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y, logits=outputs))
    # output_y = [? x 45 x 200]
    # outputs = 45 tensors of [? x 200]
    # stacked_outputs = tf.stack(outputs, 1)  # [? x 45 x 200]
    # correct_prediction = tf.equal(tf.argmax(stacked_outputs, 1), tf.argmax(output_y, 1))
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return outputs, loss  # cross_entropy
# print('Loading Embeddings...')
# with open('embeddings.txt', 'rb') as f:
#     embedding_matrix = pickle.load(f)
print('Creating Placeholders...')
X = tf.placeholder(tf.int32, [None, MAX_PAR_LENGTH])
Y = tf.placeholder(tf.int32, [None, SECTION_LENGTH])
with tf.device('/cpu:0'):
    W_e = tf.Variable(embedding_matrix, dtype=tf.float32, trainable=False)
print('Creating Model...')
preds, loss = model(X, Y)
print('Creating Training Parameters...')
train_step = tf.train.RMSPropOptimizer(1e-4).minimize(loss)
saver = tf.train.Saver()
print('Starting Session...')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(EPOCHS):
        print('Epoch ' + str(i))
        print('Number of batches ', str(num_paragraphs // BATCH_SIZE))
        with open('section_train_data_final.txt', 'rb') as f:
            for j in range(num_paragraphs // BATCH_SIZE):
                # load data
                paragraphs = []
                for k in range(BATCH_SIZE):
                    paragraphs.append(pickle.load(f))
                x = np.array([p for p, s in paragraphs])
                # y = np.array([sess.run(tf.one_hot(s, depth=SECTION_VOCAB_SIZE, on_value=1.0, off_value=0.0)) for p, s in paragraphs])
                y = np.array([s for p, s in paragraphs])
                _, step_loss = sess.run([train_step, loss], feed_dict={X: x, Y: y})
                if j % 100 == 0 and j != 0:
                    # train_acc = sess.run(accuracy, feed_dict={X: x, Y: y})
                    print('Epoch %d: Batch %d: Loss: %g' % (i, j, step_loss))
                    saver.save(sess, '~\data\generation_model')
Any help on how to create this loss function would be appreciated. I'm fairly new to TensorFlow, so I first tried a simple loss function, which is now commented out:
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=output_y, logits=outputs))
But it didn't work, since the loss always came out as 0. My friend then tried to write the loss function that is currently in the code, but I have no idea what he was trying to do.
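For reference, this is the shape contract I believe tf.contrib.seq2seq.sequence_loss expects (a minimal sketch under my assumptions; the shared dense projection is a placeholder of mine, not what my friend wrote, and `outputs` / `Y` refer to the names in the code above):

# Sketch: project each decoder output to vocabulary logits with one shared
# dense layer, then feed integer word ids straight into sequence_loss.
stacked = tf.stack(outputs, axis=1)                    # [batch, 45, 200]
logits = tf.layers.dense(stacked, SECTION_VOCAB_SIZE)  # [batch, 45, vocab]
weights = tf.ones_like(Y, dtype=tf.float32)            # could mask out padding here
loss = tf.contrib.seq2seq.sequence_loss(logits=logits, targets=Y, weights=weights)

The key points, as far as I can tell, are that the targets stay as integer word ids (no embedding lookup, no one-hot), and that the logits come from a plain linear projection, so the softmax activation inside the LSTMCell is probably redundant since sequence_loss applies its own softmax. I also suspect my commented-out softmax_cross_entropy_with_logits attempt always returned 0 because its labels were embedding vectors looked up from W_e, and with the embedding load commented out W_e is all zeros, making the labels all-zero vectors and the cross entropy exactly 0.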