I wrote some simple code to do something like word2vec, but during training I see the cross-entropy loss jump up at the start of every epoch and then decrease. Please help me check whether there is a mistake in my code; I have already reviewed it many times...
from tensorflow.python import debug as tf_debug
import math
import os
import time
import random
import numpy as np
import tensorflow as tf
def gen_next_block(filenames, epochs):
    """Stream shuffled blocks of training records, one epoch at a time."""
    for epoch in range(epochs):
        for filename in filenames:
            with open(filename) as f:
                start = time.time()
                line_cnt = 0
                data = []
                for line in f:
                    record = line.strip().split(',')
                    # Columns: context id, target id, weight, then one
                    # pre-sampled negative id per epoch (';'-separated).
                    record = [int(record[0]), int(record[1]), float(record[2])] + [int(item) for item in record[3].split(';')]
                    # Keep only the negative sample for the current epoch.
                    record = record[:3] + [record[3 + epoch]]
                    data.append(record)
                    line_cnt += 1
                    if line_cnt % 4096000 == 0:
                        end = time.time()
                        elapsed_time = (end - start) * 1000
                        print("load block data: epoch %d, filename %s line_cnt %d, size %d, elapsed time %f ms" % (epoch, filename, line_cnt, len(data), elapsed_time))
                        random.shuffle(data)
                        yield data
                        data = []
                        start = time.time()
                # Flush the partial block left over at the end of the file.
                if len(data) > 0:
                    end = time.time()
                    elapsed_time = (end - start) * 1000
                    print("load block data: epoch %d, filename %s line_cnt %d, size %d, elapsed time %f ms" % (epoch, filename, line_cnt, len(data), elapsed_time))
                    random.shuffle(data)
                    yield data
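# For reference, the parsing above implies each input line has the form
# (hypothetical example -- the real data files are not shown):
#   context_id,target_id,weight,neg_epoch0;neg_epoch1;...;neg_epoch{epochs-1}
# e.g. "12,34,1.0,101;102;103;104;105" for epochs = 5, so that column
# 3 + epoch supplies a different pre-sampled negative in each epoch.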
# Global state shared with generate_batch below.
data = None
next_block_generator = None
data_index = 0
last_time_data_index = 0
def generate_batch(filenames, epochs, batch_size):
    global data
    global data_index
    global last_time_data_index
    global next_block_generator
    if next_block_generator is None:
        next_block_generator = gen_next_block(filenames, epochs)
    # data_index wraps modulo len(data); once it wraps back past the index
    # recorded on the previous call, the current block is exhausted and the
    # next block is pulled from the generator.
    if data_index <= last_time_data_index:
        data = next(next_block_generator, None)
        data_index = 0
        last_time_data_index = 0
    if data is not None:
        last_time_data_index = data_index
        batch = np.ndarray(shape=(batch_size,), dtype=np.int32)
        labels = np.ndarray(shape=(batch_size,), dtype=np.int32)
        negative_labels = np.ndarray(shape=(batch_size,), dtype=np.int32)
        weights = np.ndarray(shape=(batch_size,), dtype=np.float32)
        negative_weights = np.ones(shape=(batch_size,), dtype=np.float32)
        for i in range(batch_size):
            batch[i] = data[data_index][0]
            labels[i] = data[data_index][1]
            weights[i] = data[data_index][2]
            negative_labels[i] = data[data_index][3]
            data_index = (data_index + 1) % len(data)
        return batch, labels, negative_labels, weights, negative_weights
    else:
        raise Exception("finish load file list [%s] %d times" % (','.join(filenames), epochs))
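# Usage sketch (my assumption of the intended call pattern): each call
# returns five parallel arrays of length batch_size, until every epoch of
# every file has been consumed and the Exception above is raised:
#   inputs, labels, neg_labels, w, neg_w = generate_batch(filenames, epochs, batch_size)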
filename = 'data/dr_xianyu_item2vec_train_with_meta_20170725_dir/dr_xianyu_item2vec_train_with_meta_20170725_dir_'
filenames = [filename + str(i) for i in range(10)]
epochs = 5
batch_size = 2048
embedding_size = 32 # Dimension of the embedding vector.
num_sampled = batch_size # Number of negative examples to sample.
vocabulary_size = 7483025 + 1
graph = tf.Graph()
with graph.as_default():
    with tf.device('/cpu:0'):
        with tf.name_scope('input_data'):
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size], name='context_placeholder')
            positive_labels = tf.placeholder(tf.int32, shape=[batch_size], name='target_placeholder')
            negative_labels = tf.placeholder(tf.int32, shape=[num_sampled], name='negative_target_placeholder')
            positive_weights = tf.placeholder(tf.float32, shape=[batch_size], name='target_weight')
            negative_weights = tf.placeholder(tf.float32, shape=[num_sampled], name='negative_target_weight')
        with tf.name_scope('emb_layer'):
            embeddings = tf.Variable(
                tf.random_uniform([vocabulary_size, embedding_size], -0.5 / embedding_size, 0.5 / embedding_size), name='emb')
            embed = tf.nn.embedding_lookup(embeddings, train_inputs)
        with tf.name_scope("neg_layer"):
            nce_weights = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -0.5 / embedding_size, 0.5 / embedding_size), name='nce_weight')
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]), name='nce_biase')
            positive_embed = tf.nn.embedding_lookup(nce_weights, positive_labels)
            positive_bias = tf.nn.embedding_lookup(nce_biases, positive_labels)
            negative_embed = tf.nn.embedding_lookup(nce_weights, negative_labels)
            negative_bias = tf.nn.embedding_lookup(nce_biases, negative_labels)
            positive_logits = tf.reduce_sum(tf.multiply(embed, positive_embed), 1) + positive_bias
            negative_logits = tf.reduce_sum(tf.multiply(embed, negative_embed), 1) + negative_bias
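        # Shape note (added for clarity): embed, positive_embed and
        # negative_embed are all (batch_size, embedding_size), so
        # reduce_sum(multiply(...), 1) is a per-example dot product and
        # positive_logits / negative_logits have shape (batch_size,).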
        with tf.name_scope('loss_layer'):
            positive_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(positive_logits), logits=positive_logits)
            negative_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(negative_logits), logits=negative_logits)
            # NOTE: the weighted logits below are computed but never used in the loss.
            weighted_positive_logits = tf.multiply(positive_logits, positive_weights)
            weighted_negative_logits = tf.multiply(negative_logits, negative_weights)
            loss = (tf.reduce_sum(positive_xent) + tf.reduce_sum(negative_xent)) / (batch_size * 2)
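        # For reference, the two cross-entropy terms implement the usual
        # negative-sampling objective: with labels of all ones,
        #   sigmoid_cross_entropy_with_logits(1, x) = -log(sigmoid(x)),
        # and with labels of all zeros,
        #   sigmoid_cross_entropy_with_logits(0, x) = -log(1 - sigmoid(x)) = -log(sigmoid(-x)),
        # so loss is the mean over the batch of
        #   -log(sigmoid(u . v_pos + b_pos)) - log(sigmoid(-(u . v_neg + b_neg))).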
        with tf.name_scope('train'):
            optimizer = tf.train.RMSPropOptimizer(0.001).minimize(loss)
            # global_step = tf.Variable(0, trainable=False)
            # starter_learning_rate = 0.1
            # learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 20000, 0.8, staircase=True)
            # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    init = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()
    tf.summary.scalar('loss_layer/loss', loss)
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
with tf.Session(graph=graph) as sess:
    # sess = tf_debug.LocalCLIDebugWrapperSession(sess)
    # sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    for v in tf.global_variables():
        print(v.name, v.device, v.shape)
    for v in tf.local_variables():
        print(v.name, v.device, v.shape)
    # if os.path.exists('tmp/model.ckpt.meta'):
    #     saver = tf.train.import_meta_graph('tmp/model.ckpt.meta')
    #     saver.restore(sess, tf.train.latest_checkpoint('tmp/'))
    #     print("model restored")
    # else:
    if True:  # stand-in for the commented-out restore branch above
        init.run()
        init_local.run()
        print("model init")
    summary_writer = tf.summary.FileWriter('tmp/log', sess.graph)
    average_loss = 0
    start = time.time()
    step = 1
    try:
        while True:
            batch_inputs, batch_labels, batch_negative_labels, positive_weights_np, negative_weights_np = generate_batch(filenames, epochs, batch_size)
            feed_dict = {train_inputs: batch_inputs, positive_labels: batch_labels, negative_labels: batch_negative_labels, positive_weights: positive_weights_np, negative_weights: negative_weights_np}
            if step % 1000 == 0:
                loss_val, summary_str, _ = sess.run([loss, summary_op, optimizer], feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
            else:
                loss_val, _ = sess.run([loss, optimizer], feed_dict=feed_dict)
            average_loss += loss_val
            if step % 1000 == 0:
                average_loss /= 1000
                end = time.time()
                elapsed_time = (end - start) * 1000 / 1000  # average ms per step over the last 1000 steps
                print('Average loss at step ', step, ': ', average_loss, 'time cost', elapsed_time, 'ms')
                average_loss = 0
                start = time.time()
            if step % 20000 == 0:
                print('save model...')
                save_path = saver.save(sess, 'tmp/model.ckpt')
                print("saved model in", save_path)
            step += 1
    except Exception as e:
        # generate_batch raises once all epochs are exhausted; any other error also lands here.
        print(e)
        print("total batch count %d" % step)
        summary_writer.flush()
Here is my loss: the first plot was produced with SGD over 5 epochs, the second with RMSProp over 2 epochs (still running).
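To make the per-epoch pattern easier to line up with the printed log, here is a small sketch (my addition, not part of the script above; `lines_per_epoch` is a hypothetical placeholder for the total number of training lines across all files) that maps a global step to its epoch:

    # Hypothetical helper: map a global step to its epoch so the loss
    # spikes can be matched against epoch boundaries in the log.
    lines_per_epoch = 40960000  # placeholder: total lines across all files
    steps_per_epoch = lines_per_epoch // batch_size

    def epoch_of(step):
        return step // steps_per_epoch

    # e.g. print('step %d (epoch %d)' % (step, epoch_of(step)))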