With TensorFlow embeddings, the loss increases at the start of each epoch and then decreases

Time: 2017-08-02 02:37:12

Tags: tensorflow embedding word2vec loss

I wrote some simple code to do something like word2vec, but during training I see the cross-entropy loss increase at the start of every epoch and then decrease. Please help me check whether there is any mistake in my code; I have gone over it many times...
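
For reference, each line of the training files has the form context_id,target_id,weight,neg_id_epoch0;neg_id_epoch1;..., where the semicolon-separated field holds one pre-sampled negative id per epoch and the current epoch index selects which one is used. An illustrative line for 5 epochs (made-up ids):

12,345,0.8,678;910;1112;1314;1516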

from tensorflow.python import debug as tf_debug
import math
import os
import time
import random
import numpy as np
import tensorflow as tf
def gen_next_block(filenames,epochs):
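    # Stream the training files once per epoch, yielding shuffled blocks of parsed records;
    # the epoch index picks which semicolon-separated negative id of each line is used.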
    for epoch in range(epochs):
        for filename in filenames:
            with open(filename) as f:
                start = time.time()
                line_cnt = 0
                data = []
                for line in f:
                    record = line.strip().split(',')
                    record = [int(record[0]), int(record[1]), float(record[2])] + [int(item) for item in record[3].split(';')]
                    record = record[:3] + [record[3 + epoch],]
                    data.append(record)
                    line_cnt += 1
                    if line_cnt % 4096000 == 0:
                        end = time.time()
                        elapsed_time = (end - start) * 1000
                        print("load block data: epoch %d, filename %s line_cnt %d, size %d, elapsed time %f ms" % (epoch, filename, line_cnt, len(data), elapsed_time))
                        random.shuffle(data)
                        yield data
                        data = []
                        start = time.time()
                if len(data) > 0:
                    end = time.time()
                    elapsed_time = (end - start) * 1000
                    print("load block data: epoch %d, filename %s line_cnt %d, size %d, elapsed time %f ms" % (epoch, filename, line_cnt, len(data), elapsed_time))
                    random.shuffle(data)
                    yield data
data = None
next_block_generator = None
data_index = 0
last_time_data_index = 0
def generate_batch(filenames, epochs, batch_size):
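    # Slice fixed-size batches out of the current shuffled block, wrapping with modulo;
    # once the index wraps back past the previous batch start, pull the next block from the generator.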
    global data
    global data_index
    global last_time_data_index
    global next_block_generator
    if next_block_generator is None:
        next_block_generator = gen_next_block(filenames,epochs)
    if data_index <= last_time_data_index:
        data = next(next_block_generator, None)
        data_index = 0
        last_time_data_index = 0
    if data is not None:
        last_time_data_index = data_index
        batch = np.ndarray(shape=(batch_size), dtype=np.int32)
        labels = np.ndarray(shape=(batch_size), dtype=np.int32)
        negative_labels = np.ndarray(shape=(batch_size), dtype=np.int32) 
        weights = np.ndarray(shape=(batch_size), dtype=np.float32)
        negative_weights = np.ones(shape=(batch_size), dtype=np.float32)
        for i in range(batch_size):
            batch[i] = data[data_index][0]
            labels[i] = data[data_index][1]
            weights[i] = data[data_index][2]
            negative_labels[i] = data[data_index][3]
            data_index = (data_index + 1) % len(data)
        return batch, labels, negative_labels, weights, negative_weights
    else:
        raise Exception("finished loading file list [%s] %d times" % (','.join(filenames), epochs))
filename = 'data/dr_xianyu_item2vec_train_with_meta_20170725_dir/dr_xianyu_item2vec_train_with_meta_20170725_dir_' 
filenames = [filename + str(i) for i in range(10)]

epochs = 5 
batch_size = 2048 
embedding_size = 32    # Dimension of the embedding vector.
num_sampled = batch_size        # Number of negative examples to sample.
vocabulary_size = 7483025 + 1


graph = tf.Graph()

with graph.as_default():
    with tf.device('/cpu:0'):
        with tf.name_scope('input_data'):
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size], name = 'context_placeholder')
            positive_labels = tf.placeholder(tf.int32, shape=[batch_size], name = 'target_placeholder')
            negative_labels = tf.placeholder(tf.int32, shape=[num_sampled], name = 'negative_target_placeholder')
            positive_weights = tf.placeholder(tf.float32, shape=([batch_size]), name = 'target_weight')
            negative_weights = tf.placeholder(tf.float32, shape=([num_sampled]), name = 'negative_target_weight')
        with tf.name_scope('emb_layer'):
            embeddings = tf.Variable(
                    tf.random_uniform([vocabulary_size, embedding_size], -0.5/embedding_size, 0.5/embedding_size), name = 'emb')
            embed = tf.nn.embedding_lookup(embeddings, train_inputs)
        with tf.name_scope("neg_layer"):
            nce_weights = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -0.5/embedding_size, 0.5/embedding_size), name = 'nce_weight')
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]), name = 'nce_biase')
            positive_embed = tf.nn.embedding_lookup(nce_weights,positive_labels)
            positive_bias = tf.nn.embedding_lookup(nce_biases,positive_labels)
            negative_embed = tf.nn.embedding_lookup(nce_weights,negative_labels)
            negative_bias = tf.nn.embedding_lookup(nce_biases,negative_labels)

            positive_logits = tf.reduce_sum(tf.multiply(embed,positive_embed),1) + positive_bias
            negative_logits = tf.reduce_sum(tf.multiply(embed,negative_embed),1) + negative_bias
        with tf.name_scope('loss_layer'):
            positive_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.ones_like(positive_logits), logits = positive_logits)
            negative_xent = tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.zeros_like(negative_logits), logits = negative_logits)
            weighted_positive_logits = tf.multiply(positive_logits,positive_weights)
            weighted_negative_logits = tf.multiply(negative_logits,negative_weights)
            loss = (tf.reduce_sum(positive_xent) + tf.reduce_sum(negative_xent)) /(batch_size*2)

        with tf.name_scope('train'):
            optimizer = tf.train.RMSPropOptimizer(0.001).minimize(loss)
           # global_step = tf.Variable(0, trainable=False)
           # starter_learning_rate = 0.1
           # learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 20000, 0.8, staircase=True)
           # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)
        init = tf.global_variables_initializer()
        init_local = tf.local_variables_initializer()

        tf.summary.scalar('loss_layer/loss', loss)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)

        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver()
with tf.Session(graph=graph) as sess:
#    sess = tf_debug.LocalCLIDebugWrapperSession(sess)
#    sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)

    for v in tf.global_variables():
        print(v.name,v.device,v.shape)
    for v in tf.local_variables():
        print(v.name,v.device,v.shape)
#    if os.path.exists('tmp/model.ckpt.meta'): 
#        saver = tf.train.import_meta_graph('tmp/model.ckpt.meta')
#        saver.restore(sess,tf.train.latest_checkpoint('tmp/'))
#        print("model restored")
#    else:
    if True:
        init.run()
        init_local.run()
        print("model init")
    summary_writer = tf.summary.FileWriter('tmp/log', sess.graph)
    average_loss = 0
    start = time.time()
    step = 1
    try:
        while True:
            batch_inputs, batch_labels, batch_negative_labels, positive_weights_np, negative_weights_np = generate_batch(filenames, epochs,batch_size)

            feed_dict = {train_inputs: batch_inputs, positive_labels: batch_labels, negative_labels: batch_negative_labels, positive_weights:positive_weights_np, negative_weights:negative_weights_np}
            if step%1000 == 0:
                loss_val,summary_str,_ = sess.run([loss, summary_op, optimizer], feed_dict=feed_dict)
                summary_writer.add_summary(summary_str,step)
            else:
                loss_val,_ = sess.run([loss, optimizer], feed_dict=feed_dict)

            average_loss += loss_val
            if step % 1000 == 0:
                average_loss /= 1000
                end = time.time()
                elapsed_time = (end - start)*1000 / 1000
                print('Average loss at step ', step, ': ', average_loss, 'time cost', elapsed_time, 'ms')
                average_loss = 0
                start = time.time()
            if step % 20000 == 0:
                print('save model...')
                save_path = saver.save(sess,'tmp/model.ckpt')
                print("saved model in",save_path)
            step +=1
    except Exception as e:
        print(e)
    print("total batch count %d" % step)
    summary_writer.flush()

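For reference, the per-batch loss in the loss_layer scope is equivalent to the following small NumPy sketch (the helper name is illustrative; it uses the numerically stable identity log(1 + exp(x)) = logaddexp(0, x)):

import numpy as np

def batch_loss(positive_logits, negative_logits):
    # sigmoid cross-entropy against target 1 for positive pairs: log(1 + exp(-x))
    positive_xent = np.logaddexp(0.0, -positive_logits)
    # sigmoid cross-entropy against target 0 for negative pairs: log(1 + exp(x))
    negative_xent = np.logaddexp(0.0, negative_logits)
    batch_size = positive_logits.shape[0]
    return (positive_xent.sum() + negative_xent.sum()) / (batch_size * 2)
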
Here is my loss. The first plot was generated with SGD over 5 epochs; the second with RMSProp over 2 epochs (still running).

[loss plot 1: SGD, 5 epochs]
[loss plot 2: RMSProp, 2 epochs]

0 Answers:

No answers