TensorFlow word embedding runs far too slowly on the GPU

Date: 2017-12-01 12:43:36

Tags: python tensorflow word-embedding

Edit: I can now provide some basic runtimes:

next epoch:      0.5456549558716741 ('time point 0')
next epoch:    892.5030143482156    (~14:30min)
next epoch:   1757.6139726727963    (~14:30min)
Time elapsed: 2622.23 seconds       (~14min, total of 44min)

These times were taken with a vocab_size of only 5000, compared to the actual corpus size of 13k {...}

I am working on a word-embedding task in TensorFlow, using the King James Bible as the corpus, so the vocabulary is fairly large at roughly 13,000 unique words.

On my machine (GTX 970) I never even see the print for the next epoch, whereas on my friend's CPU the whole run finishes within a few minutes.

Can anyone shed some light on this mystery?
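
One quick sanity check (a sketch, not part of the original code, assuming the TF 1.x API used below): create the session with device-placement logging turned on, so the console shows whether the ops actually land on the GTX 970 or fall back to /cpu:0:

# Sketch only: swap this in for the plain tf.Session() in the training code below.
config = tf.ConfigProto(log_device_placement=True)
with tf.Session(config=config) as session:
    session.run(tf.global_variables_initializer())
    # the device assigned to each op (e.g. /device:GPU:0) is printed to the console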

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from book_helper import Book
from collections import Counter
from math import sqrt
import random as rnd
import time

%matplotlib notebook

book = Book('./pg10.txt')
vocab_size = len(Counter(book._book_text)) # 13079
book.create_dictionaries(vocab_size)

samples = [x for x in rnd.sample(book._book_text, 10) if x.isalpha()]
print('Sample words:\t' + ', '.join(samples))
print('Words2ids:\t' + ', '.join(str(i) for i in book.words2ids(samples)))
print('Ids2words:\t' +  ', '.join(book.ids2words(book.words2ids(samples))))

# hyperparameters
batch_size = 128
epochs = 3
lrate = 1
embedding_size = 64
skip_window = 2
noise_samples = 64

X = tf.placeholder(tf.int32, [batch_size], "Input_Placeholder")
desired = tf.placeholder(tf.int32, [batch_size, 1], "DesiredOutput_Placeholder")

with tf.variable_scope("embeddings"):
    # create word embedding
    ru_init = tf.random_uniform_initializer(-1.0, 1.0)
    embeddings = tf.get_variable("embedding", 
                                 [vocab_size, embedding_size],
                                 initializer=ru_init)
    # retrieve word ids from embedding
    embed = tf.nn.embedding_lookup(embeddings, X)

with tf.variable_scope("output_layer"):
    # full-softmax output layer; weights initialised with stddev 1/sqrt(embedding_size)
    weights1 = tf.get_variable("weights",
                               [vocab_size, embedding_size],
                               initializer=tf.truncated_normal_initializer(stddev=1 / sqrt(embedding_size)))
    biases1 = tf.get_variable("bias", initializer=tf.zeros([vocab_size]))

# logits over the full vocabulary; the histogram summary below pulls this
# [batch_size, vocab_size] matmul into every training step via merged_summaries
drive = tf.matmul(embed, tf.transpose(weights1)) + biases1
context = tf.nn.softmax(drive)
tf.summary.histogram("drive", drive)

with tf.variable_scope("nce_loss"):
    nce_weights = tf.get_variable("weights",
                                  [vocab_size, embedding_size],
                                  initializer=tf.truncated_normal_initializer(stddev=1 / sqrt(embedding_size)))
    nce_bias = tf.get_variable("bias", initializer=tf.zeros([vocab_size]))

# NCE loss: contrasts each true context word against noise_samples negative
# samples instead of evaluating a full softmax over all vocab_size classes
nce_loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights,
                                         biases=nce_bias,
                                         labels=desired,
                                         inputs=embed,
                                         num_sampled=noise_samples,
                                         num_classes=vocab_size))

tf.summary.scalar("nce_loss", nce_loss)

training_step = tf.train.GradientDescentOptimizer(lrate).minimize(nce_loss)

merged_summaries = tf.summary.merge_all()

train_writer = tf.summary.FileWriter("./summaries/train", tf.get_default_graph())

with tf.Session() as session:
    start_time = time.clock()
    step = 0
    session.run(tf.global_variables_initializer())

    for _epoch in range(epochs):
        print("next epoch: " + str(time.clock() - start_time))
        for targets, concepts in book.get_training_batch(batch_size, skip_window):
            summaries, _ = session.run([merged_summaries, training_step], feed_dict = {X: targets, desired: concepts})
            train_writer.add_summary(summaries, step)
            step += 1

    duration = time.clock() - start_time
    print("Time elapsed: {0:.2f} seconds".format(duration))
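
To narrow down where those ~14-minute epochs go, a minimal timing sketch (not part of the original script): it reuses book, the placeholders, and training_step defined above, and deliberately drops the summary writer so that any cost of the per-step summaries shows up as a difference to the timings above:

import time

gen_time, run_time = 0.0, 0.0
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    t0 = time.perf_counter()
    for targets, concepts in book.get_training_batch(batch_size, skip_window):
        t1 = time.perf_counter()
        gen_time += t1 - t0                      # time spent producing the batch
        session.run(training_step, feed_dict={X: targets, desired: concepts})
        t0 = time.perf_counter()
        run_time += t0 - t1                      # time spent in the training step
print("batch generation: {:.1f}s, training steps: {:.1f}s".format(gen_time, run_time))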

0 Answers:

No answers yet.