TensorFlow evaluation: Aborted (core dumped)

Time: 2016-12-04 20:01:51

Tags: python tensorflow

tl;dr: I feed a word into my model and should get back a list of similar words with their associated similarity measures. Instead I get the error: Aborted (core dumped).

My goal is to determine which words are similar to an input word, based on their feature vectors. My model is already trained; I load it and call two functions:

def main(argv=None):
    model = NVDM(args)
    sess_saver = tf.train.Saver()
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)
    loaded = load_for_similar(sess, sess_saver) #my function
    wm = word_match(sess, loaded[0], loaded[1], "bottle", loaded[2], loaded[3], topN=5)

My problem is that I can't print out the similar words and their associated similarity measures. I've tried (mainly):

 sess.run(wm)
 wm[0].eval(session=sess)
 print(wm)

All of these give me the error:

F tensorflow/core/kernels/strided_slice_op.cc:316] Check failed: tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape) 
Aborted (core dumped)

This tells me I'm not running the session correctly. What am I doing wrong?
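For reference, the pattern I understand for fetching tensor values with sess.run looks like the sketch below. This is a toy graph, not my model; the tensors here are made up purely to illustrate fetching a list of results.

import tensorflow as tf

# Toy graph: a small vector and a top_k op over it (stand-ins, not my model).
scores = tf.constant([3.0, 1.0, 2.0])
top = tf.nn.top_k(scores, k=2, sorted=True)

sess = tf.Session()
sess.run(tf.initialize_all_variables())

# Fetching a list of tensors returns the corresponding numpy values.
top_values, top_indices = sess.run([top.values, top.indices])
print(top_values, top_indices)   # [ 3.  2.] [0 2]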

Details on the functions, in case they help:

The function load_for_similar restores the decoder weights and bias from my model (a variational autoencoder) and normalizes them. It also reverses the keys and values of my vocabulary for later use:

def load_for_similar(sess, saver_obj):
    saver_obj.restore(sess, "./CA_checkpoints/saved_model.ckpt")
    vocab_file = '/path/to/vocab.pkl'
    t1 = loader_object(vocab_file)
    v1 = t1.get_vocab()
    v1_rev = {k:v for v, k in v1.iteritems()}
    decoder_mat = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[0]
    decoder_bias = tf.get_collection(tf.GraphKeys.VARIABLES, scope='decoder')[1]
    return (find_norm(decoder_mat), find_norm(decoder_bias), v1, v1_rev)

To find similar words, I pass the normalized weight matrix and bias into a new function, along with the feature vector of my word (vec):

def find_similar(sess, Weights, vec, bias):
    dists = tf.add(tf.reduce_sum(tf.mul(Weights, vec)), bias) 
    best = argsort(sess, dists, reverse=True)
    dist_sort = tf.nn.top_k(dists, k=dists.get_shape().as_list()[0], sorted=True).values 
    return dist_sort, best

Finally, I want to match the words closest to the word I supply, "bottle":

def word_match(sess, norm_mat , norm_bias, word_ , vocab, vocab_inverse , topN = 10):
    idx = vocab[word_]
    similarity_meas , indexes = find_similar(sess, norm_mat , norm_mat[idx], norm_bias)
    words = tf.gather(vocab_inverse.keys(), indexes[:topN])
    return (words, similarity_meas[:topN])
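Since the failing kernel is a strided slice, I've also been double-checking the static shapes of the things I slice (norm_mat[idx], indexes[:topN], similarity_meas[:topN]). Below is a self-contained sketch of what I mean, with made-up shapes (the real ones come from my decoder):

import tensorflow as tf

# Stand-ins for the tensors above; the shapes here are made up for illustration.
norm_mat = tf.zeros([50, 15000])          # e.g. normalized decoder weights
indexes = tf.constant([4, 1, 0, 3, 2])

print(norm_mat.get_shape())               # (50, 15000) -- statically known shape
print(indexes.get_shape())                # (5,)

row = norm_mat[3]                         # the kind of strided slice the error points at
first_two = indexes[:2]

sess = tf.Session()
print(sess.run(first_two))                # [4 1]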

Edit: In response to mrry's comment, here is the model (I hope this is what you wanted?). This code relies on utils.py, a separate utility file, which I've included below as well. Please note that this code is based largely on Yishu Miao's and Sarath Nair's code.

class NVDM(object):
    """ Neural Variational Document Model -- BOW VAE.
    """
    def __init__(self,
             vocab_size=15000, #was 2000
             n_hidden=500,
             n_topic=50,
             n_sample=1,
             learning_rate=1e-5,
             batch_size=100, #was 64
             non_linearity=tf.nn.tanh):
      self.vocab_size = vocab_size
      self.n_hidden = n_hidden
      self.n_topic = n_topic
      self.n_sample = n_sample
      self.non_linearity = non_linearity
      self.learning_rate = learning_rate/batch_size #CA
      self.batch_size = batch_size

      self.x = tf.placeholder(tf.float32, [None, vocab_size], name='input')
      self.mask = tf.placeholder(tf.float32, [None], name='mask')  # mask paddings

      # encoder
      with tf.variable_scope('encoder'):
        self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden])
        self.mean = utils.linear(self.enc_vec, self.n_topic, scope='mean')
        self.logsigm = utils.linear(self.enc_vec,
                                    self.n_topic,
                                    bias_start_zero=True,
                                    matrix_start_zero=False,
                                    scope='logsigm')
        self.kld = -0.5 * tf.reduce_sum(1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1)
        self.kld = self.mask*self.kld  # mask paddings

      with tf.variable_scope('decoder'):
        if self.n_sample ==1:  # single sample
          p1 = tf.cast(tf.reduce_sum(self.mask), tf.int32) #needed for random normal generation
          eps = tf.random_normal((p1, self.n_topic), 0, 1)
          doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean
          logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
          self.recons_loss = -tf.reduce_sum(tf.mul(logits, self.x), 1)
        # multiple samples
        else:
          eps = tf.random_normal((self.n_sample*batch_size, self.n_topic), 0, 1)
          eps_list = tf.split(0, self.n_sample, eps)
          recons_loss_list = []
          for i in xrange(self.n_sample):
            if i > 0: tf.get_variable_scope().reuse_variables()
            curr_eps = eps_list[i]
            doc_vec = tf.mul(tf.exp(self.logsigm), curr_eps) + self.mean
            logits = tf.nn.log_softmax(utils.linear(doc_vec, self.vocab_size, scope='projection'))
            recons_loss_list.append(-tf.reduce_sum(tf.mul(logits, self.x), 1))

          self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample

      self.objective = self.recons_loss + self.kld

      optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
      fullvars = tf.trainable_variables()

      enc_vars = utils.variable_parser(fullvars, 'encoder')
      dec_vars = utils.variable_parser(fullvars, 'decoder')

      enc_grads = tf.gradients(self.objective, enc_vars)
      dec_grads = tf.gradients(self.objective, dec_vars)

      self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars))
      self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))


def minibatch_bow(it1, Instance1, n_samples, batch_size, used_ints = set()):
    available = set(np.arange(n_samples)) - used_ints #
    if len(available) < batch_size:
            indices = np.array(list(available))
    else:
            indices = np.random.choice(tuple(available), batch_size, replace=False)
    used = used_ints
    mb = itemgetter(*indices)(it1)
    batch_xs = Instance1._bag_of_words(mb, vocab_size=15000)
    batch_flattened = np.ravel(batch_xs)
    index_positions = np.where(batch_flattened > 0)[0]
    return (batch_xs, index_positions, set(indices)) #batch_xs[0] is the bag of words; batch_xs[1] is the 0/1 word used/not;

def train(sess, model, train_file, vocab_file, saver_obj, training_epochs, alternate_epochs, batch_size):
    Instance1 = testchunk_Nov23.testLoader(train_file, vocab_file)
    data_set = Instance1.get_batch(batch_size) #get all minibatches of size 100
    n_samples = Instance1.num_reviews()
    train_batches = list(data_set) #this is an itertools.chain object

    it1_train = list(itertools.chain(*train_batches)) #length is 732,356. This is all the reviews.

    if len(it1_train) % batch_size != 0:
            total_batch = int(len(it1_train)/batch_size) + 1
    else:
            total_batch = int(len(it1_train)/batch_size)

    trainfilesave = "train_ELBO_and_perplexity_Dec1.txt"

    #Training
    train_time = time.time()
    for epoch in range(training_epochs):
            for switch in xrange(0, 2):
                    if switch == 0:
                            optim = model.optim_dec
                            print_mode = 'updating decoder'
                    else:
                            optim = model.optim_enc
                            print_mode = 'updating encoder'
            with open(trainfilesave, 'w') as f:
                    for i in xrange(alternate_epochs):
                            loss_sum = 0.0
                            kld_sum = 0.0
                            word_count = 0
                            used_indices = set()
                            for idx_batch in range(total_batch): #train_batches:
                                    mb = minibatch_bow(it1_train, Instance1, n_samples, batch_size, used_ints=used_indices)
                                    print('minibatch', idx_batch)
                                    used_indices.update(mb[2])
                                    num_mb = np.ones(mb[0][0].shape[0])
                                    input_feed = {model.x.name: mb[0][0], model.mask: num_mb}
                                    _, (loss, kld) = sess.run((optim,[model.objective, model.kld]) , input_feed)
                                    loss_sum += np.sum(loss)

The utils.py file:

def linear(inputs,
       output_size,
       no_bias=False,
       bias_start_zero=False,
       matrix_start_zero=False,
       scope=None):
  """Define a linear connection."""
  with tf.variable_scope(scope or 'Linear'):
    if matrix_start_zero:
      matrix_initializer = tf.constant_initializer(0)
    else:
      matrix_initializer = None
    if bias_start_zero:
       bias_initializer = tf.constant_initializer(0)
    else:
       bias_initializer = None
    input_size = inputs.get_shape()[1].value

    matrix = tf.get_variable('Matrix', [input_size, output_size],
                         initializer=matrix_initializer)
    bias_term = tf.get_variable('Bias', [output_size],
                            initializer=bias_initializer)
    output = tf.matmul(inputs, matrix)
    if not no_bias:
        output = output + bias_term
    return output

def mlp(inputs,
        mlp_hidden=[],
        mlp_nonlinearity=tf.nn.tanh,
        scope=None):
  """Define an MLP."""
  with tf.variable_scope(scope or 'Linear'):
    mlp_layer = len(mlp_hidden)
    res = inputs
    for l in xrange(mlp_layer):
      res = mlp_nonlinearity(linear(res, mlp_hidden[l], scope='l'+str(l)))
    return res

0 Answers