I am using Chainer and trying to do topic modeling (lda2vec). The training-phase code contains the following:
# Imports used by this snippet (assumed, following the lda2vec example scripts):
import time
from collections import defaultdict

import chainer
from chainer import cuda, optimizers as O, serializers
from lda2vec import utils, prepare_topics, print_top_words_per_topic, topic_coherence

optimizer = O.Adam()
optimizer.setup(self.train_model)
clip = chainer.optimizer.GradientClipping(5.0)  # clip gradients to an L2 norm of 5.0
optimizer.add_hook(clip)
j = 0
msgs = defaultdict(list)
for epoch in range(epochs):
    print "epoch : ", epoch
    # Copy the current weights back to the CPU so the topics can be inspected.
    data = prepare_topics(cuda.to_cpu(self.train_model.mixture.weights.W.data).copy(),
                          cuda.to_cpu(self.train_model.mixture.factors.W.data).copy(),
                          cuda.to_cpu(self.train_model.sampler.W.data).copy(),
                          self.words)
    top_words = print_top_words_per_topic(data)
    if j % 100 == 0 and j > 100:
        coherence = topic_coherence(top_words)
        # Use a fresh loop variable here; reusing j (as in the original
        # lda2vec example) would clobber the batch counter incremented below.
        for t in range(self.n_topics):
            print t, coherence[(t, 'cv')]
        kw = dict(top_words=top_words, coherence=coherence, epoch=epoch)
    data['doc_lengths'] = self.doc_lengths
    data['term_frequency'] = self.term_frequency
    for d, f in utils.chunks(self.batchsize, self.doc_ids, self.flattened):
        t0 = time.time()
        self.train_model.cleargrads()
        # fit_partial computes the per-window word-prediction loss; in the
        # reference lda2vec code it also calls backward() on it internally
        # and returns the loss value, which is what gets logged as L below.
        l = self.train_model.fit_partial(d.copy(), f.copy(),
                                         update_words=update_words,
                                         update_topics=update_topics)
        prior = self.train_model.prior()
        loss = prior * self.fraction  # only the prior term flows through this backward pass
        loss.backward()
        optimizer.update()
        msg = ("J:{j:05d} E:{epoch:05d} L:{loss:1.3e} "
               "P:{prior:1.3e} R:{rate:1.3e}")
        prior.to_cpu()
        loss.to_cpu()
        t1 = time.time()
        dt = t1 - t0
        rate = self.batchsize / dt
        msgs["E"].append(epoch)
        msgs["L"].append(float(l))
        j += 1
        logs = dict(loss=float(l), epoch=epoch, j=j,
                    prior=float(prior.data), rate=rate)
        print msg.format(**logs)
    print "\n ================================= \n"
    # serializers.save_hdf5("lda2vec.hdf5", self.model)
    msgs["loss_per_epoch"].append(float(l))
When I run the code I get output like this, for example:
J:00200 E:00380 L:0.000e+00 P:-2.997e+04 R:2.421e+04
Only L (the loss) never changes. Can anyone help me understand why this value stays at zero?
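One observation before guessing: the L:{loss:1.3e} format prints in scientific notation, so L:0.000e+00 means float(l) is exactly zero rather than merely small, while P is nonzero and moving. The zero therefore comes from whatever fit_partial returns. Below is a minimal probe I would run, using only names that already appear in the snippet; the suspicion that the update_words / update_topics flags make fit_partial skip the word loss is an assumption, not a confirmed cause:

    # Inspect a single batch to see where the zero originates.
    for d, f in utils.chunks(self.batchsize, self.doc_ids, self.flattened):
        print "chunk sizes:", len(d), len(f)   # empty chunks would make any loss trivially zero
        l = self.train_model.fit_partial(d.copy(), f.copy(),
                                         update_words=update_words,
                                         update_topics=update_topics)
        print "fit_partial returned:", repr(l)
        print "flags:", update_words, update_topics
        break  # one batch is enough for this check

If repr(l) is exactly 0.0 on every batch, check whether your version of fit_partial computes and returns the word loss at all under your flag settings.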