I am trying to write a word2vec implementation based on the tf example. In my data I use sessions with positive and negative examples, so I want to use the loss function mentioned in this article (the sum of the log of the sigmoid of the positive scores plus the log of the sigmoid of the negated negative scores).
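Concretely, if I read the article right, this is the standard skip-gram negative-sampling objective (my notation, not the article's; v_w is the embedding row for word w):

    L = -\sum_{(w,c) \in \text{pos}} \log \sigma(v_w^\top v_c) \; - \; \sum_{(w,n) \in \text{neg}} \log \sigma(-v_w^\top v_n)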
I wrote an implementation of it:
import tensorflow as tf

def loss_fn(batch_size, batch_inputs, batch_labels, batch_negative, embeddings):
    # L2-normalize the embedding matrix row-wise
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True))
    normalized_embeddings = embeddings / norm
    log.info("loss_fn init")  # module-level logger
    res_lst = []
    for i in range(batch_size):
        inp = batch_inputs[i]
        lbl = batch_labels[i]
        ng = batch_negative[i]
        # dot product of the input embedding with each positive label embedding
        m = tf.map_fn(lambda k: tf.matmul(tf.gather(normalized_embeddings, [inp]),
                                          tf.gather(normalized_embeddings, [k]),
                                          transpose_b=True),
                      lbl, dtype=tf.float32)
        # dot product of the input embedding with each negative sample embedding
        nm = tf.map_fn(lambda n: tf.matmul(tf.gather(normalized_embeddings, [inp]),
                                           tf.gather(normalized_embeddings, [n]),
                                           transpose_b=True),
                       ng, dtype=tf.float32)
        # log-sigmoid of the positive scores and of the negated negative scores
        s = tf.map_fn(lambda x: tf.math.log(tf.math.sigmoid(x)), m)
        ns = tf.map_fn(lambda x: tf.math.log(tf.math.sigmoid(-x)), nm)
        # negative-sampling loss for example i
        res = -(tf.math.reduce_sum(ns) + tf.math.reduce_sum(s))
        res_lst.append(res)
    return tf.stack(res_lst)
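As an aside, if batch_labels and batch_negative were dense int32 index tensors of shape [batch_size, k] (an assumption; the names below are mine, for illustration), I believe the same per-example loss could be computed without the Python loop:

import tensorflow as tf

# Sketch only: assumes normalized_embeddings is [V, D], batch_inputs is [B],
# batch_labels is [B, L] and batch_negative is [B, N].
inp_vec = tf.gather(normalized_embeddings, batch_inputs)    # [B, D]
pos_vec = tf.gather(normalized_embeddings, batch_labels)    # [B, L, D]
neg_vec = tf.gather(normalized_embeddings, batch_negative)  # [B, N, D]
pos_score = tf.einsum('bd,bld->bl', inp_vec, pos_vec)       # dot products
neg_score = tf.einsum('bd,bnd->bn', inp_vec, neg_vec)
# tf.math.log_sigmoid is the numerically stable form of log(sigmoid(x))
per_example = -(tf.reduce_sum(tf.math.log_sigmoid(pos_score), axis=1)
                + tf.reduce_sum(tf.math.log_sigmoid(-neg_score), axis=1))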
Either way, it returns a loss for each example as expected. Next, I try to pass it to the optimizer:
with tf.name_scope('loss'):
    loss = tf.reduce_mean(
        loss_fn(
            batch_size=batch_size,
            batch_inputs=train_inputs,
            batch_labels=train_labels,
            batch_negative=negative_samples,
            embeddings=embeddings))

with tf.name_scope('optimizer'):
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(loss)
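For completeness, the surrounding training loop looks roughly like in the tf example (a sketch; num_steps and the feed_dict that fills the placeholders behind train_inputs / train_labels / negative_samples are defined elsewhere in my script):

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(num_steps):
        # one optimization step; optimizer is the minimize op defined above
        # (in practice a fresh batch is fed each step)
        _, loss_val = session.run([optimizer, loss], feed_dict=feed_dict)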
I noticed the problem already in the result of evaluating the gradients at the very first step:
print(session.run(tf.train.AdagradOptimizer(learning_rate).compute_gradients(loss), feed_dict=feed_dict))
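(For reference, compute_gradients returns a list of (gradient, variable) pairs, so the embedding gradient alone can be inspected like this; a sketch using the names from above:)

opt = tf.train.AdagradOptimizer(learning_rate)
grads_and_vars = opt.compute_gradients(loss)  # list of (grad, var) pairs
# keep only the gradient w.r.t. the embedding matrix
emb_grads = [g for g, v in grads_and_vars if v is embeddings]
print(session.run(emb_grads, feed_dict=feed_dict))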
I don't understand what is wrong with my implementation.