https://github.com/tensorflow/models/blob/master/tutorials/embedding/word2vec.py 作为此代码中word2vec的实现,在生成样本时,注释显示为“负采样”,但在接下来的损耗计算部分中,注释显示为“ nce_loss”。据我所知,根据代码,应该是“负采样”,而不是“ nce”或“ nce_loss”。谁能帮忙解释一下?
# Negative sampling.
sampled_ids, _, _ = (tf.nn.fixed_unigram_candidate_sampler(
true_classes=labels_matrix,
num_true=1,
num_sampled=opts.num_samples,
unique=True,
range_max=opts.vocab_size,
distortion=0.75,
unigrams=opts.vocab_counts.tolist()))
# Embeddings for examples: [batch_size, emb_dim]
example_emb = tf.nn.embedding_lookup(emb, examples)
# Weights for labels: [batch_size, emb_dim]
true_w = tf.nn.embedding_lookup(sm_w_t, labels)
# Biases for labels: [batch_size, 1]
true_b = tf.nn.embedding_lookup(sm_b, labels)
# Weights for sampled ids: [num_sampled, emb_dim]
sampled_w = tf.nn.embedding_lookup(sm_w_t, sampled_ids)
# Biases for sampled ids: [num_sampled, 1]
sampled_b = tf.nn.embedding_lookup(sm_b, sampled_ids)
# True logits: [batch_size, 1]
true_logits = tf.reduce_sum(tf.multiply(example_emb, true_w), 1) + true_b
# Sampled logits: [batch_size, num_sampled]
# We replicate sampled noise labels for all examples in the batch
# using the matmul.
sampled_b_vec = tf.reshape(sampled_b, [opts.num_samples])
sampled_logits = tf.matmul(example_emb,
sampled_w,
transpose_b=True) + sampled_b_vec
return true_logits, sampled_logits
和损失
def nce_loss(self, true_logits, sampled_logits):
"""Build the graph for the NCE loss."""
# cross-entropy(logits, labels)
opts = self._options
true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
labels=tf.ones_like(true_logits), logits=true_logits)
sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(
labels=tf.zeros_like(sampled_logits), logits=sampled_logits)
# NCE-loss is the sum of the true and noise (sampled words)
# contributions, averaged over the batch.
nce_loss_tensor = (tf.reduce_sum(true_xent) +
tf.reduce_sum(sampled_xent)) / opts.batch_size
return nce_loss_tensor