我有2批13x13网格的序列,每个网格单元包含5个100维的嵌入向量。我希望一些嵌入向量在某种范数度量下彼此非常接近,而其他的彼此距离非常远。如何对每批中所有可能的嵌入向量两两组合计算L2范数或其他范数?在下面的代码中,我试图实现余弦(cosine)归一化,但训练一段时间后损失变成了inf。'tr'变量标记了那些应该彼此接近的嵌入向量。
tf.reset_default_graph()
if True:
a = tf.placeholder(tf.float32,[2,2,13,13,5,100])
b = tf.placeholder(tf.float32,[2,2,13,13,5,1])
a11 = tf.reshape(a, [2, -1, 100])
a1 = tf.layers.Dense(100)(a11)
def triple_loss(pr, tr, batch_size=2, alpha=0.1, cos_norm=False, number_norm=False):
'''face2vec loss
pr: [b,h,w,boxes,embed]
tr: [b,h,w,boxes,1]
returns: []'''
b,l,h,w,bo,_ = tr.get_shape().as_list()
em = pr.get_shape().as_list()[-1]
tr_r = tf.reshape(tr,[-1, l*h*w*bo, 1])
tr_tiled = tf.tile(tr_r,[1, 1, l*h*w*bo])#
pr_reshaped = tf.reshape(pr,[-1, l*h*w*bo, em])
embed_prod = tf.matmul(pr_reshaped, pr_reshaped, transpose_b=True)
if cos_norm:
tr_norm = tf.reduce_sum(tf.sqrt(tr_tiled*tr_tiled),-1)
tr_norm_tiled = tf.tile(tf.reshape(tr_norm,[-1, l*h*w*bo, 1]),[1, 1, l*h*w*bo])
scale = tf.matmul(tr_norm_tiled, tr_norm_tiled, transpose_b=True)
embed_prod = embed_prod/(scale+0.000001)
if number_norm:
return tf.reduce_mean(tf.reduce_mean(embed_prod*tr_tiled,[-1,-2]) /tf.reduce_sum(tr_r,[-1,-2])\
- tf.reduce_mean(embed_prod*(1.0 - tr_tiled),[-1,-2])/tf.reduce_sum((1.0 - tr_r),[-1,-2]))\
+alpha
loss = tf.reduce_mean( tf.reduce_mean(embed_prod*tr_tiled,[-1,-2]) \
- tf.reduce_mean(embed_prod*(1.0 - tr_tiled),[-1,-2]))+alpha
return loss
loss = triple_loss(a1, b, cos_norm=True)
optimizer = tf.train.GradientDescentOptimizer(1e-3)
train_op = optimizer.minimize(loss)
sess= tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
aa = np.zeros((2,2,13,13,5,100))
aa[...,:2,:2]=1
aa[...,0,1:3,1:2]=1
bb = np.zeros((2,2,13,13,5,1))
bb[:,:,:3,:3,:2,0] = 1.0
for i in range(10):
l = sess.run([loss,train_op],{a:aa, b:bb})
print(l[-2])
输出是:
-320486.0
-2.02932e+12
-1.27284e+19
-8.06542e+25
-inf
nan
nan
如果只使用嵌入向量之间的线性(点)乘积,网络就可以收敛!
答案 0（得分：0）
要为每一对向量计算(平方)L2内积,可以把所有向量堆叠成一个矩阵,再将该矩阵与它的转置相乘。
你计算出的损失可能是负数。优化器正在忠实地完成它最小化损失的目标——于是损失一路趋向负无穷。你需要确保你的损失有下界(从下方有界),例如用余弦相似度归一化或对损失做截断。