I am currently trying to train a specific layer of a neural network using the output of another layer located lower in the network. My loss function computes the KL divergence between the two layers' distributions, modeling the probabilities with cosine similarity. When I try to apply the gradients with the optimizer, the error "No gradients provided for any variable" pops up. I am new to TensorFlow and I am completely stuck. Any ideas what the problem is?
Note: I am running tf.enable_eager_execution()
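Concretely, the loss I am trying to compute is the KL divergence between the row-normalized pairwise-similarity matrices of the two layers, i.e. (my own notation, matching the P and Q in the code below):

loss = sum_{i,j} P[i,j] * log(P[i,j] / Q[i,j])

where P is built from the donor layer's pairwise cosine similarities and Q from the receiver layer's.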
import tensorflow as tf

tf.enable_eager_execution()

def grad_pkt(model, loss_function, x, receiver_layer, donor_layer):
    with tf.GradientTape() as tape:
        loss_value = loss_function(model, x, receiver_layer, donor_layer)
    return loss_value, tape.gradient(loss_value, model.trainable_variables)
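The training step where the error is raised looks roughly like this (the optimizer choice, batch, and layer names here are placeholders, not my exact code):

optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
loss_value, grads = grad_pkt(model, pkt_loss, x_batch, 'receiver_layer', 'donor_layer')
# Raises: ValueError: No gradients provided for any variable
optimizer.apply_gradients(zip(grads, model.trainable_variables))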

def pkt_loss(model, x, receiver_layer, donor_layer):
    eps = 1e-5  # In case there is division by zero
    # Get the donor and receiver layers' outputs
    x_donor = get_layer_output(model, x, donor_layer, flatten=True)
    x_receiver = get_layer_output(model, x, receiver_layer, flatten=True)
    # Get the norms
    norm_donor = tf.norm(x_donor, axis=1)
    norm_receiver = tf.norm(x_receiver, axis=1)
    # Normalize each sample to unit length
    x_donor = x_donor / tf.reshape(norm_donor, (-1, 1))
    x_receiver = x_receiver / tf.reshape(norm_receiver, (-1, 1))
    # Calculate pairwise cosine similarities
    donor_sim = tf.matmul(x_donor, tf.transpose(x_donor))
    receiver_sim = tf.matmul(x_receiver, tf.transpose(x_receiver))
    # Scale similarities from [-1, 1] to [0, 1]
    donor_sim = (donor_sim + 1.0) / 2.0
    receiver_sim = (receiver_sim + 1.0) / 2.0
    # Turn each row into a probability distribution (keepdims so each row sums to 1)
    P = donor_sim / tf.reduce_sum(donor_sim, axis=1, keepdims=True)
    Q = receiver_sim / tf.reduce_sum(receiver_sim, axis=1, keepdims=True)
    # Calculate the KL divergence
    loss = tf.reduce_sum(P * tf.log((P + eps) / (Q + eps)))
    return loss
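For what it's worth, the similarity/KL math on its own does seem to be differentiable once everything stays a tensor. A minimal standalone check (the shapes and the dummy variable are made up, TF 1.x eager):

x = tf.random_normal((4, 8))               # stand-in for a batch of layer inputs
w = tf.Variable(tf.random_normal((8, 8)))  # dummy trainable variable
with tf.GradientTape() as tape:
    h = tf.matmul(x, w)
    h = h / tf.reshape(tf.norm(h, axis=1), (-1, 1))      # unit-normalize rows
    sim = (tf.matmul(h, tf.transpose(h)) + 1.0) / 2.0    # cosine sims in [0, 1]
    p = sim / tf.reduce_sum(sim, axis=1, keepdims=True)  # row distributions
    loss = tf.reduce_sum(p * tf.log(p + 1e-5))
print(tape.gradient(loss, w))  # a dense tensor, not None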

def get_layer_output(model, data, layer_name, flatten=False):
    # Build a sub-model that exposes the named layer's output
    intermediate_layer_model = tf.keras.Model(inputs=model.input,
                                              outputs=model.get_layer(layer_name).output)
    # predict() runs a forward pass and returns a NumPy array
    layer_output = intermediate_layer_model.predict(data)
    if flatten:
        layer_output = layer_output.reshape(layer_output.shape[0], -1)
    return layer_output
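One thing I am unsure about: predict() returns NumPy arrays rather than tensors, so perhaps the layer outputs end up disconnected from anything the tape is watching. A variant of the helper that returns tensors instead, by calling the sub-model directly (a sketch with a hypothetical name, not something from my working code):

def get_layer_output_tensor(model, data, layer_name, flatten=False):
    intermediate_layer_model = tf.keras.Model(inputs=model.input,
                                              outputs=model.get_layer(layer_name).output)
    # Calling the model like a function keeps the output a tensor
    layer_output = intermediate_layer_model(data)
    if flatten:
        layer_output = tf.reshape(layer_output, [tf.shape(layer_output)[0], -1])
    return layer_output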