I am interested in computing gradients with respect to the inputs of a Keras model in TensorFlow. I understand that this was previously possible by building a graph and using tf.gradients, for example here. However, I would like to achieve this in eager mode (possibly using GradientTape). Specifically, if my network has two inputs (x, y) and predicts (u, v, p), I would like to compute, e.g., du/dx for use in the loss.
Code snippet below; the full code is at this gist.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(20, activation=tf.nn.relu, input_shape=(2,)),  # input shape required
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(20, activation=tf.nn.relu),
    tf.keras.layers.Dense(3)
])
def loss(model: tf.keras.Model, inputs, outputs):
    u_true, v_true = outputs[:, 0], outputs[:, 1]
    prediction = model(inputs)
    u_pred, v_pred = prediction[:, 0], prediction[:, 1]
    loss_value = tf.reduce_mean(tf.square(u_true - u_pred)) + \
                 tf.reduce_mean(tf.square(v_true - v_pred))
    return loss_value, u_pred, v_pred
def grad(model: tf.keras.Model, inputs, outputs):
    """
    :param inputs: (batch_size, 2) -> x, y
    :param outputs: (batch_size, 3) -> vx, vy, p
    :return:
    """
    with tf.GradientTape() as tape:
        loss_value, u_pred, v_pred = loss(model, inputs, outputs)
        # AttributeError: 'DeferredTensor' object has no attribute '_id'
        print(tape.gradient(u_pred, model.input))
    grads = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, grads
I have tried a few things, e.g. tape.gradient(u_pred, model.input) or tape.gradient(model.output, model.input), but these throw:

AttributeError: 'DeferredTensor' object has no attribute '_id'

Is there a way to achieve this in eager mode?
Answer 0 (score: 1)
Here is an example of retrieving the gradients of the predictions with respect to the inputs using eager execution.
Basically, you need to use tape.watch(inputs) [I am using features in my example — whatever you want to call your x ...] so that TensorFlow records the ops on the input tensor; you can then take the gradient of the model output (or of the loss) with respect to the inputs. Also make sure to call tape.gradient outside of the tf.GradientTape() context.
Take a look at the get_gradients function below ...
Hope this helps!
model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(len(numeric_headers),)),  # input shape required
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
])
# model = MyModel()
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
# BinaryAccuracy matches the single-unit sigmoid output above
train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')
def get_gradients(model, features):
    with tf.GradientTape() as tape:
        tape.watch(features)           # features is a plain Tensor, so it must be watched explicitly
        predictions = model(features)
    # gradient is taken outside the tape context, as noted above
    gradients = tape.gradient(predictions, features)
    return gradients
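For the (x, y) -> (u, v, p) network from the question, the same pattern gives du/dx directly: slice the u component inside the tape, take the gradient with respect to the watched inputs, and keep the x column. A minimal sketch (get_du_dx is a hypothetical helper, assuming inputs has shape (batch_size, 2)):

def get_du_dx(model, inputs):
    inputs = tf.convert_to_tensor(inputs, dtype=tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(inputs)               # inputs is a Tensor, not a Variable, so it must be watched
        predictions = model(inputs)      # shape (batch_size, 3) -> u, v, p
        u_pred = predictions[:, 0]       # slice u inside the tape so the op is recorded
    du = tape.gradient(u_pred, inputs)   # shape (batch_size, 2) -> du/dx, du/dy per sample
    return du[:, 0]                      # du/dx

Because each batch row passes through the network independently, the summed gradient that tape.gradient returns coincides with the per-sample gradients here.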
def train_step(features, label):
    with tf.GradientTape() as tape:
        predictions = model(features)
        loss = loss_object(label, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(label, predictions)
def test_step(features, label):
    predictions = model(features)
    t_loss = loss_object(label, predictions)

    test_loss(t_loss)
    test_accuracy(label, predictions)
EPOCHS = 5

for epoch in range(EPOCHS):
    for features, labels in train_ds:
        train_step(features, labels)

    for features, labels in test_ds:  # evaluate on the held-out set
        test_step(features, labels)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))

    if epoch == EPOCHS - 1:
        for features, labels in train_ds:
            print('-')
            print(get_gradients(model, features))
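Back to the original question: to use du/dx inside the loss, one option is a nested tape — an inner tape differentiates the prediction with respect to the inputs, and an outer tape differentiates the resulting loss with respect to the weights. A minimal sketch, assuming the (x, y) -> (u, v, p) model from the question; du_dx_target is a hypothetical target tensor used only for illustration:

def grad_with_du_dx(model, inputs, outputs, du_dx_target):
    with tf.GradientTape() as outer:
        with tf.GradientTape() as inner:
            inner.watch(inputs)
            predictions = model(inputs)
            u_pred = predictions[:, 0]
        # computed inside the outer tape, so du_dx stays differentiable w.r.t. the weights
        du_dx = inner.gradient(u_pred, inputs)[:, 0]
        data_loss = tf.reduce_mean(tf.square(outputs[:, 0] - u_pred))
        loss_value = data_loss + tf.reduce_mean(tf.square(du_dx - du_dx_target))
    grads = outer.gradient(loss_value, model.trainable_variables)
    return loss_value, grads

The resulting grads can be passed to optimizer.apply_gradients exactly as in the train_step above.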