I am trying to implement a meta-learning algorithm (MAML) for the Omniglot dataset. After the first few iterations, computing the gradients with GradientTape stops working and returns a list of None values. I even call watch on the variables inside the tape, and nothing is modified outside the tape before the gradient computation, but it did not help. Here is the code:
# Imports assumed from the rest of the notebook (not shown in the original snippet):
import time
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt

def contrastive_loss(y_pred, y_true):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    square_pred = K.square(y_pred)
    margin_square = K.square(K.maximum(margin - y_pred, 0))
    return K.mean(y_true * square_pred + (1 - y_true) * margin_square)
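# Quick sanity check of contrastive_loss on dummy values (not part of the training loop):
# a similar pair (y_true = 1) is penalised by its squared distance and a dissimilar pair
# (y_true = 0) by how far it falls inside the margin, so the expected mean here is
# (0.2**2 + max(1 - 0.9, 0)**2) / 2 = 0.025.
print(contrastive_loss(tf.constant([[0.2], [0.9]]), tf.constant([[1.0], [0.0]])))
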
def copy_model(model, imgs1, imgs2):
    '''Copy model weights to a new model.
    Args:
        model: model to be copied.
        imgs1: First set of example images used in training. This is used to run
            a forward pass in order to add the weights of the graph
            as variables.
        imgs2: The second set of images used in training.
    Returns:
        A copy of the model.
    '''
    copied_model = get_siamese_model((105, 105, 1))
    # If we don't run this step the weights are not "initialized"
    # and the gradients will not be computed.
    copied_model.call(inputs=[imgs1, imgs2])
    copied_model.set_weights(model.get_weights())
    return copied_model
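# Note (sketch, not part of the original code): the explicit forward pass in copy_model is
# only needed because Keras creates the layer variables lazily on the first call, so a dummy
# batch of the input shape used above would work just as well before set_weights, e.g.:
#   dummy = tf.zeros((1, 105, 105, 1))
#   _ = copied_model([dummy, dummy])
#   copied_model.set_weights(model.get_weights())
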
def train_maml(model, epochs, dataset, lr_inner=0.01, batch_size=1, log_steps=1000):
    '''Train using the MAML setup.
    The comments in this function that start with:
        Step X:
    refer to a step described in Algorithm 1 of the paper.
    Args:
        model: A model.
        epochs: Number of epochs used for training.
        dataset: A dataset used for training.
        lr_inner: Inner learning rate (alpha in Algorithm 1). Default value is 0.01.
        batch_size: Batch size. Default value is 1. The paper does not specify
            which value they use.
        log_steps: At every `log_steps` a log message is printed.
    Returns:
        A trained MAML model.
    '''
    optimizer = Adam()
    total_loss = 0
    losses = []
    start = time.time()
    # Step 2: instead of checking for convergence, we train for a number
    # of epochs
    for i in range(epochs):
        # Step 3 and 4
        x, y = get_batch(batch_size)
        x1 = tf.cast(tf.convert_to_tensor(x[0]), dtype=tf.float32)
        x2 = tf.cast(tf.convert_to_tensor(x[1]), dtype=tf.float32)
        y1 = tf.cast(tf.convert_to_tensor(y), dtype=tf.float32)
        with tf.GradientTape() as test_tape:
            #test_tape.watch(model.trainable_weights)
            # Step 5
            with tf.GradientTape() as train_tape:
                train_tape.watch(model.trainable_weights)
                y_out = model.call(inputs=[x1, x2])  # run call forward pass to initialize weights
                train_loss = contrastive_loss(y_out, y1)
            # Step 6
            gradients = train_tape.gradient(train_loss, model.trainable_weights)
            model_copy = copy_model(model, x1, x2)
            model_copy.set_weights([w - lr_inner * g for w, g in zip(model.trainable_weights, gradients)])
            # Step 8
            y_out = model_copy.call(inputs=[x1, x2])
            test_loss = contrastive_loss(y_out, y1)
        # Step 8
        gradients = test_tape.gradient(test_loss, model_copy.trainable_weights)
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
        # Logs
        total_loss += test_loss
        loss = total_loss / (i + 1.0)
        losses.append(loss)
        if i % log_steps == 0 and i > 0:
            print('Step {}: loss = {}, Time to run {} steps = {}'.format(i, loss, log_steps, time.time() - start))
            start = time.time()
    plt.plot(losses)
    plt.show()
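# For reference, the "Step X" comments above refer to Algorithm 1 of the MAML paper
# (Finn et al., 2017). The two updates being implemented are:
#   inner/train update (via train_tape): theta_i' = theta - alpha * grad_theta L_train(f_theta),  with alpha = lr_inner
#   meta/test update   (via test_tape):  theta   <- theta - beta  * grad_theta sum_i L_test(f_theta_i')
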
train_maml(model, 200, X, batch_size=32, log_steps=10)
Here is the output:
Step 10: loss = 0.26724734902381897, Time to run 10 steps = 216.29537153244019
Step 20: loss = 0.26651105284690857, Time to run 10 steps = 193.76108241081238
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-15-5bec10c2de66> in <module>
----> 1 train_maml(model, 200, X, batch_size=32, log_steps=10)
<ipython-input-14-85acebbaae75> in train_maml(model, epochs, dataset, lr_inner, batch_size, log_steps)
77 model_copy = copy_model(model, x1, x2)
78
---> 79 model_copy.set_weights([w - lr_inner * g for w, g in zip(model.trainable_weights, gradients)])
80
81 # Step 8
<ipython-input-14-85acebbaae75> in <listcomp>(.0)
77 model_copy = copy_model(model, x1, x2)
78
---> 79 model_copy.set_weights([w - lr_inner * g for w, g in zip(model.trainable_weights, gradients)])
80
81 # Step 8
TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'
Is there another way to achieve the same goal?
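For reference, this is the minimal GradientTape behaviour I am relying on (a self-contained sketch with toy values, nothing from my actual model): trainable variables are tracked automatically, plain tensors need an explicit watch(), and anything the tape did not track comes back as None from tape.gradient.
import tensorflow as tf

w = tf.Variable(2.0)   # variables are watched automatically
x = tf.constant(3.0)   # constants are only watched if asked for explicitly
with tf.GradientTape() as tape:
    tape.watch(x)
    y = w * x * x
grads = tape.gradient(y, [w, x])
print(grads)           # both defined: dy/dw = x**2 = 9.0, dy/dx = 2*w*x = 12.0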