我正在将CNN功能输入到gpflow模型中。我在这里从程序中编写代码块。我正在将tape.gradient与Adam优化器(计划的lr)一起使用。我的准确度停留在47%,令人惊讶的是,我的损失仍在减少。它很奇怪。我已经调试了程序。 CNN功能尚可,但gp模型无法学习。请检查训练循环,让我知道我在哪里错了。
def optimization_step(gp_model: gpflow.models.SVGP, image_data,labels):
with tf.GradientTape(watch_accessed_variables=False)as tape:
tape.watch(gp_model.trainable_variables)
cnn_feat = cnn_model(image_data,training=False)
cnn_feat=tf.cast(cnn_feat,dtype=default_float())
labels=tf.cast(labels,dtype=np.int64)
data=(cnn_feat, labels)
loss = gp_model.training_loss(data)
gp_grads=tape.gradient(loss, gp_model.trainable_variables)
gp_optimizer.apply_gradients(zip(gp_grads, gp_model.trainable_variables))
return loss, cnn_feat
训练循环是
def simple_training_loop(gp_model: gpflow.models.SVGP, epochs: int = 3, logging_epoch_freq: int = 10):
total_loss = []
features=[]
tf_optimization_step = tf.function(optimization_step, autograph=False)
for epoch in range(epochs):
lr.assign(max(args.learning_rate_clip, args.learning_rate * (args.decay_rate ** epoch)))
data_loader.shuffle_data(args.is_training)
for b in range(data_loader.n_batches):
batch_x, batch_y= data_loader.next_batch(b)
batch_x=tf.convert_to_tensor(batch_x)
batch_y=tf.convert_to_tensor(batch_y)
loss,features_CNN=tf_optimization_step(gp_model, batch_x,batch_y)
我正在从迁移学习过程中保存的检查点恢复CNN的权重。
随着时间的推移,损耗继续减少,但准确性也开始下降。
gp模型声明如下
kernel = gpflow.kernels.Matern32() + gpflow.kernels.White(variance=0.01)
invlink = gpflow.likelihoods.RobustMax(C)
likelihood = gpflow.likelihoods.MultiClass(C, invlink=invlink)
测试功能
cnn_feat=cnn_model(test_x,training=False)
cnn_feat = tf.cast(cnn_feat, dtype=default_float())
mean, var = gp_model.predict_f(cnn_feat)
preds = np.argmax(mean, 1).reshape(test_labels.shape)
correct = (preds == test_labels.numpy().astype(int))
acc = np.average(correct.astype(float)) * 100
答案 0 :(得分:0)
能否请您检查一下训练循环是否正确编写
训练循环看起来不错。但是,为了清晰和优化起见,应修改某些位。
def simple_training_loop(gp_model: gpflow.models.SVGP, epochs: int = 3, logging_epoch_freq: int = 10):
total_loss = []
features=[]
@tf.function
def compute_cnn_feat(x: tf.Tensor) -> tf.Tensor:
return tf.cast(cnn_model(x, training=False), dtype=default_float())
@tf.function
def optimization_step(cnn_feat: tf.Tensor, labels: tf.Tensor): # **Change 1.**
with tf.GradientTape(watch_accessed_variables=False) as tape:
tape.watch(gp_model.trainable_variables)
data = (cnn_feat, labels)
loss = gp_model.training_loss(data)
gp_grads = tape.gradient(loss, gp_model.trainable_variables) # **Change 2.**
gp_optimizer.apply_gradients(zip(gp_grads, gp_model.trainable_variables))
return loss
for epoch in range(epochs):
lr.assign(max(args.learning_rate_clip, args.learning_rate * (args.decay_rate ** epoch)))
data_loader.shuffle_data(args.is_training)
for b in range(data_loader.n_batches):
batch_x, batch_y= data_loader.next_batch(b)
batch_x = tf.convert_to_tensor(batch_x)
batch_y = tf.convert_to_tensor(batch_y, dtype=default_float())
cnn_feat = compute_cnn_feat(batch_x) # **Change 3.**
loss = optimization_step(cnn_feat, batch_y)
更改1。用tf.function
包装的函数的签名不应包含可变对象。
更改2。梯度磁带将跟踪上下文管理器内部的所有计算,包括梯度的计算,即tape.gradient(...)
。反过来,这意味着您的代码执行了不必要的计算。
变更3。出于与“变更2”相同的原因。我将CNN特征提取移到了渐变带之外。