I got a piece of code from here: https://github.com/liorshk/facenet_pytorch/blob/master/train_triplet.py
def train(train_loader, model, optimizer, epoch):
    # switch to train mode
    model.train()

    pbar = tqdm(enumerate(train_loader))
    labels, distances = [], []

    for batch_idx, (data_a, data_p, data_n, label_p, label_n) in pbar:
        data_a, data_p, data_n = data_a.cuda(), data_p.cuda(), data_n.cuda()
        data_a, data_p, data_n = Variable(data_a), Variable(data_p), \
                                 Variable(data_n)

        # compute output
        out_a, out_p, out_n = model(data_a), model(data_p), model(data_n)

        # Choose the hard negatives
        d_p = l2_dist.forward(out_a, out_p)
        d_n = l2_dist.forward(out_a, out_n)
        all = (d_n - d_p < args.margin).cpu().data.numpy().flatten()
        hard_triplets = np.where(all == 1)
        if len(hard_triplets[0]) == 0:
            continue

        out_selected_a = Variable(torch.from_numpy(out_a.cpu().data.numpy()[hard_triplets]).cuda())
        out_selected_p = Variable(torch.from_numpy(out_p.cpu().data.numpy()[hard_triplets]).cuda())
        out_selected_n = Variable(torch.from_numpy(out_n.cpu().data.numpy()[hard_triplets]).cuda())

        selected_data_a = Variable(torch.from_numpy(data_a.cpu().data.numpy()[hard_triplets]).cuda())
        selected_data_p = Variable(torch.from_numpy(data_p.cpu().data.numpy()[hard_triplets]).cuda())
        selected_data_n = Variable(torch.from_numpy(data_n.cpu().data.numpy()[hard_triplets]).cuda())

        selected_label_p = torch.from_numpy(label_p.cpu().numpy()[hard_triplets])
        selected_label_n = torch.from_numpy(label_n.cpu().numpy()[hard_triplets])

        triplet_loss = TripletMarginLoss(args.margin).forward(out_selected_a, out_selected_p, out_selected_n)

        cls_a = model.forward_classifier(selected_data_a)
        cls_p = model.forward_classifier(selected_data_p)
        cls_n = model.forward_classifier(selected_data_n)

        criterion = nn.CrossEntropyLoss()
        predicted_labels = torch.cat([cls_a, cls_p, cls_n])
        true_labels = torch.cat([Variable(selected_label_p.cuda()), Variable(selected_label_p.cuda()), Variable(selected_label_n.cuda())])
        cross_entropy_loss = criterion(predicted_labels.cuda(), true_labels.cuda())

        loss = cross_entropy_loss + triplet_loss

        # compute gradient and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # update the optimizer learning rate
        adjust_learning_rate(optimizer)

        # log loss value
        logger.log_value('triplet_loss', triplet_loss.data[0]).step()
        logger.log_value('cross_entropy_loss', cross_entropy_loss.data[0]).step()
        logger.log_value('total_loss', loss.data[0]).step()

        if batch_idx % args.log_interval == 0:
            pbar.set_description(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} \t # of Selected Triplets: {}'.format(
                    epoch, batch_idx * len(data_a), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    loss.data[0], len(hard_triplets[0])))

        dists = l2_dist.forward(out_selected_a, out_selected_n)  # torch.sqrt(torch.sum((out_a - out_n) ** 2, 1))  # euclidean distance
        distances.append(dists.data.cpu().numpy())
        labels.append(np.zeros(dists.size(0)))

        dists = l2_dist.forward(out_selected_a, out_selected_p)  # torch.sqrt(torch.sum((out_a - out_p) ** 2, 1))  # euclidean distance
        distances.append(dists.data.cpu().numpy())
        labels.append(np.ones(dists.size(0)))

    labels = np.array([sublabel for label in labels for sublabel in label])
    distances = np.array([subdist[0] for dist in distances for subdist in dist])

    tpr, fpr, accuracy, val, val_std, far = evaluate(distances, labels)
    print('\33[91mTrain set: Accuracy: {:.8f}\n\33[0m'.format(np.mean(accuracy)))
    logger.log_value('Train Accuracy', np.mean(accuracy))

    plot_roc(fpr, tpr, figure_name="roc_train_epoch_{}.png".format(epoch))

    # do checkpointing
    torch.save({'epoch': epoch + 1, 'state_dict': model.state_dict()},
               '{}/checkpoint_{}.pth'.format(LOG_DIR, epoch))
As you can see, there are a bunch of model.forward(..) calls:
out_a, out_p, out_n = model(data_a), model(data_p), model(data_n)
....
cls_a = model.forward_classifier(selected_data_a)
cls_p = model.forward_classifier(selected_data_p)
cls_n = model.forward_classifier(selected_data_n)
(model.forward_classifier() also invokes model.forward().)
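As I understand it, each of these forward passes builds its own autograd graph, and the graph's buffers stay on the GPU until backward() consumes them or every reference to the outputs is dropped. Here is a minimal sketch of what I mean, using a toy nn.Linear instead of the repo's model (my own example, on a recent PyTorch where Tensor and Variable are merged):

import torch
import torch.nn as nn

model = nn.Linear(1024, 512).cuda()      # toy stand-in for the real model
x = torch.randn(256, 1024, device='cuda')

out1 = model(x)                          # graph #1 is built; its buffers stay allocated
out2 = model(x)                          # graph #2 is built; a second set of buffers
print(torch.cuda.memory_allocated())     # counts the buffers of both graphs

out1.sum().backward()                    # frees the saved buffers of graph #1
del out1, out2                           # dropping the last references frees graph #2
torch.cuda.empty_cache()                 # returns cached-but-free blocks to the driver
print(torch.cuda.memory_allocated())     # should be back near the baseline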
Because loss.backward() is called, a little GPU memory is released before the second iteration starts, but more than 30 GB of GPU memory is never freed (I have 4x 1080Ti), and training then crashes with a GPU out-of-memory error. So the questions are:
1. In nn.Module, how (or where) is the computation graph saved, and how can I release the GPU memory it holds?
2. Is it mandatory to call backward()? If so, what should the .backward() call be?
3. In my case, which part of the GPU memory was released and which part was retained? Is there a way to see it?
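For question 3, the only way I know to watch this is the torch.cuda memory counters; here is how I would probe them around one iteration (the report helper is my own, not from the repo):

import torch

def report(tag):
    # currently allocated vs. peak allocated vs. held by the caching allocator
    print('{}: allocated={} MB, peak={} MB, cached={} MB'.format(
        tag,
        torch.cuda.memory_allocated() // 2**20,
        torch.cuda.max_memory_allocated() // 2**20,
        torch.cuda.memory_cached() // 2**20))

report('before iteration')
# ... run one forward/backward/step here ...
report('after iteration')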