Question

我按照 PyTorch 迁移学习教程，将其应用于 Kaggle 种子分类任务，但不确定如何将预测结果保存到 CSV 文件中以便提交。任何建议都会有所帮助。以下是我目前的代码：

# Fine-tune a pretrained ResNet-50: freeze the backbone and train only a new
# classification head.
use_gpu = torch.cuda.is_available()
model = models.resnet50(pretrained=True)
# Freeze every pretrained weight so that backpropagation leaves them untouched.
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer with one sized to this task's
# classes; it is created after the freeze loop, so it stays trainable.
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(classes))
if use_gpu:
    model = model.cuda()

criterion = torch.nn.CrossEntropyLoss()
# Only the new head's parameters are handed to the optimizer.
optimizer = torch.optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)
# Scale the learning rate by gamma=0.1 every step_size=7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

loaders = {'train':train_loader, 'valid':valid_loader, 'test': test_loader}

model = train_model(loaders, model, criterion, optimizer, exp_lr_scheduler, num_epochs=50)

Answer 1

训练完模型后，您可以在测试数据上运行它。这可能会得到一个位于 GPU 上的 Variable。此时，您需要用 cpu() 将张量复制到 CPU，再用 numpy() 将其转换为 numpy 数组。然后，您可以使用 numpy 的 CSV 功能（np.savetxt），或者使用例如 pandas 的 DataFrame.to_csv。对于第一种方式，代码大致如下：

# evaluate on Variable x with testing data
y = model(x)
# access Variable's tensor, copy back to CPU, convert to numpy
arr = y.data.cpu().numpy()
# write CSV; np.savetxt separates columns with a space unless told otherwise,
# so pass an explicit comma delimiter to get a real comma-separated file
np.savetxt('output.csv', arr, delimiter=',')

Answer 2

我来分享一下我在 SNLI 任务中使用的评估函数。请注意，这只是一个示例，可能并不是您要找的确切答案。希望它能对您有所帮助！

def evaluate(model, batches, dictionary, outfile=None):
    """Run ``model`` over ``batches`` and either score it or dump predictions.

    Args:
        model: Called as ``model(sent1, sent_len1, sent2, sent_len2)``; must
            also provide ``eval()`` and a ``config.cuda`` flag.
        batches: Iterable of batches. Each batch supports ``len()`` and
            iteration, and its items expose an ``id`` attribute.
        dictionary: Forwarded unchanged to ``helper.batch_to_tensors``.
        outfile: Optional output path. When truthy, one ``pairID,gold_label``
            row per example is written there and nothing is returned.

    Returns:
        ``None`` when ``outfile`` is given; otherwise the tuple
        ``(accuracy, weighted_f1)``, both expressed in percent.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()

    n_correct, n_total = 0, 0
    y_preds, y_true, output = [], [], []
    for batch in batches:
        sent1, sent_len1, sent2, sent_len2, labels = helper.batch_to_tensors(batch, dictionary)
        if model.config.cuda:
            sent1 = sent1.cuda()
            sent2 = sent2.cuda()
            labels = labels.cuda()

        score = model(sent1, sent_len1, sent2, sent_len2)
        # torch.max(..., 1) returns (values, indices); the indices are the
        # predicted class ids.
        preds = torch.max(score, 1)[1]
        if outfile:
            predictions = preds.data.cpu().tolist()
            for example, prediction in zip(batch, predictions):
                output.append([example.id, prediction])
        else:
            y_preds.extend(preds.data.cpu().tolist())
            y_true.extend(labels.data.cpu().tolist())
            # int() collapses the summed comparison to a plain Python number,
            # so the accuracy returned below is an ordinary float (like the
            # F1 score) rather than a tensor.
            n_correct += int((preds.view(labels.size()).data == labels.data).sum())
            n_total += len(batch)

    if outfile:
        # Class index -> label string used in the output file.
        target_names = ['entailment', 'neutral', 'contradiction']
        with open(outfile, 'w') as f:
            f.write('pairID,gold_label' + '\n')
            for pair_id, class_index in output:
                f.write(str(pair_id) + ',' + target_names[class_index] + '\n')
    else:
        return 100. * n_correct / n_total, 100. * f1_score(numpy.asarray(y_true), numpy.asarray(y_preds),
                                                           average='weighted')

通常，我按如下方式调用 evaluate 函数：

# Supplying an output path makes evaluate() write the predictions to that file.
predictions_file = args.save_path + 'predictions.csv'
evaluate(model, test_batches, dictionary, predictions_file)

Answer 3

我不知道您为什么需要将预测另存为CSV。是为了人类可读性？

无论如何，通常在转换为numpy时，我们会这样做：

# Detach from the autograd graph, copy to CPU memory, then convert to numpy.
tensor_data.detach().cpu().numpy()

请按此顺序调用（先 detach，以免带上我们不需要的梯度）。

此外，如果您要保存预测结果并选择将其保存为 torch.tensor 对象，请务必先调用 detach，以免意外保留梯度（除非这正是您想要的）：

y = f(x)
# torch.save needs a destination as its second argument. Detach first so the
# stored tensor does not carry its autograd history.
torch.save({'y': y.detach().cpu()}, 'y.pt')

保存为CSV：

# Detach from autograd, copy to CPU and convert to a numpy array.
y = f(x).detach().cpu().numpy()
# np.savetxt uses a space between columns by default; ask for commas so the
# output is a real CSV file.
np.savetxt(path / 'output.csv', y, delimiter=',')

但是，我建议您仅将 print 语句用于调试。除此之外，使用 torch.save 即可，它会为您处理好一切。我不建议使用 pickle，因为它与 PyTorch 一起使用时会发出警告。

有关完整的工作示例：


# test for saving everything with torch.save

import torch
import torch.nn as nn

from pathlib import Path
from collections import OrderedDict

import numpy as np

import pickle

# Scratch directory for the saved artifacts; created if it does not exist.
path = Path('~/data/tmp/').expanduser()
path.mkdir(parents=True, exist_ok=True)

num_samples = 3
Din, Dout = 1, 1
lb, ub = -1, 1

# Draw num_samples inputs uniformly from [lb, ub).
x = torch.distributions.Uniform(low=lb, high=ub).sample((num_samples, Din))

# Tiny model: one linear layer followed by a SELU activation.
f = nn.Sequential(OrderedDict([
    ('f1', nn.Linear(Din,Dout)),
    ('out', nn.SELU())
]))
y = f(x)

# save data torch to numpy
x_np, y_np = x.detach().cpu().numpy(), y.detach().cpu().numpy()
db2 = {'f': f, 'x': x_np, 'y': y_np}
torch.save(db2, path / 'db_f_x_y')
# np.savetxt separates columns with a space by default; use commas for CSV.
np.savetxt(path / 'output.csv', y_np, delimiter=',')

# The checkpoint holds a pickled nn.Module and numpy arrays, which the
# restricted loader (the default since PyTorch 2.6) refuses to unpickle.
# weights_only=False runs the full unpickler, which can execute arbitrary
# code, so use it only on files you created yourself.
db3 = torch.load(path / 'db_f_x_y', weights_only=False)
f3 = db3['f']
x3 = db3['x']
y3 = db3['y']
# Re-run the restored model on the restored inputs to show the round trip.
xx = torch.tensor(x3)
yy3 = f3(xx)

print(yy3)

从pytorch模型中保存预测

3 个答案: