我不断收到以下错误消息。我似乎无法查明所提到的张量。在下面,您将找到trainer.py和main.py模块。我正在开发的模型是CelebA数据集上的GAN。我正在远程服务器上运行代码,因此花了很多时间调试我的模型。
这是完整的错误消息:
Traceback (most recent call last):
File "main.py", line 52, in <module>
main(opt)
File "main.py", line 47, in main
trainer.train(train_loader)
File "/home/path/trainer.py", line 45, in train
d_loss_cls = F.binary_cross_entropy_with_logits(out_cls, label_org, size_average=False) / out_cls.size(0)
File "/home/path/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 2077, in binary_cross_entropy_with_logits
return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
RuntimeError: expected type torch.cuda.FloatTensor but got torch.FloatTensor
trainer.py
from tqdm import tqdm
import torch
import torch.nn.functional as F
from model import Discriminator, Generator
from tensorboardX import SummaryWriter
class Trainer():
    """StarGAN-style GAN trainer for the CelebA attribute-translation task.

    Holds the generator/discriminator pair, their Adam optimizers, and the
    training/testing loops. Models are moved to GPU when ``opt.cuda`` is set.
    """

    # Weight of the WGAN-GP gradient-penalty term in the discriminator loss.
    LAMBDA_GP = 10.0

    def __init__(self, opt):
        # Generator — args match model.Generator(conv_dim, c_dim, repeat_num);
        # presumably 64 base channels, 5 attribute classes, 6 residual blocks
        # (TODO confirm against model.py, which is not shown here).
        self.G = Generator(64, 5, 6)
        # Discriminator — args match model.Discriminator(image_size, conv_dim,
        # c_dim, repeat_num).
        self.D = Discriminator(128, 64, 5, 6)
        # Separate optimizers: G and D are stepped independently.
        self.g_optimizer = torch.optim.Adam(self.G.parameters(), opt.lr)
        self.d_optimizer = torch.optim.Adam(self.D.parameters(), opt.lr)
        self.opt = opt
        if self.opt.cuda:
            self.G = self.G.cuda()
            self.D = self.D.cuda()

    def train(self, data_loader):
        """Train the discriminator every step and the generator every 2nd step.

        Args:
            data_loader: yields ``(x_real, label_org)`` batches; ``label_org``
                is the multi-label attribute vector (float targets are assumed
                by BCE-with-logits — TODO confirm the dataset emits floats).
        """
        print('Training model')
        writer_d = SummaryWriter('runs/disc')  # discriminator writer
        writer_g = SummaryWriter('runs/gen')   # generator writer
        print('Start training...')
        for epoch in tqdm(range(self.opt.epochs)):
            # enumerate supplies the step counter `i` used to alternate
            # D-steps and G-steps (the original used `i` without defining it).
            for i, (x_real, label_org) in enumerate(tqdm(data_loader)):
                # Generate target domain labels by shuffling the batch.
                rand_idx = torch.randperm(label_org.size(0))
                label_trg = label_org[rand_idx]
                c_org = label_org.clone()
                c_trg = label_trg.clone()
                if self.opt.cuda:
                    x_real = x_real.cuda()        # Input images
                    c_org = c_org.cuda()          # Original domain labels
                    c_trg = c_trg.cuda()          # Target domain labels
                    label_org = label_org.cuda()  # Labels for classification loss
                    label_trg = label_trg.cuda()  # Labels for classification loss

                # ---- Discriminator: loss with real images ----
                out_src, out_cls = self.D(x_real)
                d_loss_real = -torch.mean(out_src)
                # reduction='sum' replaces the deprecated size_average=False;
                # dividing by batch size gives a per-sample classification loss.
                d_loss_cls = F.binary_cross_entropy_with_logits(
                    out_cls, label_org, reduction='sum') / out_cls.size(0)

                # ---- Discriminator: loss with fake images ----
                x_fake = self.G(x_real, c_trg)
                out_src, out_cls = self.D(x_fake.detach())
                d_loss_fake = torch.mean(out_src)

                # ---- Gradient penalty (WGAN-GP) ----
                # device= keeps this CPU-safe; the original called .cuda()
                # unconditionally and crashed without --cuda.
                alpha = torch.rand(x_real.size(0), 1, 1, 1, device=x_real.device)
                x_hat = (alpha * x_real.data
                         + (1 - alpha) * x_fake.data).requires_grad_(True)
                out_src, _ = self.D(x_hat)
                weight = torch.ones(out_src.size(), device=out_src.device)
                grad = torch.autograd.grad(outputs=out_src,
                                           inputs=x_hat,
                                           grad_outputs=weight,
                                           retain_graph=True,
                                           create_graph=True,
                                           only_inputs=True)[0]
                grad = grad.view(grad.size(0), -1)
                d_loss_gp = torch.mean((grad.norm(2, dim=1) - 1) ** 2)

                # Backward and optimize — the original computed the penalty
                # forward pass but never added the penalty term to d_loss.
                d_loss = (d_loss_real + d_loss_fake + d_loss_cls
                          + self.LAMBDA_GP * d_loss_gp)
                self.g_optimizer.zero_grad()
                self.d_optimizer.zero_grad()
                d_loss.backward()
                self.d_optimizer.step()

                # ---- Generator: updated every second discriminator step ----
                if (i + 1) % 2 == 0:
                    # Original-to-target domain
                    x_fake = self.G(x_real, c_trg)
                    out_src, out_cls = self.D(x_fake)
                    g_loss_fake = -torch.mean(out_src)
                    g_loss_cls = F.binary_cross_entropy_with_logits(
                        out_cls, label_trg, reduction='sum') / out_cls.size(0)
                    # Target-to-original domain (cycle reconstruction)
                    x_reconst = self.G(x_fake, c_org)
                    g_loss_rec = torch.mean(torch.abs(x_real - x_reconst))
                    # Backward and optimize — g_loss_cls was computed but
                    # dropped from the total in the original.
                    g_loss = g_loss_fake + g_loss_rec + g_loss_cls
                    self.g_optimizer.zero_grad()
                    self.d_optimizer.zero_grad()
                    g_loss.backward()
                    self.g_optimizer.step()

            # Write per-epoch losses to TensorBoard. The original wrote the
            # generator loss to writer_d under the same tag, overwriting the
            # discriminator curve.
            writer_d.add_scalar('data/loss', d_loss, epoch)
            writer_g.add_scalar('data/loss', g_loss, epoch)
        writer_d.close()
        writer_g.close()
        print('Finished Training')

    def test(self, data_loader):
        """Translate test images through G without tracking gradients.

        NOTE(review): results are collected into ``x_fake_list`` but never
        saved or returned — presumably image dumping is still TODO.
        """
        with torch.no_grad():
            for i, (x_real, c_org) in enumerate(data_loader):
                # Prepare input images and target domain labels.
                if self.opt.cuda:
                    x_real = x_real.cuda()
                    c_org = c_org.cuda()
                # NOTE(review): the original referenced an undefined
                # `c_trg_list` (NameError). Using the original labels as the
                # single target keeps this runnable; TODO supply the real
                # per-attribute target label list as in StarGAN.
                c_trg_list = [c_org]
                # Translate images.
                x_fake_list = [x_real]
                for c_trg in c_trg_list:
                    x_fake_list.append(self.G(x_real, c_trg))
main.py
import argparse
import random
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from preprocess import pre_process
from celeb_dataset import CelebDataset
from trainer import Trainer
# Setting up the argument parser
parser = argparse.ArgumentParser()
parser.add_argument('--workers', type=int, help='number of data loading workers', default=4)
parser.add_argument('--batchSize', type=int, default=8, help='input batch size')
parser.add_argument('--epochs', type=int, default=20, help='number of epochs to train')
parser.add_argument('--lr', type=float, default=0.0002, help='learning rate')
parser.add_argument('--cuda', action='store_true', help='enables cuda')
parser.add_argument('--manualSeed', type=int, help='manual seed')
parser.add_argument('--dataset_path', type=str, default='./data/celeba', help='dataset path')
opt = parser.parse_args()
print(opt)

# Draw a seed when none is given, then actually apply it: the original code
# printed the seed but never seeded the RNGs, so --manualSeed had no effect.
if opt.manualSeed is None:
    opt.manualSeed = random.randint(1, 10000)
print("Random Seed: ", opt.manualSeed)
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
def main(opt):
    """Build the CelebA datasets/loaders, then train and test the GAN.

    Args:
        opt: parsed command-line namespace (batchSize, workers, epochs, lr,
            cuda, dataset_path, ...).
    """
    # Shared DataLoader parameters for both splits.
    params = {
        'batch_size': opt.batchSize,
        'shuffle': True,
        'num_workers': opt.workers
    }
    # Preprocess and set up datasets and data loaders.
    processed_data = pre_process(opt.dataset_path)
    # Hold out the LAST 2000 samples for testing. The original used
    # processed_data[2000:] here, which overlaps almost the entire
    # training split instead of complementing it.
    train_dataset = CelebDataset(processed_data[:-2000])
    test_dataset = CelebDataset(processed_data[-2000:])
    train_loader = DataLoader(train_dataset, **params)
    test_loader = DataLoader(test_dataset, **params)
    trainer = Trainer(opt)
    trainer.train(train_loader)
    trainer.test(test_loader)


if __name__ == "__main__":
    main(opt)
答案 0（得分：2）：
由于 out_cls 和 label_org 二者之一不在 GPU 上，因此您收到该错误。
您的代码在哪里真正使用了 parser.add_argument('--cuda', action='store_true', help='enables cuda') 这个选项？
也许是这样的:
trainer = Trainer(opt)
if opt.cuda:
trainer = trainer.cuda()