我正在尝试在 Google Colab 中使用 PyTorch 训练一个 GAN 模型。问题是:程序运行了将近 2 分钟后就意外停止了!
我认为是 `for i, data in enumerate(trainloader)` 这一行卡住了程序:我在这条语句之后加了 `print("something")`,但它始终没有被打印出来。请问为什么代码没有继续执行?
在这里你可以找到我的笔记本的链接:https://colab.research.google.com/drive/1STi87M2pNjVOt-LGc6rZ5-kpAf0NwFAm?usp=sharing
以及数据集:https://drive.google.com/drive/folders/1LpEJwa_OlirZV2mK9vdxNSWZor-AFty_?usp=sharing
错误位于最后一个单元格,谢谢
以下代码是有问题的
import os
import copy
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import numpy as np
import matplotlib.pyplot as plt
#from dataset import NTUSkeletonDataset
from torch.utils.data import Dataset, DataLoader
#import GAN
from torch.autograd import Variable
import matplotlib.pyplot as plt
import time
# ---------------------------------------------------------------------------
# Hyper-parameters and training objects for a weight-clipped WGAN on the
# NTU skeleton dataset.
# ---------------------------------------------------------------------------

# Root directory for dataset
dataroot = "Data/Hey/nturgb+d_skeletons"
# Batch size during training
batch_size = 5
# Size of z latent vector (i.e. size of generator input)
latent_dim = 20
# Number of training epochs
num_epochs = 200
# Learning rate for each optimizer
lrG = 0.00005
lrD = 0.00005
# WGAN weight-clipping bound, and number of critic steps per generator step
clip_value = 0.01
n_critic = 20

trainset = NTUSkeletonDataset(root_dir=dataroot, pinpoint=1, merge=2)
# BUGFIX: num_workers=4 spawns DataLoader worker processes, which frequently
# deadlocks in notebook environments (Google Colab) before the first batch is
# ever yielded -- matching the reported "print after the for-loop never runs"
# symptom.  num_workers=0 loads data in the main process and cannot hang.
trainloader = DataLoader(trainset, batch_size=batch_size,
                         shuffle=True, num_workers=0)

cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda else "cpu")
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

generator = Gen0(latent_dim).to(device)
discriminator = Dis0().to(device)
optimizer_G = torch.optim.RMSprop(generator.parameters(), lr=lrG)
optimizer_D = torch.optim.RMSprop(discriminator.parameters(), lr=lrD)

# One slot per generator update per epoch: (G loss, critic real, critic fake).
epoch_loss = np.zeros((num_epochs, 3, len(trainloader)//n_critic+1))
# Main WGAN training loop: n_critic discriminator updates per generator update.
for epoch in range(num_epochs):
    j = 0  # index of the current generator update within this epoch
    print("Boucle 1")
    epoch_start = time.time()
    for i, data in enumerate(trainloader):
        print("something")
        # Flatten every leading axis into the batch axis; keep the feature axis.
        size = (-1, data.size(-1))
        data = data.reshape(size)
        # --- Critic / discriminator step (every batch) ---
        # (BUGFIX: removed a dead bare `print` expression that did nothing,
        #  and the deprecated Variable wrapper.)
        optimizer_D.zero_grad()
        real_skeleton = data.type(Tensor).to(device)
        critic_real = -torch.mean(discriminator(real_skeleton))
        # Sample noise as generator input.
        z = torch.randn(real_skeleton.size(0), latent_dim).to(device)
        # Generate a batch of fake skeletons; detach so G receives no gradient.
        fake_skeleton = generator(z).detach()
        critic_fake = torch.mean(discriminator(fake_skeleton))
        # WGAN critic loss: maximize E[D(real)] - E[D(fake)].
        loss_D = critic_real + critic_fake
        loss_D.backward()
        optimizer_D.step()
        # Clip weights of discriminator (WGAN Lipschitz constraint).
        for p in discriminator.parameters():
            p.data.clamp_(-clip_value, clip_value)
        # --- Generator step (every n_critic iterations) ---
        if i % n_critic == n_critic - 1:
            optimizer_G.zero_grad()
            # Generate a batch from the most recent noise.
            gen_skeleton = generator(z)
            # Adversarial loss: make the critic score fakes highly.
            loss_G = -torch.mean(discriminator(gen_skeleton))
            loss_G.backward()
            optimizer_G.step()
            # Record (G loss, critic real, critic fake) for this update.
            for k, l in enumerate((loss_G, critic_real, critic_fake)):
                epoch_loss[epoch, k, j] = l.item()
            j += 1
    epoch_end = time.time()
    print('[%d] time eplased: %.3f' % (epoch, epoch_end-epoch_start))
    for k, l in enumerate(('G', 'critic real', 'critic fake')):
        print('\t', l, epoch_loss[epoch, k].mean(axis=-1))
    if epoch % 20 == 19:
        # Periodic checkpoint of the generator weights and the loss history.
        m = copy.deepcopy(generator.state_dict())
        torch.save(m, 'gen0_%d.pt' % epoch)
        np.save('gen0_epoch_loss.npy', epoch_loss)
以下是模型(网络结构)相关的代码:
import torch
from torch import nn
from torch.autograd import Variable
# Number of skeleton keypoints per body, and coordinates per keypoint.
KEYPOINTS = 25
DIM = 2 # x and y
def linear_block(in_dim, out_dim, batch_norm=True):
    """Build a [Linear, (BatchNorm1d,) ReLU] layer list.

    The optional BatchNorm1d sits between the Linear and the ReLU.
    """
    if batch_norm:
        return [nn.Linear(in_dim, out_dim), nn.BatchNorm1d(out_dim), nn.ReLU()]
    return [nn.Linear(in_dim, out_dim), nn.ReLU()]
def output_block(in_dim, out_dim):
    """Build the output head: Dropout(0.5) followed by a final Linear."""
    drop = nn.Dropout(0.5)
    proj = nn.Linear(in_dim, out_dim)
    return [drop, proj]
class Basic(nn.Module):
    """Three-stage MLP with a residual connection.

    in_dim -> 1024 (block1) -> 1024 (block2, two hidden layers) -> out_dim
    (block3), where block3 receives the sum of block1's and block2's outputs.
    """

    def __init__(self, in_dim, out_dim, batch_norm=True):
        super(Basic, self).__init__()
        self.block1 = nn.Sequential(*linear_block(in_dim, 1024, batch_norm))
        hidden = []
        for _ in range(2):
            hidden.extend(linear_block(1024, 1024, batch_norm))
        self.block2 = nn.Sequential(*hidden)
        self.block3 = nn.Sequential(*output_block(1024, out_dim))

    def forward(self, z):
        first = self.block1(z)
        second = self.block2(first)
        # Residual sum feeds the output head.
        return self.block3(first + second)
class Gen0(Basic):
    """Generator: maps a latent vector z to KEYPOINTS * DIM coordinates."""

    def __init__(self, z0_dim):
        """
        z0_dim: int
            The dimension of the input noise z.
        """
        self.z0_dim = z0_dim
        super(Gen0, self).__init__(z0_dim, KEYPOINTS * DIM)

    def forward(self, z, separate_xy=False):
        """Run the shared MLP on z of rank 1, 2 or 3.

        A 3-D z (e.g. batch x time x z0_dim) is flattened onto a single
        batch axis before the MLP and the leading axes are restored on the
        output.  If separate_xy, the output is reshaped to
        (..., KEYPOINTS, DIM) instead of (..., KEYPOINTS * DIM).
        """
        l = z.size()
        if len(l) == 3:
            # Merge axes 0 and 1 into one batch axis.
            # (BUGFIX: removed unused local `t = z.size(1)`.)
            o = super(Gen0, self).forward(z.reshape((-1, self.z0_dim)))
        else:  # assume rank 2 or 1
            o = super(Gen0, self).forward(z)
        if separate_xy:
            return o.reshape((*l[:-1], KEYPOINTS, DIM))
        else:
            return o.reshape((*l[:-1], KEYPOINTS * DIM))
class Dis0(Basic):
    """Critic: scores a flattened skeleton (KEYPOINTS * DIM) as one scalar."""

    def __init__(self):
        # batch_norm disabled for the critic.
        super(Dis0, self).__init__(KEYPOINTS * DIM, 1, False)
class VAE0(nn.Module):
    """VAE over flattened skeletons (KEYPOINTS * DIM features).

    Encoder: MLP -> (mu, logvar).  Decoder: a mirrored MLP back to the
    input dimension.
    """

    def __init__(self, z0_dim, hidden_dim):
        """
        z0_dim: int
            Latent dimension.
        hidden_dim: sequence of int
            Encoder hidden widths; the decoder mirrors them in reverse.
        """
        super(VAE0, self).__init__()
        in_out_dim = KEYPOINTS * DIM
        self.z0_dim = z0_dim
        # BUGFIX: the original called hidden_dim.reverse(), mutating the
        # caller's list in place; work on a copy instead.
        dims = list(hidden_dim)
        layers = []
        h_prev = in_out_dim
        for h in dims:
            layers += linear_block(h_prev, h)
            h_prev = h
        self.encoding = nn.Sequential(*layers)
        self.mu = nn.Sequential(
            *output_block(dims[-1], z0_dim)
        )
        self.logvar = nn.Sequential(
            *output_block(dims[-1], z0_dim)
        )
        layers = []
        h_prev = z0_dim
        for h in reversed(dims):
            layers += linear_block(h_prev, h)
            h_prev = h
        layers += output_block(h_prev, in_out_dim)
        self.decoding = nn.Sequential(*layers)

    def forward(self, x=None, z=None):
        """Reconstruct x (when given) or decode a supplied latent z.

        Returns (x_rec, x_stn, (mu, logvar, std)) in training mode, else
        just the reconstruction/generation x_rec.
        """
        if x is None:  # generate from the supplied z
            assert z is not None
            # NOTE(review): flipping self.training by hand bypasses
            # Module.eval(); kept as-is for backward compatibility.
            self.training = False
        else:  # reconstruct x
            h = self.encoding(x)
            mu = self.mu(h)
            logvar = self.logvar(h)
            std = torch.exp(logvar / 2)
            # Reparameterization trick (deprecated Variable wrapper removed;
            # randn_like already returns a non-grad tensor).
            z_stn = torch.randn_like(mu)
            z = mu + std * z_stn
            misc = (mu, logvar, std)
        x_rec = self.decoding(z)
        if self.training:
            x_stn = self.decoding(z_stn)
            return x_rec, x_stn, misc
        return x_rec
最后是数据集类的代码:
import numpy as np
import os
import torch.utils.data
import utils
class NTUSkeletonDataset(torch.utils.data.Dataset):
    """Dataset of NTU RGB+D skeleton files.

    Each item is read from one file under `root_dir` with shape
    (# bodies, # frames, # keypoints, xy), then pinned to a reference
    keypoint, aligned to a fixed frame count, optionally scaled per body
    and optionally merged into fewer axes.
    """

    def __init__(self, root_dir, frames=100, pinpoint=0, pin_body=None, merge=None,
                 scale_each_body=True):
        """
        root_dir: directory containing one skeleton file per sample.
        frames: target number of frames per sample.
        pinpoint: keypoint index used as the coordinate origin.
        pin_body: if given, pin all bodies to this body's pinpoint.
        merge: 1 -> merge keypoint/xy axes; 2 -> also merge body/frame axes.
        scale_each_body: normalize each body's x and y by its max magnitude.
        """
        super().__init__()
        self.root_dir = root_dir
        self.files = os.listdir(root_dir)
        self.num_frames = frames
        self.pinpoint = pinpoint
        self.pin_body = pin_body
        self.merge = merge
        self.scale_each_body = scale_each_body

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        fname = self.files[index]
        # (# bodies, # frames, # keypoints, xy)
        # (BUGFIX: removed dead `f = None` assignment.)
        f = utils.read(os.path.join(self.root_dir, fname))
        # Pin to one of the keypoints.
        f = self._pin_skeleton(f)
        # Align the frames.
        f = self._align_frames(f)
        # Scale each body's x and y independently; range(4) enumerates
        # body {0,1} x coordinate {x,y} -- assumes exactly 2 bodies.
        if self.pin_body is not None and self.scale_each_body:
            for i in range(4):
                f[i//2, ..., i % 2] /= np.abs(f[i//2, ..., i % 2]).max() + 1e-5
        if self.merge == 1:
            f = f.reshape((*f.shape[:2], 50))
        elif self.merge == 2:
            f = f.reshape((f.shape[0] * self.num_frames, 50))
        return f

    def _pin_skeleton(self, data):
        """Translate skeletons so the chosen keypoint becomes the origin."""
        if self.pin_body is None:
            # Each body is pinned to its own `pinpoint` keypoint.
            pin_xyz = data[..., self.pinpoint, :]
            data -= pin_xyz[..., None, :]
        else:
            # All bodies are pinned to `pin_body`'s pinpoint keypoint.
            pin_xyz = data[self.pin_body, :, self.pinpoint, :]
            data -= pin_xyz[None, :, None, :]
        return data

    def _align_frames(self, data):
        """Delete or interpolate frames so axis 1 has self.num_frames."""
        num_frames0 = data.shape[1]
        diff = num_frames0 - self.num_frames
        if diff > 0:  # too many frames: delete evenly spaced ones
            to_del = np.linspace(0, num_frames0, num=diff,
                                 endpoint=False, dtype=np.int32)
            return np.delete(data, to_del, axis=1)
        elif diff < 0:  # too few: interpolate extra frames
            # NOTE(review): buffer hard-codes (2 bodies, 25 keypoints, xy);
            # confirm every file really contains exactly 2 bodies.
            buf = np.zeros((2, self.num_frames, 25, 2),
                           dtype=np.float64)
            utils.ins_frames(buf, data, -diff)
            return buf
        else:  # already the right length
            return data