RuntimeError:预期用于4维权重128 256的4维输入,但是却获得了大小为[32,128]的2维输入

时间:2020-08-06 01:43:08

标签: pytorch dimensions generative-adversarial-network

我正在使用条件GAN 作为基本模型来创建图像生成器。我遇到了一个错误,即使在网上搜索解决方案后,我也不知道如何调试。我不确定是否应该更改训练设置或对模型进行一些调整等等。任何有关做什么的帮助将不胜感激。

我正在使用的CGAN模型:

create clustered index idx on #TempPlAndCodeType (CodeTypeId)

用于初始化模型的代码:

class Generator(nn.Module):
    def __init__(self, classes, channels, img_size, latent_dim):
        super(Generator, self).__init__()
        self.classes = classes
        self.channels = channels
        self.img_size = img_size
        self.latent_dim = latent_dim
        self.img_shape = (self.channels, self.img_size, self.img_size)
        self.label_embedding = nn.Embedding(self.classes, self.classes) # process label information, behave as a lookup table

        self.model = nn.Sequential(
            *self._create_layer_1(self.latent_dim + self.classes, 128, False),
            *self._create_layer_2(128, 256),
            *self._create_layer_2(256, 512),
            *self._create_layer_2(512, 1024),
            nn.Linear(1024, int(np.prod(self.img_shape))),
            nn.Tanh()
        )

    def _create_layer_1(self, size_in, size_out, normalize=True):
        layers = [nn.Linear(size_in, size_out)]
        if normalize:
            layers.append(nn.BatchNorm1d(size_out))
        layers.append(nn.LeakyReLU(0.2, inplace=True))
        return layers

    def _create_layer_2(self, size_in, size_out, normalize=True):
        layers = [nn.ConvTranspose2d(size_in, size_out, 4, 2, 1, bias=False)]
        if normalize:
            layers.append(nn.BatchNorm1d(size_out))
        layers.append(nn.LeakyReLU(0.2, inplace=True))
        return layers

    def forward(self, noise, labels):
        z = torch.cat((self.label_embedding(labels), noise), -1)
        x = self.model(z)
        x = x.view(x.size(0), *self.img_shape)
        return x


class Discriminator(nn.Module):
    def __init__(self, classes, channels, img_size, latent_dim):
        super(Discriminator, self).__init__()
        self.classes = classes
        self.channels = channels
        self.img_size = img_size
        self.latent_dim = latent_dim
        self.img_shape = (self.channels, self.img_size, self.img_size)
        self.label_embedding = nn.Embedding(self.classes, self.classes)
        self.adv_loss = torch.nn.BCELoss()

        self.model = nn.Sequential(
            *self._create_layer_1(self.classes + int(np.prod(self.img_shape)), 1024, False, True),
            *self._create_layer_2(1024, 512, True, True),
            *self._create_layer_2(512, 256, True, True),
            *self._create_layer_2(256, 128, False, False),
            *self._create_layer_1(128, 1, False, False),
            nn.Sigmoid()
        )

    def _create_layer_1(self, size_in, size_out, drop_out=True, act_func=True):
        layers = [nn.Linear(size_in, size_out)]
        if drop_out:
            layers.append(nn.Dropout(0.4))
        if act_func:
            layers.append(nn.LeakyReLU(0.2, inplace=True))
        return layers

    def _create_layer_2(self, size_in, size_out, drop_out=True, act_func=True):
        layers = [nn.Conv2d(size_in, size_out, 4, 2, 1, bias=False)]
        if drop_out:
            layers.append(nn.Dropout(0.4))
        if act_func:
            layers.append(nn.LeakyReLU(0.2, inplace=True))
        return layers

    def forward(self, image, labels):
        x = torch.cat((image.view(image.size(0), -1), self.label_embedding(labels)), -1)
        return self.model(x)

    def loss(self, output, label):
        return self.adv_loss(output, label)

设置培训的代码:

class Model(object):
    def __init__(self,
                 name,
                 device,
                 data_loader,
                 classes,
                 channels,
                 img_size,
                 latent_dim,
                 style_dim=3):
        self.name = name
        self.device = device
        self.data_loader = data_loader
        self.classes = classes
        self.channels = channels
        self.img_size = img_size
        self.latent_dim = latent_dim
        self.style_dim = style_dim
        self.netG = cganG(self.classes, self.channels, self.img_size, self.latent_dim)
        self.netG.to(self.device)
        self.netD = cganD(self.classes, self.channels, self.img_size, self.latent_dim)
        self.netD.to(self.device)
        self.optim_G = None
        self.optim_D = None

    @property
    def generator(self):
        return self.netG

    @property
    def discriminator(self):
        return self.netD

    def create_optim(self, lr, alpha=0.5, beta=0.999):
        self.optim_G = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        self.netG.parameters()),
                                        lr=lr,
                                        betas=(alpha, beta))
        self.optim_D = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        self.netD.parameters()),
                                        lr=lr,
                                        betas=(alpha, beta))

    def _to_onehot(self, var, dim):
        res = torch.zeros((var.shape[0], dim), device=self.device)
        res[range(var.shape[0]), var] = 1.
        return res

    def train(self,
              epochs,
              log_interval=100,
              out_dir='',
              verbose=True):
        self.netG.train()
        self.netD.train()
        viz_z = torch.zeros((self.data_loader.batch_size, self.latent_dim), device=self.device)
        viz_noise = torch.randn(self.data_loader.batch_size, self.latent_dim, device=self.device)
        nrows = self.data_loader.batch_size // 8
        viz_label = torch.LongTensor(np.array([num for _ in range(nrows) for num in range(8)])).to(self.device)
        viz_onehot = self._to_onehot(viz_label, dim=self.classes)
        viz_style = torch.zeros((self.data_loader.batch_size, self.style_dim), device=self.device)
        total_time = time.time()
        for epoch in range(epochs):
            batch_time = time.time()
            for batch_idx, (data, target) in enumerate(self.data_loader):
                data, target = data.to(self.device), target.to(self.device)
                batch_size = data.size(0)
                real_label = torch.full((batch_size, 1), 1., device=self.device)
                fake_label = torch.full((batch_size, 1), 0., device=self.device)

                # Train G
                self.netG.zero_grad()
                z_noise = torch.randn(batch_size, self.latent_dim, device=self.device)
                x_fake_labels = torch.randint(0, self.classes, (batch_size,), device=self.device)
                x_fake = self.netG(z_noise, x_fake_labels)
                y_fake_g = self.netD(x_fake, x_fake_labels)
                g_loss = self.netD.loss(y_fake_g, real_label)
                g_loss.backward()
                self.optim_G.step()

                # Train D
                self.netD.zero_grad()
                y_real = self.netD(data, target)
                d_real_loss = self.netD.loss(y_real, real_label)
                y_fake_d = self.netD(x_fake.detach(), x_fake_labels)
                d_fake_loss = self.netD.loss(y_fake_d, fake_label)
                d_loss = (d_real_loss + d_fake_loss) / 2
                d_loss.backward()
                self.optim_D.step()

                if verbose and batch_idx % log_interval == 0 and batch_idx > 0:
                    print('Epoch {} [{}/{}] loss_D: {:.4f} loss_G: {:.4f} time: {:.2f}'.format(
                            epoch, batch_idx, len(self.data_loader),
                            d_loss.mean().item(),
                            g_loss.mean().item(),
                            time.time() - batch_time))
                    vutils.save_image(data, os.path.join(out_dir, 'real_samples.png'), normalize=True)
                    with torch.no_grad():
                        viz_sample = self.netG(viz_noise, viz_label)
                        vutils.save_image(viz_sample, os.path.join(out_dir, 'fake_samples_{}.png'.format(epoch)), nrow=8, normalize=True)
                    batch_time = time.time()   
            
            torch.save(self.netG.state_dict(), os.path.join(out_dir, 'netG_{}.pth'.format(epoch)))
            torch.save(self.netD.state_dict(), os.path.join(out_dir, 'netD_{}.pth'.format(epoch)))

            self.save_to(path=out_dir, name=self.name, verbose=False)
        if verbose:
            print('Total train time: {:.2f}'.format(time.time() - total_time))

设置:

def main():
    device = torch.device("cuda:0" if FLAGS.cuda else "cpu")
    if FLAGS.train:
        dataloader = torch.utils.data.DataLoader(
            dset.ImageFolder(FLAGS.data_dir, transforms.Compose([
                transforms.Resize(FLAGS.img_size),
                transforms.CenterCrop(FLAGS.img_size),
                transforms.ToTensor()
                ])),
                batch_size=FLAGS.batch_size,
                shuffle=True, 
                num_workers=4, 
                pin_memory=True
                )
        model = Model(FLAGS.model, device, dataloader, FLAGS.classes, FLAGS.channels, FLAGS.img_size, FLAGS.latent_dim)
        model.create_optim(FLAGS.lr)

        # Train
        print("Start training...\n")
        model.train(FLAGS.epochs, FLAGS.log_interval, FLAGS.out_dir, True)

if __name__ == '__main__':
    from utils import boolean_string
    parser.add_argument('--cuda', type=boolean_string, default=True, help='enable CUDA.')
    parser.add_argument('--train', type=boolean_string, default=True, help='train mode or eval mode.')
    parser.add_argument('--data_dir', type=str, default='../datasets', help='Directory for dataset.')
    parser.add_argument('--out_dir', type=str, default='output', help='Directory for output.')
    parser.add_argument('--epochs', type=int, default=800, help='number of epochs')
    parser.add_argument('--batch_size', type=int, default=32, help='size of batches')
    parser.add_argument('--lr', type=float, default=0.0002, help='learning rate')
    parser.add_argument('--latent_dim', type=int, default=62, help='latent space dimension')
    parser.add_argument('--classes', type=int, default=25, help='number of classes')
    parser.add_argument('--img_size', type=int, default=128, help='size of images')
    parser.add_argument('--channels', type=int, default=3, help='number of image channels')

图像尺寸作为输入:

PyTorch version: 1.1.0
CUDA version: 9.0.176

         Args         |    Type    |    Value
--------------------------------------------------
  cuda                |  bool      |  True
  train               |  bool      |  True
  resume              |  bool      |  False
  data_dir            |  str       |  ../datasets
  out_dir             |  str       |  output
  epochs              |  int       |  800
  batch_size          |  int       |  32
  lr                  |  float     |  0.0002
  latent_dim          |  int       |  62
  classes             |  int       |  25
  img_size            |  int       |  128
  channels            |  int       |  3

模型结构:

torch.Size([32, 3, 128, 128])

我得到的错误:

Generator(
  (label_embedding): Embedding(25, 25)
  (model): Sequential(
    (0): Linear(in_features=87, out_features=128, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace)
    (2): ConvTranspose2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2, inplace)
    (5): ConvTranspose2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2, inplace)
    (8): ConvTranspose2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.2, inplace)
    (11): Linear(in_features=1024, out_features=49152, bias=True)
    (12): Tanh()
  )
)

Discriminator(
  (label_embedding): Embedding(25, 25)
  (adv_loss): BCELoss()
  (model): Sequential(
    (0): Linear(in_features=49177, out_features=1024, bias=True)
    (1): LeakyReLU(negative_slope=0.2, inplace)
    (2): Conv2d(1024, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (3): Dropout(p=0.4)
    (4): LeakyReLU(negative_slope=0.2, inplace)
    (5): Conv2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (6): Dropout(p=0.4)
    (7): LeakyReLU(negative_slope=0.2, inplace)
    (8): Conv2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): Linear(in_features=128, out_features=1, bias=True)
    (10): Sigmoid()
  )
)

我正在使用具有3个通道和25个类的自己的图像数据集。我试图更改图像大小和内核大小,但仍然遇到相同的错误。在调试方面应该做的任何帮助将不胜感激。

1 个答案:

答案 0 :(得分:0)

问题实际上出在您的模型架构上。您正在尝试在线性完全连接的层之后放置一个conv2d层。_create_layer_1产生一维输出。您正在尝试将此1d输出馈送到需要多维输入的conv2d层。

从您的代码中,我一次就能感觉到最好的事情是从生成器类中完全删除“ _create_layer_2”函数,并使用_create_layer_1函数定义所有层(以便所有层都是完全连接的层) 。还要对您的鉴别器执行此操作

如果仍然需要使用conv2d。您应该将输入转换为2d张量以将conv2d整形。另外,您必须在最终线性层之前将2d张量展平为1d。或者,您可以放弃第一个线性nn.Linear图层,并完全从conv2d开始。

总结 在设计GAN时,您可能会有开发CNN的经验。关键是,您不能简单地将conv2d / conv图层与线性图层混合在一起,而无需使用适当的展平/整形。

欢呼