需要帮助来了解CGAN中的标签输入

时间:2019-03-20 09:53:54

标签: machine-learning pytorch dcgan

我正在尝试实现CGAN。我了解在卷积生成器和鉴别器模型中,您通过添加代表标签的深度来增加输入的体积。因此,如果您的数据中有10个类别,则生成器和鉴别器都将基本深度+ 10作为其输入量。

但是,我正在在线阅读各种实现,但似乎找不到他们到底是在哪里获取这些标签的。当然,CGAN不可能在无监督的情况下训练,因为您需要获取标签并将其输入模型。例如在cifar10中,如果您要在青蛙的真实图像上训练鉴别器,则需要使用“青蛙”注释。

这是我正在研究的代码之一:

class CGAN(object):
    """Conditional GAN trainer.

    The generator G is conditioned on a one-hot class vector (`y_vec_`);
    the discriminator D is conditioned on the same label broadcast to a
    (class_num, H, W) stack of constant feature maps (`y_fill_`) that is
    concatenated with the image channels inside D.
    """

    def __init__(self, args):
        # parameters
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.save_dir = args.save_dir
        self.result_dir = args.result_dir
        self.dataset = args.dataset
        self.log_dir = args.log_dir
        self.gpu_mode = args.gpu_mode
        self.model_name = args.gan_type
        self.z_dim = 62          # latent noise dimension
        self.class_num = 10      # number of condition classes
        self.input_size = args.input_size
        self.sample_num = self.class_num ** 2  # eval grid: class_num samples per class

        # load dataset; peek at one batch to learn the image channel count
        self.data_loader = dataloader(self.dataset, self.input_size, self.batch_size)
        data = next(iter(self.data_loader))[0]

        # networks init
        self.G = generator(input_dim=self.z_dim, output_dim=data.shape[1], input_size=self.input_size, class_num=self.class_num)
        self.D = discriminator(input_dim=data.shape[1], output_dim=1, input_size=self.input_size, class_num=self.class_num)
        self.G_optimizer = optim.Adam(self.G.parameters(), lr=args.lrG, betas=(args.beta1, args.beta2))
        self.D_optimizer = optim.Adam(self.D.parameters(), lr=args.lrD, betas=(args.beta1, args.beta2))

        if self.gpu_mode:
            self.G.cuda()
            self.D.cuda()
            self.BCE_loss = nn.BCELoss().cuda()
        else:
            self.BCE_loss = nn.BCELoss()

        print('---------- Networks architecture -------------')
        utils.print_network(self.G)
        utils.print_network(self.D)
        print('-----------------------------------------------')

        # Fixed noise & condition for evaluation samples.
        # Row i*class_num + j uses noise z_i (shared across its class block)
        # and one-hot label j, so each grid row holds the noise fixed while
        # the class varies across columns.
        self.sample_z_ = torch.rand(self.class_num, self.z_dim).repeat_interleave(self.class_num, dim=0)
        self.sample_y_ = torch.eye(self.class_num).repeat(self.class_num, 1)
        if self.gpu_mode:
            self.sample_z_, self.sample_y_ = self.sample_z_.cuda(), self.sample_y_.cuda()

    def train(self):
        """Run the adversarial training loop for `self.epoch` epochs."""
        self.train_hist = {
            'D_loss': [],
            'G_loss': [],
            'per_epoch_time': [],
            'total_time': [],
        }

        # BCE targets: 1 = "real", 0 = "fake"
        self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
        if self.gpu_mode:
            self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

        self.D.train()
        print('training start!!')
        start_time = time.time()
        for epoch in range(self.epoch):
            self.G.train()
            epoch_start_time = time.time()
            # `step` was named `iter` originally, shadowing the builtin.
            for step, (x_, y_) in enumerate(self.data_loader):
                # Drop the trailing partial batch so tensor shapes stay fixed.
                if step == len(self.data_loader.dataset) // self.batch_size:
                    break

                z_ = torch.rand((self.batch_size, self.z_dim))
                # y_vec_: (B, class_num) one-hot labels built from the
                # minibatch targets y_ — this is where the dataset labels
                # enter the model.
                y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(1, y_.type(torch.LongTensor).unsqueeze(1), 1)
                # y_fill_: same one-hot, broadcast to (B, class_num, H, W) so
                # D can consume the label as extra constant image channels.
                y_fill_ = y_vec_.unsqueeze(2).unsqueeze(3).expand(self.batch_size, self.class_num, self.input_size, self.input_size)
                if self.gpu_mode:
                    x_, z_, y_vec_, y_fill_ = x_.cuda(), z_.cuda(), y_vec_.cuda(), y_fill_.cuda()

                # update D network: push real images toward 1, fakes toward 0
                self.D_optimizer.zero_grad()

                D_real = self.D(x_, y_fill_)
                D_real_loss = self.BCE_loss(D_real, self.y_real_)

                G_ = self.G(z_, y_vec_)
                D_fake = self.D(G_, y_fill_)
                D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)

                D_loss = D_real_loss + D_fake_loss
                self.train_hist['D_loss'].append(D_loss.item())

                D_loss.backward()
                self.D_optimizer.step()

                # update G network: fool D into predicting 1 on fakes
                self.G_optimizer.zero_grad()

                G_ = self.G(z_, y_vec_)
                D_fake = self.D(G_, y_fill_)
                G_loss = self.BCE_loss(D_fake, self.y_real_)
                self.train_hist['G_loss'].append(G_loss.item())

                G_loss.backward()
                self.G_optimizer.step()
似乎y_vec_和y_fill_是图像的标签,但是在y_fill_用来为鉴别器标记真实图像的实例中,它等于y_fill_ = y_vec_.unsqueeze(2).unsqueeze(3).expand(self.batch_size, self.class_num, self.input_size, self.input_size)

似乎没有从数据集中获取标签上的任何信息吗? 如何为鉴别器提供正确的标签?

谢谢!

1 个答案:

答案 0 :(得分:2)

y_fill_基于y_vec_,而y_vec_是根据从小批量中读取的标签y_通过scatter构建的,因此它们确实携带了数据集中正确的标签信息。您可能是对scatter操作感到困惑——基本上,这段代码的作用就是把整数标签转换为one-hot编码。