Question

我有一个如下所示的通用对抗网络（GAN）Keras代码。我的火车目录由512x512x3张图片组成。为什么print语句返回以下内容？如何使生成的图像也具有(374, 512, 512, 3)形状？

这是生成的图像的形状（374、32、32、3）

这是真实图像的形状（374、512、512、3）

import keras
from keras import layers
import numpy as np
import cv2
import os
from keras.preprocessing import image

latent_dimension = 512
height = 512
width = 512
channels = 3
iterations = 100
batch_size = 20
number_of_images = 374
real_images = []

# paths to the training and results directories
train_directory = '/train'
results_directory = '/results'

# GAN generator
generator_input = keras.Input(shape=(latent_dimension,))

# transform the input into a 16x16 128-channel feature map
x = layers.Dense(128*16*16)(generator_input)
x = layers.LeakyReLU()(x)
x = layers.Reshape((16,16,128))(x)

x = layers.Conv2D(256,5,padding='same')(x)
x = layers.LeakyReLU()(x)

# upsample to 32x32
x = layers.Conv2DTranspose(256,4,strides=2,padding='same')(x)
x = layers.LeakyReLU()(x)

x = layers.Conv2D(256,5,padding='same')(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(256,5,padding='same')(x)
x = layers.LeakyReLU()(x)

# a 32x32 1-channel feature map is generated (i.e. shape of image)
x = layers.Conv2D(channels,7,activation='tanh',padding='same')(x)
# instantiae the generator model, which maps the input of shape (latent dimension) into an image of shape (32,32,1)
generator = keras.models.Model(generator_input,x)
generator.summary()

# GAN discriminator
discriminator_input = layers.Input(shape=(height,width,channels))

x = layers.Conv2D(128,3)(discriminator_input)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(128,4,strides=2)(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(128,4,strides=2)(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(128,4,strides=2)(x)
x = layers.LeakyReLU()(x)
x = layers.Flatten()(x)

# dropout layer
x = layers.Dropout(0.4)(x)

# classification layer
x = layers.Dense(1,activation='sigmoid')(x)

# instantiate the discriminator model, and turn a (32,32,1) input
# into a binary classification decision (fake or real)
discriminator = keras.models.Model(discriminator_input,x)
discriminator.summary()

discriminator_optimizer = keras.optimizers.RMSprop(
    lr=0.0008,
    clipvalue=1.0,
    decay=1e-8)

discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy')

# adversarial network
discriminator.trainable = False

gan_input = keras.Input(shape=(latent_dimension,))
gan_output = discriminator(generator(gan_input))
gan = keras.models.Model(gan_input,gan_output)

gan_optimizer = keras.optimizers.RMSprop(
    lr=0.0004,
    clipvalue=1.0,
    decay=1e-8)

gan.compile(optimizer=gan_optimizer,loss='binary_crossentropy')

for step in range(iterations):
    # sample random points in the latent space
    random_latent_vectors = np.random.normal(size=(number_of_images,latent_dimension))
    # decode the random latent vectors into fake images
    generated_images = generator.predict(random_latent_vectors)

    #i = start
    for root, dirs, files in os.walk(train_directory):
        for i in range(number_of_images):
            img = cv2.imread(root + '/' + str(i) + '.jpg')
            real_images.append(img)

    print 'This is the shape of the generated images'
    print np.array(generated_images).shape
    print 'This is the shape of the real images'
    print np.array(real_images).shape

    # combine fake images with real images
    combined_images = np.concatenate([generated_images,real_images])
    # assemble labels and discrminate between real and fake images
    labels = np.concatenate([np.ones((number_of_images,1)),np.zeros((number_of_images,1))])
    # add random noise to the labels
    labels = labels + 0.05 * np.random.random(labels.shape)
    # train the discriminator
    discriminator_loss = discriminator.train_on_batch(combined_images,labels)
    random_latent_vectors = np.random.normal(size=(number_of_images,latent_dimension))
    # assemble labels that classify the images as "real", which is not true
    misleading_targets = np.zeros((number_of_images,1))
    # train the generator via the GAN model, where the discriminator weights are frozen
    adversarial_loss = gan.train_on_batch(random_latent_vectors,misleading_targets)

    # save the model weights
    gan.save_weights('gan.h5')
    print'discriminator loss: ' 
    print discriminator_loss
    print 'adversarial loss: '
    print adversarial_loss
    img = image.array_to_img(generated_images[0] * 255.)
    img.save(os.path.join(results_directory,'generated_melanoma_image' + str(step) + '.png'))
    img = image.array_to_img(real_images[0] * 255.)
    img.save(os.path.join(results_directory,'real_melanoma_image' + str(step) + '.png'))

谢谢。

Answer 1

代码中的注释提示解决方案： # upsample to 32x32和 a 32x32 1-channel feature map is generated (i.e. shape of image)。

您可以通过在生成器中添加更多Conv2DTranspose层来对更大的图像尺寸进行升采样。

Answer 2

我注意到，为了使生成的图像的大小为512x512，可以如下编辑以下语句：

x = layers.Dense(128*256*256)(generator_input)

x = layers.Reshape((256,256,128))(x)

为什么生成的图像与该GAN中的真实图像具有不同的形状？

2 个答案: