我正在尝试使用图像形状(64,64,3)在"Labeled Faces in the Wild"数据集上训练DCGAN。经过1000个或更多时期的训练后,它只会产生噪音。 代码如下:
class GAN():
img_rows = 64
img_cols = 64
channels = 3
optimizer_G = Adam(0.0002, 0.5)
optimizer_D = Adam(0.0002, 0.5)
def __init__(self):
# Initialize
self.img_rows = img_rows
self.img_cols = img_cols
self.channels = channels
self.img_shape = (self.img_rows, self.img_cols, self.channels)
self.latent_dim = 100
# Build the discriminator
self.discriminator = self.build_discriminator()
# Build the generator
self.generator = self.build_generator()
# The generator takes noise as input and generates imgs
z = Input(shape=(100,))
img = self.generator(z)
# For the combined model we will only train the generator
self.discriminator.trainable = False
# The discriminator takes generated images as input and determines validity
validity = self.discriminator(img)
# The combined model (stacked generator and discriminator)
# Trains the generator to fool the discriminator
self.combined = Model(inputs=z, outputs=validity)
self.combined.compile(loss='binary_crossentropy', optimizer=optimizer_G)
def build_generator(self):
input_shape = (self.img_rows, self.img_cols, self.channels)
mom=0.8
generator = Sequential()
generator.add(Dense(units= 512*4*4, kernel_initializer='glorot_uniform', input_dim=100))
generator.add(Reshape(target_shape=(4, 4, 512)))
generator.add(BatchNormalization(momentum=0.5))
generator.add(Activation('relu'))
generator.add(Conv2DTranspose(filters=256, kernel_size=(5, 5), strides=(2, 2), padding='same',
data_format='channels_last',
kernel_initializer='glorot_uniform'))
generator.add(BatchNormalization(momentum=mom))
generator.add(Activation('relu'))
generator.add(Conv2DTranspose(filters=128, kernel_size=(5, 5), strides=(2, 2), padding='same',
data_format='channels_last',
kernel_initializer='glorot_uniform'))
generator.add(BatchNormalization(momentum=mom))
generator.add(Activation('relu'))
generator.add(Conv2DTranspose(filters=64, kernel_size=(5, 5), strides=(2, 2), padding='same',
data_format='channels_last',
kernel_initializer='glorot_uniform'))
generator.add(BatchNormalization(momentum=mom))
generator.add(Activation('relu'))
generator.add(Conv2DTranspose(filters=3, kernel_size=(5, 5), strides=(2, 2), padding='same',
data_format='channels_last',
kernel_initializer='glorot_uniform'))
generator.add(Activation('tanh'))
print("Generator: ")
generator.summary()
# optimizer = Adam(lr=0.00015, beta_1=0.5)
generator.compile(loss='binary_crossentropy', optimizer=optimizer_D, metrics=None)
return generator
def build_discriminator(self):
drp = 0.5
mom=0.8
discriminator = Sequential()
discriminator.add(Conv2D(filters=64, kernel_size=(5, 5), strides=(2, 2), padding='same',
data_format='channels_last',
kernel_initializer='glorot_uniform',
input_shape=self.img_shape))
discriminator.add(LeakyReLU(0.2))
discriminator.add(Dropout(drp))
discriminator.add(Conv2D(filters=128, kernel_size=(5, 5), strides=(2, 2), padding='same',
data_format='channels_last',
kernel_initializer='glorot_uniform'))
discriminator.add(BatchNormalization(momentum=mom))
discriminator.add(LeakyReLU(0.2))
discriminator.add(Dropout(drp))
discriminator.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(2, 2), padding='same',
data_format='channels_last',
kernel_initializer='glorot_uniform'))
discriminator.add(BatchNormalization(momentum=mom))
discriminator.add(LeakyReLU(0.2))
discriminator.add(Dropout(drp))
discriminator.add(Conv2D(filters=512, kernel_size=(5, 5), strides=(2, 2), padding='same',
data_format='channels_last',
kernel_initializer='glorot_uniform'))
discriminator.add(BatchNormalization(momentum=mom))
discriminator.add(LeakyReLU(0.2))
discriminator.add(Dropout(drp))
discriminator.add(Flatten())
discriminator.add(Dense(1))
discriminator.add(Activation('sigmoid'))
print("Discriminator: ")
discriminator.summary()
# optimizer = Adam(lr=0.0002, beta_1=0.5)
discriminator.compile(loss='binary_crossentropy', optimizer=optimizer_D, metrics=['accuracy'])
return discriminator
def load_data(self):
path = 'faces1'
num_samples=len(os.listdir(path))
print(num_samples)
imlist = os.listdir(path)
immatrix = np.array([np.array(Image.open(path + '/' + im2).resize((img_rows, img_cols))).flatten() for im2 in imlist], 'f')
label=np.ones((num_samples,),dtype = int)
label[0:] = 0
train_data = [immatrix,label]
nb_classes = 2
X_train, X_test, y_train, y_test = train_test_split(train_data[0], train_data[1], test_size=0.1, random_state=4)
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, channels)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, channels)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
return X_train, X_test, Y_train, Y_test
def train(self, epochs, batch_size=128, sample_interval=50):
# Load the dataset
X_train, _, _, _ = self.load_data()
# Rescale -1 to 1
X_train = X_train / 127.5 - 1.
# print(X_train.shape," 1")
# X_train = np.expand_dims(X_train, axis=3)
# print(X_train.shape," 2")
# print(X_train)
# Adversarial ground truths
valid = np.ones((batch_size, 1))
fake = np.zeros((batch_size, 1))
d_loss_all, g_loss_all = [], []
for epoch in range(epochs):
# Train Discriminator
# Select a random batch of images
idx = np.random.randint(0, X_train.shape[0], batch_size)
imgs = X_train[idx]
# print(idx)
noise = np.random.normal(0, 1, (batch_size, 100))
# Generate a batch of new images
gen_imgs = self.generator.predict(noise)
# Train the discriminator
d_loss_real = self.discriminator.train_on_batch(imgs, valid)
d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
# Train Generator
noise = np.random.normal(0, 1, (batch_size, 100))
# Train the generator (to have the discriminator label samples as valid)
g_loss = self.combined.train_on_batch(noise, valid)
d_loss_all.append(d_loss[0])
g_loss_all.append(g_loss)
# Plot the progress
print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
# If at save interval => save generated image samples
if epoch % sample_interval == 0:
self.sample_images(epoch)
fig, ax = plt.subplots()
plt.plot(d_loss_all, label='Discriminator', alpha=0.5)
plt.plot(g_loss_all, label='Generator', alpha=0.5)
plt.title("Training Losses")
plt.legend()
if epoch == 0:
plt.legend()
plt.pause(0.0000000001)
plt.show()
plt.savefig('trainingLossPlot.png')
def sample_images(self, epoch):
r, c = 5, 5
noise = np.random.normal(0, 1, (r * c, 100))
gen_imgs = self.generator.predict(noise)
# Rescale images 0 - 1
gen_imgs = 0.5 * gen_imgs + 0.5
fig, axs = plt.subplots(r, c)
cnt = 0
for i in range(r):
for j in range(c):
axs[i,j].imshow(gen_imgs[cnt, :,:,0], cmap='gray')
axs[i,j].axis('off')
cnt += 1
fig.savefig("images/%d.png" % epoch)
plt.show()
plt.close()
gan = GAN()
gan.train(epochs=1000, batch_size=128, sample_interval=100)
损失情节类似于this
1000个纪元后生成的图像为these
损失是发电机和鉴别器的收敛仍在产生噪声。需要做些什么调整?
答案 0 :(得分:0)
您是否在imshow()中尝试过gen_imgs[cnt, :,:,0].astype(np.uint8)
?
如果不是,那可能是网络设计...
祝你好运!