我正在努力回到TensorFlow和Keras的最新发展。 我试着阅读是否使用tf.keras或keras,我得到的印象是,当使用TF作为后端时,它应该没什么关系,即使tf.keras的代码库与keras不同。
然而,使用Colaboratory和GPU后端,我试图关注https://github.com/eriklindernoren/Keras-GAN/blob/master/dcgan/dcgan.py
首先使用tf.keras。我收到很多警告:
WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set `model.trainable` without calling `model.compile` after ?
950 [D loss: 0.000024, acc.: 100.00%] [G loss: 0.000006]
(我确实遵循了这里的讨论: 见https://github.com/keras-team/keras/issues/8585)
准确度为100%且损失非常小,但是,当我使用训练模型生成并可视化图片时,我只得到1000次迭代后没有任何结构的褪色灰色图片。延长时间不会改善事情。
如果我通过更改import语句来使用“native”keras,我只会收到一次警告
/usr/local/lib/python3.6/dist-packages/keras/engine/training.py:975: UserWarning: Discrepancy between trainable weights and collected trainable weights, did you set `model.trainable` without calling `model.compile` after ?
'Discrepancy between trainable weights and collected trainable'
0 [D loss: 1.239847, acc.: 39.06%] [G loss: 0.439355]
50 [D loss: 0.746472, acc.: 58.59%] [G loss: 1.010316]
...
950 [D loss: 0.672316, acc.: 58.59%] [G loss: 0.916285]
注意与tf.keras相比,准确度如何保持在大约60%和1左右的损失。即使经过1000次迭代(与上述相同),我也可以生成合理的数字,但不是很好,但大多数可以识别。
我是否错过了一些明显的东西（如果是这样，先说声抱歉）？还是说目前使用tf.keras并不是一个好主意？
我在这里制作了笔记本的副本: https://colab.research.google.com/drive/1SKYDidvwkbQBH4cuT1YXSO_KUNM_OagJ
并把代码粘贴在下面。提前致谢！
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
#Keras from TensorFlow
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.python.keras.layers import Input, Dense, Activation, Dropout, LeakyReLU, BatchNormalization
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Flatten, Reshape, ZeroPadding2D
from tensorflow.python.keras.optimizers import Adam
#pure keras
#from keras.datasets import mnist
#from keras.layers import Input, Dense, Reshape, Flatten, Dropout
#from keras.layers import BatchNormalization, Activation, ZeroPadding2D
#from keras.layers.advanced_activations import LeakyReLU
#from keras.layers.convolutional import UpSampling2D, Conv2D
#from keras.models import Sequential, Model
#from keras.optimizers import Adam
from tensorflow.python.keras.datasets import mnist
print("all modules imported")
##
## DATA
## (already as 28x28 pix 2D numpy array)
# MNIST: x_* are 28x28 uint8 grayscale images, y_* are integer digit labels.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
plt.clf()
# Quick visual sanity check: show the first training image with its label.
plt.imshow(x_train[0],cmap='gray',interpolation='none')
plt.title("Label: {}".format(y_train[0]))
def discriminator():
    """Build the DCGAN discriminator.

    A strided-convolution CNN that downsamples a (28, 28, 1) image
    through 32/64/128/256-filter conv stages (each followed by
    LeakyReLU and Dropout, and BatchNorm from the second stage on)
    down to a single sigmoid real-vs-fake score.

    Returns:
        A Model mapping a (28, 28, 1) image to a scalar in [0, 1].
    """
    layer_stack = [
        Conv2D(32, kernel_size=3, strides=2, input_shape=(28, 28, 1), padding="same"),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(64, kernel_size=3, strides=2, padding="same"),
        # Pad bottom/right so the spatial size works out for the next stride-2 conv.
        ZeroPadding2D(padding=((0, 1), (0, 1))),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(128, kernel_size=3, strides=2, padding="same"),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(256, kernel_size=3, strides=1, padding="same"),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Flatten(),
        Dense(1, activation='sigmoid'),
    ]
    net = Sequential()
    for layer in layer_stack:
        net.add(layer)
    # Wrap the Sequential in a functional Model so callers get explicit
    # input/output tensors.
    image_in = Input(shape=(28, 28, 1))
    score = net(image_in)
    return Model(image_in, score)
def generator():
    """Build the DCGAN generator.

    Projects a 100-dim noise vector to a 7x7x128 feature map, then
    upsamples twice (7->14->28) with 3x3 convs, BatchNorm and ReLU,
    finishing with a single-channel tanh conv so outputs lie in [-1, 1].

    Returns:
        A Model mapping a (100,) noise vector to a (28, 28, 1) image.
    """
    layer_stack = [
        Dense(128 * 7 * 7, activation="relu", input_dim=100),
        Reshape((7, 7, 128)),
        UpSampling2D(),  # 7x7 -> 14x14
        Conv2D(128, kernel_size=3, padding="same"),
        BatchNormalization(momentum=0.8),
        Activation("relu"),
        UpSampling2D(),  # 14x14 -> 28x28
        Conv2D(64, kernel_size=3, padding="same"),
        BatchNormalization(momentum=0.8),
        Activation("relu"),
        Conv2D(1, kernel_size=3, padding="same"),
        Activation("tanh"),
    ]
    net = Sequential()
    for layer in layer_stack:
        net.add(layer)
    # Expose explicit input/output tensors via the functional API.
    noise_in = Input(shape=(100,))
    fake_image = net(noise_in)
    return Model(noise_in, fake_image)
# Shared Adam optimizer; 0.0002 is the learning rate, 0.5 is presumably
# beta_1 (the usual DCGAN settings) — confirm against the Adam signature.
optimizer = Adam(0.0002, 0.5)
# Discriminator is compiled on its own so it can be trained directly
# on real/fake batches.
discr_ = discriminator()
discr_.compile(loss='binary_crossentropy', optimizer=optimizer,metrics=['accuracy'] )
n_disc_trainable = len(discr_.trainable_weights)
# generator takes noise and generates images
gen_ = generator()
n_gen_trainable = len(gen_.trainable_weights)
# Combined graph: noise -> generator -> discriminator -> validity score.
z = Input(shape=(100,))
img = gen_(z)
#combined model - only train generator
# NOTE(review): multi-backend Keras snapshots `trainable` at compile time,
# so freezing here (after discr_.compile, before combined_model.compile)
# is meant to affect only combined_model. tf.keras may not honor this
# ordering the same way — likely the source of the tf.keras-vs-keras
# discrepancy described above; confirm for the TF version in use.
discr_.trainable = False
valid_image = discr_(img)
combined_model = Model(z,valid_image)
combined_model.compile(loss='binary_crossentropy', optimizer=optimizer)
# Sanity check (see https://github.com/keras-team/keras/issues/8585):
# compare the trainable weights Keras *collected* at compile time with the
# trainable weights each model is actually supposed to update. If both
# pairs match, the "Discrepancy between trainable weights..." warning
# emitted during training is benign and can be ignored.
ok_discr = False
ok_combined = False
# _collected_trainable_weights is a private attribute — presumably the
# snapshot taken at compile time; verify it exists for this Keras version.
n_discr_collected = len(discr_._collected_trainable_weights)
print("Discriminator collected trainable weights {}, set trainable weights {}".format(n_discr_collected, n_disc_trainable))
if n_discr_collected == n_disc_trainable:
    # Fixed typo in user-facing message: "Disriminator" -> "Discriminator".
    print("Discriminator weights OK")
    ok_discr = True
# The combined model should only collect the generator's weights, since
# the discriminator was frozen before combined_model.compile().
n_combined = len(combined_model._collected_trainable_weights)
print("Combined collected trainable weights {}, set trainable weights {}".format(n_combined, n_gen_trainable))
if n_combined == n_gen_trainable:
    print("Combined Model weights OK")
    ok_combined = True
if ok_discr and ok_combined:
    print("All weights OK, ignore warnings about model.compile / trainable")
batch_size = 64
# Rescale -1 to 1 --?
# Pixels go from [0, 255] to [-1, 1], matching the generator's tanh
# output range so real and generated images share the same scale.
x_train = x_train / 127.5 - 1.
# dataset is in shape (28,28) but require (28,28,1) - last digit is #channels
x2 = np.expand_dims(x_train, axis=3)
# Adversarial ground truths
# Label 1 = real, 0 = fake, one label per image in the batch.
valid = np.ones((batch_size, 1))
fake = np.zeros((batch_size, 1))
# Alternating GAN updates: one discriminator step (real + fake batch),
# then one generator step through the combined model, per iteration.
for epoch in range(1000):
    ##
    ## train discriminator
    ##
    # Select a random half of images
    idx = np.random.randint(0, x2.shape[0], batch_size)
    imgs = x2[idx]
    # Sample noise and generate a batch of new images
    noise = np.random.normal(0, 1, (batch_size, 100))
    gen_imgs = gen_.predict(noise)
    # Train the discriminator (real classified as ones and generated as zeros)
    d_loss_real = discr_.train_on_batch(imgs, valid)
    d_loss_fake = discr_.train_on_batch(gen_imgs, fake)
    # Average [loss, accuracy] over the real and fake half-batches.
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
    ##
    ## train generator
    ##
    # Train the generator (wants discriminator to mistake images as real)
    # Reuses the same noise batch; only generator weights should update
    # here — assuming the discriminator really is frozen inside
    # combined_model (see the trainable-flag note where it is compiled).
    g_loss = combined_model.train_on_batch(noise, valid)
    # Plot the progress
    if epoch % 50 == 0:
        print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))