我目前正在编写 GAN 以生成序列。当独立训练时,生成器和鉴别器都在工作。一旦我将两者结合到完整的 GAN 模型中(用冻结的判别器权重训练生成器),就会出现以下错误,并且生成器和判别器之间的图形似乎没有连接。
<块引用>ValueError: 没有为任何变量提供梯度:['generator_lstm/kernel:0', 'generator_lstm/recurrent_kernel:0', 'generator_lstm/bias:0', 'generator_softmax/kernel:0', 'generator_softmax/bias: 0']。
起初我以为是我的自定义激活函数导致了这个问题。但由于它在独立训练时可以正常工作,我怀疑是两个“子模型”没有正确连接。我不确定这是否重要,但在 TensorBoard 的计算图中,两个模型之间确实没有任何连接。
错误发生在 train() 函数的最后一行。 TF 2.1和2.4.1版本我已经试过了,没什么区别。
# softargmax and build[...]() functions are located in my "gan" python module
# custom softargmax implementation
@tf.function
def softargmax(values, beta=1000000.0):
    """Differentiable approximation of argmax over the last axis.

    Takes softmax(values * beta) and computes its expectation over the
    index range [0, num_classes), so the output approaches the hard
    argmax as `beta` grows while remaining differentiable w.r.t. `values`.

    Args:
        values: float-castable tensor; argmax is approximated along the
            last axis. Generalized: works for any last-dimension size,
            not only the previously hard-coded 54.
        beta: sharpness factor; larger means closer to a hard argmax.

    Returns:
        float32 tensor with the last axis reduced to size 1, holding the
        (soft) index of the maximum.
    """
    values = tf.cast(values, dtype=tf.float32)
    beta = tf.cast(beta, dtype=tf.float32)
    # Index column vector [0, 1, ..., num_classes-1]^T, sized from the
    # input instead of a hard-coded constant.
    num_classes = tf.shape(values)[-1]
    range_tensor = tf.cast(
        tf.reshape(tf.range(num_classes), [-1, 1]), tf.float32)
    # Subtracting the global max is a uniform shift, which leaves the
    # softmax unchanged; kept from the original for overflow safety of
    # the very large values*beta products.
    scaled = values * beta
    softmax = tf.nn.softmax(scaled - tf.reduce_max(scaled))
    return softmax @ range_tensor

# `softargmax` is already a tf.function (see decorator above); keep the
# alias for existing callers but don't wrap it a second time.
callable_softargmax = softargmax
get_custom_objects().update({'custom_activation': Activation(callable_softargmax)})
def build_generator(z_dim, seq_len, num_of_words):
    """Build the generator model: noise vector -> sequence of soft indices.

    Args:
        z_dim: size of the latent noise input.
        seq_len: length of the generated sequence.
        num_of_words: vocabulary size (width of the logits layer).

    Returns:
        A compiled-free keras Model mapping (z_dim,) noise to a
        (seq_len, 1) tensor of soft argmax indices.
    """
    gen_input = Input(shape=(z_dim,), name="generator_input")
    # Repeat the noise vector once per timestep for the LSTM.
    gen_repeat = RepeatVector(seq_len, name="generator_repeat")(gen_input)
    gen_lstm = LSTM(128, activation="relu", return_sequences=True,
                    name="generator_lstm")(gen_repeat)
    # Linear logits over the vocabulary; the Lambda below collapses them
    # to a (differentiable) soft index per timestep.
    gen_softmax = Dense(num_of_words, name="generator_softmax")(gen_lstm)
    gen_soft_argmax = Lambda(callable_softargmax,
                             name="generator_soft_argmax")(gen_softmax)
    generator = Model(gen_input, gen_soft_argmax, name="generator_model")
    generator.summary()
    return generator
def build_discriminator(seq_len, num_of_words, embedding_len):
    """Build the discriminator: (seq_len, 1) index sequence -> real/fake prob.

    Args:
        seq_len: length of the input index sequence.
        num_of_words: vocabulary size for the (frozen) embedding table.
        embedding_len: embedding dimension; must match the .npy weights.

    Returns:
        A keras Sequential model ending in a sigmoid real/fake score.
    """
    # Pretrained autoencoder embedding, loaded frozen (trainable=False below).
    embedding = np.load(PATH + MODELS + "embedding_ae.npy")
    discriminator = Sequential(name="gan_discriminator")
    discriminator.add(tf.keras.layers.InputLayer(input_shape=(seq_len, 1),
                                                 name="discriminator_input"))
    # Generalized: reshape to the given seq_len instead of the hard-coded 18.
    discriminator.add(Reshape(target_shape=[seq_len], dtype=tf.float32,
                              name="discriminator_reshape"))
    # NOTE(review): Embedding is an integer table lookup and has no gradient
    # w.r.t. its *input indices*. Feeding the generator's float softargmax
    # output through it cuts backprop into the generator — the likely cause
    # of the "No gradients provided for any variable" error when training
    # the combined GAN. Consider matmul-ing the generator's softmax
    # distribution with the embedding matrix instead.
    discriminator.add(Embedding(input_dim=num_of_words, output_dim=embedding_len,
                                input_length=seq_len, mask_zero=False,
                                embeddings_initializer=tf.keras.initializers.Constant(embedding),
                                trainable=False, name="discriminator_emb"))
    discriminator.add(Bidirectional(
        LSTM(128, activation="tanh", recurrent_activation="sigmoid",
             recurrent_dropout=0, unroll=False, use_bias=True,
             return_sequences=True),
        name="discriminator_lstm"))
    discriminator.add(Dropout(0.2, name="discriminator_dropout"))
    discriminator.add(LSTM(128, activation="tanh", recurrent_activation="sigmoid",
                           recurrent_dropout=0, unroll=False, use_bias=True,
                           name="discriminator_lstm2"))
    discriminator.add(Dropout(0.2, name="discriminator_dropout2"))
    discriminator.add(Dense(1, activation="sigmoid", name="discriminator_output"))
    discriminator.summary()
    return discriminator
def build_gan(generator, discriminator):
    """Stack the discriminator on top of the generator as one model."""
    return Sequential([generator, discriminator], name="gan")
def train(train_data, generator, discriminator, gan, iterations, batch_size, z_dim):
    """Alternate discriminator / generator training for `iterations` steps.

    Args:
        train_data: array of real sequences, indexed along axis 0.
        generator: standalone generator model (used to sample fakes).
        discriminator: standalone, trainable discriminator model.
        gan: combined model (generator + frozen discriminator).
        iterations: number of training steps.
        batch_size: samples per batch.
        z_dim: latent noise dimension.
    """
    real = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))
    for iteration in range(iterations):
        # --- train discriminator on one real and one generated batch ---
        idx = np.random.randint(0, train_data.shape[0], batch_size)
        train_samples = train_data[idx]
        # Add the trailing channel axis the discriminator expects;
        # -1 generalizes the previously hard-coded seq_len of 18.
        train_samples = np.reshape(train_samples, [batch_size, -1, 1])
        z = np.random.normal(0, 1, (batch_size, z_dim))
        gen_samples = generator.predict(z)
        d_loss_real = discriminator.train_on_batch(train_samples, real)
        d_loss_fake = discriminator.train_on_batch(gen_samples, fake)
        d_loss, accuracy = 0.5 * np.add(d_loss_real, d_loss_fake)
        # --- train generator through the GAN (discriminator frozen) ---
        # Removed a discarded generator.predict(z) that used to sit here:
        # it ran a full forward pass whose output was never used;
        # gan.train_on_batch(z, ...) already runs the generator itself.
        z = np.random.normal(0, 1, (batch_size, z_dim))
        g_loss = gan.train_on_batch(z, real)
# compiling and running models in main.py
discriminator = gan.build_discriminator(seq_len=18, num_of_words=54, embedding_len=200)
# `lr` is deprecated in TF2 optimizers; `learning_rate` is the supported name.
discriminator.compile(loss="binary_crossentropy",
                      optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                      metrics=["accuracy"])
# Freeze AFTER compiling: the standalone discriminator keeps training with
# its own optimizer, while any model built from it afterwards (the GAN
# below) sees its weights as non-trainable.
discriminator.trainable = False
generator = gan.build_generator(z_dim, seq_len=18, num_of_words=54)
gan_model = gan.build_gan(generator, discriminator)
gan_model.compile(loss="binary_crossentropy",
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
gan.train(train_data=train, generator=generator, discriminator=discriminator,
          gan=gan_model, iterations=iterations, batch_size=batch_size, z_dim=z_dim)
Model: "gan_discriminator"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
discriminator_reshape (Resha (None, 18) 0
_________________________________________________________________
discriminator_emb (Embedding (None, 18, 200) 10800
_________________________________________________________________
discriminator_lstm (Bidirect (None, 18, 256) 336896
_________________________________________________________________
discriminator_dropout (Dropo (None, 18, 256) 0
_________________________________________________________________
discriminator_lstm2 (LSTM) (None, 128) 197120
_________________________________________________________________
discriminator_dropout2 (Drop (None, 128) 0
_________________________________________________________________
discriminator_output (Dense) (None, 1) 129
=================================================================
Total params: 544,945
Trainable params: 534,145
Non-trainable params: 10,800
_________________________________________________________________
Model: "generator_model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
generator_input (InputLayer) [(None, 128)] 0
_________________________________________________________________
generator_repeat (RepeatVect (None, 18, 128) 0
_________________________________________________________________
generator_lstm (LSTM) (None, 18, 128) 131584
_________________________________________________________________
generator_softmax (Dense) (None, 18, 54) 6966
_________________________________________________________________
generator_soft_argmax (Lambd (None, 18, 1) 0
=================================================================
Total params: 138,550
Trainable params: 138,550
Non-trainable params: 0
_________________________________________________________________
Model: "gan"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
generator_model (Model) (None, 18, 1) 138550
_________________________________________________________________
gan_discriminator (Sequentia (None, 1) 544945
=================================================================
Total params: 683,495
Trainable params: 138,550
Non-trainable params: 544,945
_________________________________________________________________
您对模型有什么建议吗?可能有什么问题?