K.gradients() returns None (Keras with TensorFlow backend). Is there a problem with my model structure?
I have adapted the WGAN-GP model for text in Keras, based on this code: https://github.com/OctThe16th/WGAN-GP-with-keras-for-text/blob/master/Exploration/GenerativeAdverserialWGAN-GP.py. My modified code is as follows:
from keras.layers import Dense, Flatten, Input, BatchNormalization, Dropout, GRU, Bidirectional, Reshape, Activation
from keras.layers.noise import GaussianNoise
from keras.models import Model
from keras.layers.merge import _Merge
from keras.layers import Convolution1D, AveragePooling1D, ZeroPadding1D, UpSampling1D, concatenate, regularizers
from keras.layers import Embedding, Reshape, Lambda
from keras.layers import LSTM, multiply
from keras.optimizers import Adam, RMSprop
from random import randint
from keras.initializers import Constant
from keras import backend as K
from keras import layers
import numpy as np
from functools import partial
import pickle
import os
import tensorflow as tf
def wasserstein_loss(y_true, y_pred):
    return K.mean(y_true * y_pred)
def gradient_penalty_loss(y_true, y_pred, averaged_samples):
    '''Computes gradient penalty based on prediction and weighted real / fake samples'''
    gradients = K.gradients(K.sum(y_pred), averaged_samples)
    # compute the euclidean norm by squaring ...
    gradients_sqr = K.square(gradients)
    # ... summing over the rows ...
    gradients_sqr_sum = K.sum(gradients_sqr)
    # ... and sqrt
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    # compute lambda * (1 - ||grad||)^2 still for each single sample
    gradient_penalty = K.square(1 - gradient_l2_norm)
    # return the mean as loss over all the batch samples
    return K.mean(gradient_penalty)
class RandomWeightedAverage(_Merge):
    def _merge_function(self, inputs):
        weights = K.random_uniform((BATCH_SIZE, 1), dtype='float32')
        return (weights * inputs[0]) + ((1 - weights) * inputs[1])
# K.argmax() is not differentiable, so this function is defined to replace it with a differentiable soft argmax.
def argmax(x):
    y = tf.reduce_sum(tf.cumsum(tf.ones_like(x), axis=-1) * tf.exp(beta * x) / tf.reduce_sum(tf.exp(beta * x), axis=-1, keep_dims=True), axis=-1) - 1
    return y
def generator_mod(softmax_shape):
    person1_input = Input(shape=(1,), dtype='float32')
    noise_input = Input(shape=(1, person_embedding_dim), dtype='float32')
    relation_input = Input(shape=(1,), dtype='float32')
    person1_embedded = Embedding(1, person_embedding_dim)(person1_input)
    relation_embedded = Embedding(1, relation_embedding_dim)(relation_input)
    embedded_layer = concatenate([person1_embedded, relation_embedded, noise_input], axis=1)
    drop_1 = BatchNormalization(momentum=0.8)(embedded_layer)
    x_1 = Convolution1D(filters=64, kernel_size=3, padding='same', activation='elu')(drop_1)
    x_1 = BatchNormalization()(x_1)
    x_1 = Convolution1D(filters=32, kernel_size=3, padding='same', activation='elu')(x_1)
    x_1 = BatchNormalization()(x_1)
    x_1 = Flatten()(x_1)
    x_1 = Dense(32, activation='relu')(x_1)
    ######################################################################
    person1_description = Input(shape=(max_sequence_length,), dtype='float32')
    embedded_sequences1 = Embedding(len(word_index) + 1, word_embeddings_dim)(person1_description)
    lstm_out1 = Bidirectional(LSTM(64))(embedded_sequences1)
    ##### global attention start #####
    attention_1 = Dense(128, activation='softmax', name='attention_vec')(lstm_out1)
    attention_mul = multiply([lstm_out1, attention_1], name='attention_mul')
    ##### global attention finish #####
    x_2 = Dense(32, activation='relu')(attention_mul)
    full_connected = multiply([x_1, x_2], name='full_connected')
    x = Dense(softmax_shape, activation='softmax')(full_connected)
    output = Lambda(argmax)(x)  # shape (?,)
    output = Lambda(K.expand_dims, arguments={'axis': -1})(output)  # shape (?, 1)
    model = Model(inputs=[person1_input, noise_input, relation_input, person1_description], outputs=output)
    return model
def discriminator_mod():
    person1_input = Input(shape=(1,), dtype='float32')
    person2_input = Input(shape=(1,), dtype='float32')
    relation_input = Input(shape=(1,), dtype='float32')
    person1_embedded = Embedding(1, person_embedding_dim)(person1_input)
    person2_embedded = Embedding(1, person_embedding_dim)(person2_input)
    relation_embedded = Embedding(len(word_index) + 1, word_embeddings_dim)(relation_input)
    embedded_layer = concatenate([person1_embedded, person2_embedded, relation_embedded], axis=1)
    drop_1 = Dropout(0.5)(embedded_layer)
    x = Convolution1D(128, 1, activation='relu')(drop_1)
    x = BatchNormalization()(x)
    x = Convolution1D(filters=64, kernel_size=3, padding='same', activation='elu')(x)
    x = BatchNormalization()(x)
    x = Convolution1D(filters=32, kernel_size=3, padding='same', activation='elu')(x)
    x = BatchNormalization()(x)
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    auxiliary_input1 = Input(shape=(max_sequence_length,), dtype='float32', name='aux_input1')
    embedded_sequences1 = Embedding(len(word_index) + 1, word_embeddings_dim)(auxiliary_input1)
    lstm_out1 = Bidirectional(LSTM(64))(embedded_sequences1)
    lstm_drop1 = Dropout(0.5)(lstm_out1)
    auxiliary_input2 = Input(shape=(max_sequence_length,), dtype='float32', name='aux_input2')
    embedded_sequences2 = Embedding(len(word_index) + 1, word_embeddings_dim)(auxiliary_input2)
    lstm_out2 = Bidirectional(LSTM(64))(embedded_sequences2)
    lstm_drop2 = Dropout(0.5)(lstm_out2)
    lstm_drop = multiply([lstm_drop1, lstm_drop2])
    ##### global attention start #####
    attention_1 = Dense(128, activation='softmax', name='attention_vec')(lstm_drop)
    attention_mul = multiply([lstm_drop, attention_1], name='attention_mul')
    ##### global attention finish #####
    # attention_mul = Flatten()(attention_mul)
    attention_mul = Dense(32, activation='relu')(attention_mul)
    ##### global attention start #####
    full_connected = multiply([x, attention_mul], name='full_connected')
    attention_2 = Dense(32, activation='softmax')(full_connected)
    attention_final = multiply([full_connected, attention_2])
    ##### global attention finish #####
    dense_layer = Dense(16, activation='relu')(attention_final)
    main_output = Dense(1, activation='tanh', name='main_output')(dense_layer)
    model = Model(inputs=[person1_input, person2_input, relation_input, auxiliary_input1, auxiliary_input2], outputs=main_output)
    return model
def train(from_save_point=False, suffix='rnn'):
    X_train = np.random.randn(10, 243)
    generator = generator_mod(person_total)
    discriminator = discriminator_mod()
    generator.summary()
    discriminator.summary()
    # freeze the discriminator while building the generator training model
    for layer in discriminator.layers:
        layer.trainable = False
    discriminator.trainable = False
    person1 = Input(shape=(1,))
    relation = Input(shape=(1,))
    seed = Input(shape=(1, person_embedding_dim))
    person1_description = Input(shape=(max_sequence_length,))
    generated_person2 = generator([person1, seed, relation, person1_description])
    person2_description = Input(shape=(max_sequence_length,))
    discriminator_layers_for_generator = discriminator([person1, generated_person2, relation, person1_description, person2_description])
    generator_model = Model(inputs=[person1, relation, seed, person1_description, person2_description], outputs=[discriminator_layers_for_generator])
    generator_model.compile(optimizer=RMSprop(lr=0.0001, rho=0.9), loss=wasserstein_loss)
    # now freeze the generator and unfreeze the discriminator
    for layer in discriminator.layers:
        layer.trainable = True
    for layer in generator.layers:
        layer.trainable = False
    discriminator.trainable = True
    generator.trainable = False
    person2 = Input(shape=(1,))
    generated_samples_for_discriminator = generator([person1, seed, relation, person1_description])
    discriminator_output_from_generator = discriminator([person1, generated_samples_for_discriminator, relation, person1_description, person2_description])
    discriminator_output_from_real_samples = discriminator([person1, person2, relation, person1_description, person2_description])
    averaged_samples = RandomWeightedAverage()([person2, generated_samples_for_discriminator])
    averaged_samples_out = discriminator([person1, averaged_samples, relation, person1_description, person2_description])
    partial_gp_loss = partial(gradient_penalty_loss, averaged_samples=averaged_samples)
    partial_gp_loss.__name__ = 'gradient_penalty'
    discriminator_model = Model(inputs=[person1, person2, relation, person1_description, person2_description, seed], outputs=[discriminator_output_from_real_samples, discriminator_output_from_generator, averaged_samples_out])
    discriminator_model.compile(optimizer=RMSprop(lr=0.0001, rho=0.9), loss=[wasserstein_loss, wasserstein_loss, partial_gp_loss])
    positive_y = np.ones((BATCH_SIZE, 1), dtype=np.float32)
    negative_y = -positive_y
    dummy_y = np.zeros((BATCH_SIZE, 1), dtype=np.float32)
if __name__ == "__main__":
    # convert_text_to_nptensor(cutoff=50, min_frequency_words=100000, max_lines=20000000)
    train(from_save_point=False, suffix='Google')
However, when execution reaches this line:

gradients = K.gradients(K.sum(y_pred), averaged_samples)

the error message is:

TypeError: Failed to convert object of type ... to Tensor. Contents: [None]. Consider casting elements to a supported type.
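For reference, here is a minimal sketch (independent of my model, just plain Keras backend calls) showing that K.gradients returns [None] whenever the first argument has no differentiable path back to the second, and that the subsequent K.square call then fails with exactly this TypeError:

import tensorflow as tf
from keras import backend as K

x = K.placeholder(shape=(None, 1))   # tensor we differentiate with respect to
y = K.placeholder(shape=(None, 1))   # unrelated tensor

depends_on_x = K.sum(K.square(x))    # has a path back to x
independent_of_x = K.sum(y)          # has no path back to x

print(K.gradients(depends_on_x, x))      # [<tf.Tensor ...>] -> valid gradient
print(K.gradients(independent_of_x, x))  # [None]
# K.square(K.gradients(independent_of_x, x)) raises the same
# "Failed to convert object of type ... to Tensor. Contents: [None]" error.

So I suspect that somewhere in my graph the discriminator output does not depend differentiably on averaged_samples, but I cannot see where the connection is broken.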
Can anyone help me? Thank you very much!