K.gradients returns [None]

Date: 2019-01-07 15:07:27

Tags: python-3.x keras

K.gradients() returns [None] (Keras with the TensorFlow backend). Is there something wrong with my model structure?

I have adapted the WGAN-GP model for text in Keras; please refer to the following code: [https://github.com/OctThe16th/WGAN-GP-with-keras-for-text/blob/master/Exploration/GenerativeAdverserialWGAN-GP.py]

from keras.layers import Dense, Flatten, Input, BatchNormalization, Dropout, GRU, Bidirectional, Reshape, Activation
from keras.layers.noise import GaussianNoise
from keras.models import Model
from keras.layers.merge import _Merge
from keras.layers import Convolution1D, AveragePooling1D, ZeroPadding1D, UpSampling1D, concatenate, regularizers
from keras.layers import Embedding, Reshape, Lambda
from keras.layers import LSTM, multiply
from keras.optimizers import Adam, RMSprop
from random import randint
from keras.initializers import Constant
from keras import backend as K
from keras import layers
import numpy as np
from functools import partial
import pickle
import os
import tensorflow as tf

def wasserstein_loss(y_true, y_pred):
    return K.mean(y_true * y_pred)

def gradient_penalty_loss(y_true, y_pred, averaged_samples):
    '''Computes gradient penalty based on prediction and weighted real / fake samples'''
    gradients = K.gradients(K.sum(y_pred), averaged_samples)
    # compute the euclidean norm by squaring ...
    gradients_sqr = K.square(gradients)
    #   ... summing over the rows ...
    gradients_sqr_sum = K.sum(gradients_sqr)
    #   ... and sqrt
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    # compute lambda * (1 - ||grad||)^2 still for each single sample
    gradient_penalty = K.square(1 - gradient_l2_norm)
    # return the mean as loss over all the batch samples
    return K.mean(gradient_penalty)
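(For comparison, the standard keras-contrib improved-WGAN example computes the norm per sample and indexes the gradient list with [0]; the following is only a sketch of that variant, reusing the K and np imports above, and gradient_penalty_weight is an extra weighting constant that is not part of my function:)

def gradient_penalty_loss_per_sample(y_true, y_pred, averaged_samples, gradient_penalty_weight):
    # gradient of the critic output w.r.t. the interpolated samples (first/only entry of the list)
    gradients = K.gradients(y_pred, averaged_samples)[0]
    # square, then sum over every axis except the batch axis -> squared L2 norm per sample
    gradients_sqr = K.square(gradients)
    gradients_sqr_sum = K.sum(gradients_sqr, axis=np.arange(1, len(gradients_sqr.shape)))
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    # weight * (1 - ||grad||)^2, averaged over the batch
    gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm)
    return K.mean(gradient_penalty)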



class RandomWeightedAverage(_Merge):
    def _merge_function(self, inputs):
        weights = K.random_uniform((BATCH_SIZE, 1), dtype='float32')
        return (weights * inputs[0]) + ((1 - weights) * inputs[1])

#K.argmax() is not differentiable; this function is defined to replace K.argmax() with a differentiable approximation.
def argmax(x):
    y = tf.reduce_sum(tf.cumsum(tf.ones_like(x), axis=-1) * tf.exp(beta * x) / tf.reduce_sum(tf.exp(beta * x), axis=-1, keep_dims=True), axis=-1) - 1
    return y
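(As a quick illustration of what this soft-argmax computes, here is a rough numpy sketch with a made-up beta value; beta is a sharpness constant that is not shown in the snippet above:)

import numpy as np

beta = 10.0                              # made-up sharpness value, for illustration only
x = np.array([0.1, 0.7, 0.2])            # toy softmax-style scores
weights = np.exp(beta * x) / np.sum(np.exp(beta * x))
soft_idx = np.sum(np.cumsum(np.ones_like(x)) * weights) - 1
print(soft_idx)                          # ~1.0, i.e. np.argmax(x), but smooth in x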

def generator_mod(softmax_shape):
    person1_input = Input(shape=(1,), dtype='float32')
    noise_input = Input(shape=(1, person_embedding_dim), dtype='float32')
    relation_input = Input(shape=(1,), dtype='float32')
    person1_embedded = Embedding(1,person_embedding_dim)(person1_input)
    relation_embedded = Embedding(1,relation_embedding_dim)(relation_input)
    embedded_layer = concatenate([person1_embedded, relation_embedded, noise_input], axis=1)
    drop_1 = BatchNormalization(momentum=0.8)(embedded_layer)
    x_1 = Convolution1D(filters=64, kernel_size=3, padding='same', activation='elu')(drop_1)
    x_1 = BatchNormalization()(x_1)
    x_1 = Convolution1D(filters=32, kernel_size=3, padding='same', activation='elu')(x_1)
    x_1 = BatchNormalization()(x_1)
    x_1 = Flatten()(x_1)
    x_1 = Dense(32, activation='relu')(x_1)
######################################################################
    person1_description = Input(shape=(max_sequence_length,), dtype='float32')
    embedded_sequences1 = Embedding(len(word_index) + 1, word_embeddings_dim)(person1_description)
    lstm_out1 = Bidirectional(LSTM(64))(embedded_sequences1)
    attention_1 = Dense(128, activation='softmax', name='attention_vec')(lstm_out1)
    attention_mul = multiply([lstm_out1, attention_1], name='attention_mul')
    #####global attention finish#####
    x_2 = Dense(32, activation='relu')(attention_mul)
    full_connected = multiply([x_1, x_2], name='full_connected')
    x = Dense(softmax_shape, activation='softmax')(full_connected)
    output = Lambda(argmax)(x)#shape (?,)
    output = Lambda(K.expand_dims, arguments={'axis': -1})(output) #shape (?,1)

    model = Model(inputs = [person1_input, noise_input, relation_input, person1_description], outputs = output)
    return model

def discriminator_mod():
    person1_input = Input(shape=(1,), dtype='float32')
    person2_input = Input(shape=(1,), dtype='float32')
    relation_input = Input(shape=(1,), dtype='float32')

    person1_embedded = Embedding(1, person_embedding_dim)(person1_input)
    person2_embedded = Embedding(1, person_embedding_dim)(person2_input)
    relation_embedded = Embedding(len(word_index) + 1, word_embeddings_dim)(relation_input)
    embedded_layer = concatenate([person1_embedded, person2_embedded, relation_embedded], axis=1)
    drop_1 = Dropout(0.5)(embedded_layer)
    x = Convolution1D(128, 1, activation='relu')(drop_1)
    x = BatchNormalization()(x)
    x = Convolution1D(filters=64, kernel_size=3, padding='same', activation='elu')(x)
    x = BatchNormalization()(x)
    x = Convolution1D(filters=32, kernel_size=3, padding='same', activation='elu')(x)
    x = BatchNormalization()(x)
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)

    auxiliary_input1 = Input(shape=(max_sequence_length,), dtype='float32', name='aux_input1')
    embedded_sequences1 = Embedding(len(word_index) + 1, word_embeddings_dim)(auxiliary_input1)
    lstm_out1 = Bidirectional(LSTM(64))(embedded_sequences1)
    lstm_drop1 = Dropout(0.5)(lstm_out1)
    auxiliary_input2 = Input(shape=(max_sequence_length,), dtype='float32', name='aux_input2')
    embedded_sequences2 = Embedding(len(word_index) + 1, word_embeddings_dim)(auxiliary_input2)
    lstm_out2 = Bidirectional(LSTM(64))(embedded_sequences2)
    lstm_drop2 = Dropout(0.5)(lstm_out2)

    lstm_drop = multiply([lstm_drop1, lstm_drop2])
    #####global attention start#####
    attention_1 = Dense(128, activation='softmax', name='attention_vec')(lstm_drop)
    attention_mul = multiply([lstm_drop, attention_1], name='attention_mul')
    #####global attention finish#####
    # attention_mul = Flatten()(attention_mul)
    attention_mul = Dense(32, activation='relu')(attention_mul)

    #####global attention start#####
    full_connected = multiply([x, attention_mul], name='full_connected')
    attention_2 = Dense(32, activation='softmax')(full_connected)
    attention_final = multiply([full_connected, attention_2])
    #####global attention finish#####
    dense_layer = Dense(16, activation='relu')(attention_final)
    main_output = Dense(1, activation='tanh', name='main_output')(dense_layer)

    model = Model(inputs=[person1_input, person2_input, relation_input, auxiliary_input1, auxiliary_input2], outputs= main_output)
    return model

def train(from_save_point=False, suffix='rnn'):
    X_train = np.random.randn(10,243)

    generator = generator_mod(person_total)
    discriminator = discriminator_mod()
    generator.summary()
    discriminator.summary()

    for layer in discriminator.layers:
        layer.trainable = False
    discriminator.trainable = False

    person1 = Input(shape=(1,))
    relation = Input(shape=(1,))
    seed = Input(shape=(1,person_embedding_dim))
    person1_description = Input(shape=(max_sequence_length,))

    generated_person2 = generator([person1, seed, relation, person1_description])
    person2_description = Input(shape=(max_sequence_length,))

    discriminator_layers_for_generator = discriminator([person1, generated_person2, relation, person1_description, person2_description])
    generator_model = Model(inputs=[person1, relation, seed, person1_description, person2_description], outputs=[discriminator_layers_for_generator])
    generator_model.compile(optimizer= RMSprop(lr=0.0001, rho=0.9), loss=wasserstein_loss)

    for layer in discriminator.layers:
        layer.trainable = True
    for layer in generator.layers:
        layer.trainable = False
    discriminator.trainable = True
    generator.trainable = False

    person2 = Input(shape=(1,))

    generated_samples_for_discriminator = generator([person1, seed, relation, person1_description])

    discriminator_output_from_generator = discriminator([person1, generated_samples_for_discriminator, relation, person1_description, person2_description])
    discriminator_output_from_real_samples = discriminator([person1, person2, relation, person1_description, person2_description])

    averaged_samples = RandomWeightedAverage()([person2, generated_samples_for_discriminator])

    averaged_samples_out = discriminator([person1, averaged_samples, relation, person1_description, person2_description])

    partial_gp_loss = partial(gradient_penalty_loss, averaged_samples= averaged_samples)
    partial_gp_loss.__name__ = 'gradient_penalty'

    discriminator_model = Model(inputs=[person1, person2, relation, person1_description, person2_description, seed], outputs=[discriminator_output_from_real_samples, discriminator_output_from_generator, averaged_samples_out])
# averaged_samples_out
    discriminator_model.compile(optimizer=RMSprop(lr=0.0001, rho=0.9), loss=[wasserstein_loss, wasserstein_loss, partial_gp_loss])
# partial_gp_loss
    positive_y = np.ones((BATCH_SIZE, 1), dtype=np.float32)
    negative_y = -positive_y
    dummy_y = np.zeros((BATCH_SIZE, 1), dtype=np.float32)
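    # note: gradient_penalty_loss ignores y_true, so dummy_y is only a placeholder target
    # for the averaged_samples_out output when training the discriminator model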


if __name__ == "__main__":
# convert_text_to_nptensor(cutoff=50, min_frequency_words=100000, max_lines=20000000)
    train(from_save_point=False, suffix='Google')

However, when the code executes this line:

    gradients = K.gradients(K.sum(y_pred), averaged_samples)

the error message is:

    TypeError: Cannot convert object of type to Tensor. Contents: [None]. Consider casting elements to a supported type.
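To illustrate what the message means (this is a standalone sketch, not part of my model): K.gradients returns [None] whenever there is no differentiable path from the loss tensor back to the tensor it is differentiated against, and any op that then tries to convert that [None] list to a tensor (for example K.square) raises this TypeError.

from keras import backend as K

x = K.placeholder(shape=(None, 1))
y = K.placeholder(shape=(None, 1))   # y is not built from x, so there is no path from y back to x
grads = K.gradients(K.sum(y), x)
print(grads)                         # -> [None]
# K.square(grads) then raises the same TypeError, because [None] cannot be converted to a Tensor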

Can anyone help me? Thank you very much!

0 Answers:

No answers yet.