Using tf.keras.Model as the base class to define an RNN Cell

Date: 2019-04-07 12:52:01

Tags: tensorflow keras recurrent-neural-network eager-execution

I am working in TensorFlow's eager execution to develop a variation of a variational autoencoder (VAE) in a sequential-data setting. Since neither the recurrent network structure nor its input/output flow is standard, I have to build my own custom RNNCell, which I can later pass to the tf.nn.raw_rnn API.

For the class that builds the required RNNCell, I used tf.keras.Model as the base class. However, when I pass this RNNCell to tf.nn.raw_rnn, I get NaN outputs. What went wrong?

Here is my implementation (please let me know if anything is unclear):

import tensorflow as tf
tfe = tf.contrib.eager
tf.enable_eager_execution()
import numpy as np
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model

The dataset, called inputs, has entries that are all bounded, of float32 dtype, and of shape (time_steps, batch_size, input_depth) = (20, 1000, 4). Note that this time-major layout differs from the more familiar tf.nn.dynamic_rnn API, where the shape is (batch_size, time_steps, input_depth).
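
For context, here is a minimal sketch of a toy inputs tensor with this time-major layout (the real dataset is not shown in the question, so the random values below are only an assumption):

#hypothetical toy data in the (time_steps, batch_size, input_depth) layout
time_steps, batch_size, input_depth = 20, 1000, 4
inputs = tf.convert_to_tensor(
    np.random.uniform(-1.0, 1.0, size=(time_steps, batch_size, input_depth)).astype(np.float32))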

#defining the sampling / reparameterization function
def sampling(args):
    mean, logvar = args
    batch = batch_size #global batch size (1000 here)
    dim = latent_dim   #global latent dimension (4 here)
    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = tf.random_normal(shape=(batch, dim))
    return mean + tf.exp(0.5 * logvar) * epsilon
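
As a quick standalone check of the reparameterization trick (illustrative only; it assumes the globals batch_size = 1000 and latent_dim = 4 are already set), sampling should return one latent sample per batch row:

#hypothetical sanity check of the sampling function
batch_size, latent_dim = 1000, 4
mean_test = tf.zeros((batch_size, latent_dim))
logvar_test = tf.zeros((batch_size, latent_dim)) #std = exp(0.5 * 0) = 1
s_test = sampling([mean_test, logvar_test])
print(s_test.shape) #expect (1000, 4)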



#defining the class of the model (PreSSM = without the transition module yet)
class PreSSM(tf.keras.Model):
    def __init__(self, latent_dim = 4, intermediate_dim = 4):
        super(PreSSM, self).__init__()
        self.latent_dim = latent_dim
        self.input_dim = self.latent_dim + 4 #toy problem

        inputs = Input(shape=(self.latent_dim + 4,), name='inference_input')
        layer_1 = Dense(intermediate_dim, activation='relu')(inputs)
        layer_2 = Dense(intermediate_dim, activation='relu')(layer_1)
        mean = Dense(latent_dim, name='mean')(layer_2)
        logvar = Dense(latent_dim, name='logvar')(layer_2)        
        s = Lambda(sampling, output_shape=(latent_dim,), name='s')([mean, logvar])
        self.inference_net = Model(inputs, [mean, logvar, s], name='inference_net')

        latent_inputs = Input(shape=(latent_dim,), name='s_sampling')
        layer_3 = Dense(intermediate_dim, activation='relu')(latent_inputs)
        layer_4 = Dense(intermediate_dim, activation='relu')(layer_3)
        outputs = Dense(2)(layer_4)
        self.generative_net = Model(latent_inputs, outputs, name='generative_net')

    @property
    def state_size(self):
        return self.latent_dim

    @property
    def output_size(self):
        return 2 #(x,y) coordinate

    @property
    def zero_state(self):
        return init_state #global variable we have defined

    def __call__(self, inputs, state): #overridden so the instance can be called like an RNN cell
        next_state = self.inference_net(inputs)[-1]
        output = self.generative_net(next_state)
        return output, next_state

#instantiate the cell == a model instance
model = PreSSM()
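
Running one manual step of the cell outside raw_rnn can help localize the NaNs (a debugging sketch; it assumes init_state is the zero state of shape (batch_size, latent_dim)):

#hypothetical one-step check of the cell before handing it to raw_rnn
init_state = tf.zeros((1000, 4)) #assumed global zero state
x_t = tf.zeros((1000, 4))        #one time slice of the inputs
out, next_state = model(tf.concat([x_t, init_state], -1), init_state)
print(out.shape, next_state.shape) #expect (1000, 2) and (1000, 4)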

#define a class whose instance superloop = SuperLoop(inputs) provides the loop_fn method required by tf.nn.raw_rnn
class SuperLoop:
    def __init__(self, inputs, output_dim = 2):
        inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time, clear_after_read=False) #max_time is a global
        inputs_ta = inputs_ta.unstack(inputs) #load the data, one entry per time step
        self.inputs_ta = inputs_ta
        self.output_dim = output_dim

    def loop_fn(self, time, cell_output, cell_state, loop_state):
        if cell_output is None: # time == 0
            next_cell_state = init_state #global initial state
            emit_output = tf.zeros([self.output_dim])
        else:
            emit_output = cell_output
            next_cell_state = cell_state

        elements_finished = (time >= seq_length) #seq_length is a global
        finished = tf.reduce_all(elements_finished)

        if finished:
            next_input = tf.zeros(shape=(self.output_dim,), dtype=tf.float32)
        else:
            next_input = tf.concat([self.inputs_ta.read(time), next_cell_state], -1)

        next_loop_state = None
        return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)
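
Note that loop_fn closes over several globals (max_time, seq_length, init_state). They are not shown in the question; the values below are only assumptions consistent with the shapes described above:

#assumed globals referenced by SuperLoop.loop_fn (not shown in the question)
max_time = 20                          #number of time steps
batch_size, latent_dim = 1000, 4
seq_length = tf.fill([batch_size], 20) #per-sequence lengths, so (time >= seq_length) is a (batch_size,) bool vector
init_state = tf.zeros((batch_size, latent_dim))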


#defining the model: run the custom cell through tf.nn.raw_rnn
def SSM_model(inputs, RNN_cell = model, output_dim = 2):
    superloop = SuperLoop(inputs, output_dim)
    outputs_ta, final_state, final_loop_state = tf.nn.raw_rnn(RNN_cell, superloop.loop_fn)
    outputs = outputs_ta.stack()
    return outputs

#model checking
SSM_model(inputs = inputs, RNN_cell = model)
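
One way to narrow down where the NaNs first appear is to scan the stacked outputs step by step (a debugging sketch, not part of the original code):

#hypothetical NaN probe over the stacked (time, batch, output_dim) outputs
outputs = SSM_model(inputs=inputs, RNN_cell=model)
nan_per_step = tf.reduce_any(tf.is_nan(outputs), axis=[1, 2]) #one bool per time step
print(nan_per_step.numpy()) #True marks time steps whose outputs contain NaN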

Here, the outputs are NaN...

Hence I cannot proceed to training. What went wrong? Am I missing something when using tf.keras.Model as the base class to define an RNNCell, as above?

0 Answers:

There are no answers yet.