I am working in TensorFlow's eager execution to develop a variant of a variational autoencoder (VAE) for a sequential-data setting. Since neither the recurrent network structure nor its input-output flow is standard, I have to build my own custom RNNCell, which I can later pass to the tf.nn.raw_rnn API.
For the class that builds the required RNNCell, I use tf.keras.Model as the base class. However, when I pass this RNNCell to tf.nn.raw_rnn, I get nan outputs. What went wrong?
Here is my implementation (please let me know if any part is unclear):
import tensorflow as tf
tfe = tf.contrib.eager
tf.enable_eager_execution()
import numpy as np
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
The dataset, called inputs, has entries that are all bounded, of dtype float32, and of shape (time_steps, batch_size, input_depth) = (20, 1000, 4). Note that this shape format differs from the more familiar tf.nn.dynamic_rnn API, where the shape is (batch_size, time_steps, input_depth).
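For reference, the globals used throughout the snippets below are set up roughly as follows; the real dataset is replaced here by a bounded random stand-in, and init_state is simply zeros:

# toy-problem globals referenced by the code below
max_time = 20        # time_steps
batch_size = 1000
input_depth = 4
latent_dim = 4
seq_length = max_time  # every sequence runs the full length in this toy setup
init_state = tf.zeros(shape=(batch_size, latent_dim), dtype=tf.float32)

# bounded float32 stand-in for the real data, shape (time_steps, batch_size, input_depth)
inputs = tf.random_uniform(shape=(max_time, batch_size, input_depth),
                           minval=-1.0, maxval=1.0, dtype=tf.float32)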
# defining the sampling / reparameterization function
def sampling(args):
    mean, logvar = args
    batch = batch_size  # global variable defined above
    dim = latent_dim    # global variable defined above
    # by default, random_normal has mean = 0 and std = 1.0
    epsilon = tf.random_normal(shape=(batch, dim))
    return mean + tf.exp(0.5 * logvar) * epsilon
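As a quick eager sanity check of the reparameterization (assuming the stand-in globals above), feeding zero mean and zero log-variance should yield finite standard-normal samples:

# sampling with mean = 0, logvar = 0 reduces to epsilon ~ N(0, 1)
zero_mean = tf.zeros((batch_size, latent_dim))
zero_logvar = tf.zeros((batch_size, latent_dim))
sample = sampling([zero_mean, zero_logvar])
print(sample.shape)                            # (1000, 4)
print(bool(tf.reduce_any(tf.is_nan(sample))))  # False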
# defining the model class (PreSSM = without the transition module yet)
class PreSSM(tf.keras.Model):
    def __init__(self, latent_dim=4, intermediate_dim=4):
        super(PreSSM, self).__init__()
        self.latent_dim = latent_dim
        self.input_dim = self.latent_dim + 4  # toy problem

        # inference (encoder) network: maps [input, state] -> (mean, logvar, sample)
        inputs = Input(shape=(self.latent_dim + 4,), name='inference_input')
        layer_1 = Dense(intermediate_dim, activation='relu')(inputs)
        layer_2 = Dense(intermediate_dim, activation='relu')(layer_1)
        mean = Dense(latent_dim, name='mean')(layer_2)
        logvar = Dense(latent_dim, name='logvar')(layer_2)
        s = Lambda(sampling, output_shape=(latent_dim,), name='s')([mean, logvar])
        self.inference_net = Model(inputs, [mean, logvar, s], name='inference_net')

        # generative (decoder) network: maps the latent sample to the (x, y) output
        latent_inputs = Input(shape=(latent_dim,), name='s_sampling')
        layer_3 = Dense(intermediate_dim, activation='relu')(latent_inputs)
        layer_4 = Dense(intermediate_dim, activation='relu')(layer_3)
        outputs = Dense(2)(layer_4)
        self.generative_net = Model(latent_inputs, outputs, name='generative_net')

    @property
    def state_size(self):
        return self.latent_dim

    @property
    def output_size(self):
        return 2  # (x, y) coordinate

    @property
    def zero_state(self):
        return init_state  # global variable we have defined

    def __call__(self, inputs, state):
        next_state = self.inference_net(inputs)[-1]  # last output is the sampled s
        output = self.generative_net(next_state)
        return output, next_state
# instantiate the cell == model instance
model = PreSSM()
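Before wiring the cell into raw_rnn, it can be called once by hand; this is my own shape check, using the stand-in globals above:

# one manual step: the cell input is [data_t, state] concatenated on the last axis
dummy_input = tf.concat([inputs[0], init_state], -1)  # shape (1000, 4 + 4) = (1000, 8)
out, next_state = model(dummy_input, init_state)
print(out.shape, next_state.shape)  # (1000, 2) (1000, 4)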
# define a class whose instance holds the inputs and provides the loop_fn method for raw_rnn
class SuperLoop:
    def __init__(self, inputs, output_dim=2):
        inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time, clear_after_read=False)
        inputs_ta = inputs_ta.unstack(inputs)  # load the data, one entry per time step
        self.inputs_ta = inputs_ta
        self.output_dim = output_dim

    def loop_fn(self, time, cell_output, cell_state, loop_state):
        emit_output = cell_output  # == None for time == 0
        if cell_output is None:  # when time == 0
            next_cell_state = init_state  # global variable we have defined
            emit_output = tf.zeros([self.output_dim])
        else:
            emit_output = cell_output
            next_cell_state = cell_state
        elements_finished = (time >= seq_length)
        finished = tf.reduce_all(elements_finished)
        if finished:
            next_input = tf.zeros(shape=(self.output_dim), dtype=tf.float32)
        else:
            next_input = tf.concat([self.inputs_ta.read(time), next_cell_state], -1)
        next_loop_state = None
        return (elements_finished, next_input, next_cell_state, emit_output, next_loop_state)
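To illustrate the loop_fn contract, the time == 0 call (where cell_output and cell_state are still None) can be traced by hand under the same stand-in globals:

# hand trace of loop_fn at time == 0
superloop_check = SuperLoop(inputs)
finished0, next_input0, state0, emit0, _ = superloop_check.loop_fn(
    tf.constant(0), None, None, None)
print(next_input0.shape)  # (1000, 8): inputs_ta.read(0) concatenated with init_state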
# defining a model
def SSM_model(inputs, RNN_cell=model, output_dim=2):
    superloop = SuperLoop(inputs, output_dim)
    outputs_ta, final_state, final_loop_state = tf.nn.raw_rnn(RNN_cell, superloop.loop_fn)
    outputs = outputs_ta.stack()
    return outputs
# model checking
SSM_model(inputs = inputs, RNN_cell = model)
Here, the outputs are nan ...
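Concretely, this is the check I run (reusing the stand-in tensors defined above):

# scanning the stacked outputs for NaNs
outputs = SSM_model(inputs=inputs, RNN_cell=model)
print(tf.reduce_any(tf.is_nan(outputs)))  # True, matching the nan outputs above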
Consequently, I cannot proceed to training. What went wrong? Am I missing something when defining the RNNCell with tf.keras.Model as the base class, as above?