I wrote a custom implementation of a GRU in TensorFlow to experiment with. When trained, it performs nothing like the native TensorFlow GRUCell. I have been trying to figure out why and simply cannot. Any input would be greatly appreciated.
Native implementation:
encoder = tf.nn.rnn_cell.GRUCell(feature_no, activation = tf.nn.tanh)
encoding = tf.nn.dynamic_rnn(encoder, inputs, dtype=tf.float32)[1]
My implementation:
class GRU:
    def __init__(self, hid_dim, vec_dim, name = None):
        self.hid_dim = hid_dim  # Dimension of the hidden layer
        self.vec_dim = vec_dim  # Dimension of the input layer
        ## Declare the variables
        ## Update gate
        self.W_ux = tf.get_variable(name = name + ".GRU.W_ux",
                                    shape = [vec_dim, hid_dim],
                                    initializer = tf.random_normal_initializer())
        self.W_uh = tf.get_variable(name = name + ".GRU.W_uh",
                                    shape = [hid_dim, hid_dim],
                                    initializer = tf.random_normal_initializer())
        self.b_u = tf.get_variable(name = name + ".GRU.g_u",
                                   shape = [hid_dim],
                                   initializer = tf.random_normal_initializer())
        ## Reset gate
        self.W_rx = tf.get_variable(name = name + ".GRUt.W_rx",
                                    shape = [vec_dim, hid_dim],
                                    initializer = tf.random_normal_initializer())
        self.W_rh = tf.get_variable(name = name + ".GRU.W_rh",
                                    shape = [hid_dim, hid_dim],
                                    initializer = tf.random_normal_initializer())
        self.b_r = tf.get_variable(name = name + ".GRU.b_r",
                                   shape = [hid_dim],
                                   initializer = tf.random_normal_initializer())
        ## Candidate hidden state
        self.W_hx = tf.get_variable(name = name + ".GRU.W_hx",
                                    shape = [vec_dim, hid_dim],
                                    initializer = tf.random_normal_initializer())
        self.W_hh = tf.get_variable(name = name + ".GRU.W_hh",
                                    shape = [hid_dim, hid_dim],
                                    initializer = tf.random_normal_initializer())
        self.b_h = tf.get_variable(name = name + ".GRU.b_h",
                                   shape = [hid_dim],
                                   initializer = tf.random_normal_initializer())

    def update_state(self, x, h):
        ## Update gate: u = sigmoid(x W_ux + h W_uh + b_u)
        u = tf.sigmoid(tf.matmul(x, self.W_ux) + tf.matmul(h, self.W_uh) + self.b_u)
        ## Reset gate: r = sigmoid(x W_rx + h W_rh + b_r)
        r = tf.sigmoid(tf.matmul(x, self.W_rx) + tf.matmul(h, self.W_rh) + self.b_r)
        ## Candidate hidden state: hp = tanh(x W_hx + r * (h W_hh) + b_h)
        hp = tf.tanh(tf.matmul(x, self.W_hx) + r * tf.matmul(h, self.W_hh) + self.b_h)
        ## Interpolate between the candidate and the previous state
        return (1 - u) * hp + u * h

    def get_states(self, x):
        ## Zero initial state of shape [batch_size, hid_dim]
        init = tf.reshape(tf.tile(tf.zeros_like(x[:, 0, 0]), [self.hid_dim]),
                          shape = [-1, self.hid_dim])
        ## Put the time axis first so tf.scan steps through the sequence
        x_t = tf.transpose(x, perm = [1, 0, 2])
        self.h_set = tf.transpose(tf.scan(lambda h, x: self.update_state(x, h),
                                          x_t, init), perm = [1, 0, 2])
        self.h = self.h_set[:, -1]

    def __call__(self, x):
        self.get_states(x)
        return self.h

encoder = GRU(feature_no, vec_dim, name = 'encoder')
encoding = encoder(sent)
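For reference, here is a minimal sketch of how the two encoders could be run side by side on the same input. The sizes, the placeholder named sent, and the random batch are assumptions for illustration, not part of the original setup:

import numpy as np
import tensorflow as tf

feature_no, vec_dim = 16, 8                               # hypothetical sizes
sent = tf.placeholder(tf.float32, [None, None, vec_dim])  # [batch, time, features]

# Custom cell from above
custom_encoding = GRU(feature_no, vec_dim, name = 'encoder')(sent)

# Native cell on the same input; dynamic_rnn returns (outputs, final_state)
native_cell = tf.nn.rnn_cell.GRUCell(feature_no, activation = tf.nn.tanh)
native_encoding = tf.nn.dynamic_rnn(native_cell, sent, dtype=tf.float32)[1]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.randn(4, 10, vec_dim).astype(np.float32)
    c, n = sess.run([custom_encoding, native_encoding], {sent: batch})
    print(c.shape, n.shape)  # both should be (4, feature_no)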
Answer 0 (score: 0)
Found the problem. I knew RNNs are very sensitive to initialization, but I was using tf.random_normal_initializer() instead of tf.glorot_uniform_initializer(). I made the swap and now they behave the same.
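The fix amounts to swapping the initializer in each of the tf.get_variable calls above: Glorot uniform scales the initial weights by the fan-in and fan-out of the layer instead of drawing from a unit normal (the tf.random_normal_initializer() default). A sketch of one of the declarations with the swap applied:

## Update gate weights, now with Glorot initialization
self.W_ux = tf.get_variable(name = name + ".GRU.W_ux",
                            shape = [vec_dim, hid_dim],
                            initializer = tf.glorot_uniform_initializer())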