NaN problem in a TensorFlow GRU model

Date: 2017-06-02 11:08:37

Tags: tensorflow recurrent-neural-network

I am writing a GRU model with TensorFlow. However, after some amount of training, the computed loss sometimes becomes NaN. The problem is that it does not happen every run, which makes it hard to debug. There is no 0/0 or log(0) operation anywhere in my code, so I suspect the gradient computation is to blame. Can anyone give me some advice?
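One way to localize this kind of intermittent failure is TensorFlow 1.x's graph-wide numeric checks, which make the session raise an error at the first op that produces a NaN or Inf instead of letting it propagate into the loss. A minimal sketch (the sizes, the `model` name, and `feed_dict` are illustrative, using the GRU class defined below):

    # Sketch only: build the check op inside the same graph, after all other
    # ops exist. tf.add_check_numerics_ops() attaches a CheckNumerics op to
    # every floating-point tensor and raises InvalidArgumentError at the
    # first NaN/Inf, naming the offending op.
    model = GRU(input_dim=10, hidden_dim=32, period=5)  # illustrative sizes
    with model.graph.as_default():
        check_op = tf.add_check_numerics_ops()

    with tf.Session(graph=model.graph) as sess:
        sess.run(tf.global_variables_initializer())
        # feed_dict stands for whatever inputs/labels/learning rate are
        # normally fed on a training step; run the check op alongside it.
        _, loss_val, _ = sess.run(
            [model.optimizer, model.loss, check_op], feed_dict=feed_dict)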

My GRU code:

import numpy as np
import tensorflow as tf


class GRU(object):
    def __init__(self, input_dim, hidden_dim, period):
        self.period = period
        self.graph = tf.Graph()
        with self.graph.as_default():
            # Parameters: input-to-hidden weights (U*) and hidden-to-hidden
            # weights (W*) for the update gate (z), the reset gate (r) and
            # the candidate state (_), uniformly initialized in
            # [-sqrt(1/hidden_dim), sqrt(1/hidden_dim)].
            Uz = tf.Variable(tf.random_uniform([hidden_dim, input_dim], -np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim)))
            Ur = tf.Variable(tf.random_uniform([hidden_dim, input_dim], -np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim)))
            U_ = tf.Variable(tf.random_uniform([hidden_dim, input_dim], -np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim)))

            Wz = tf.Variable(tf.random_uniform([hidden_dim, hidden_dim], -np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim)))
            Wr = tf.Variable(tf.random_uniform([hidden_dim, hidden_dim], -np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim)))
            W_ = tf.Variable(tf.random_uniform([hidden_dim, hidden_dim], -np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim)))

            # Gate and candidate biases.
            bz = tf.Variable(tf.zeros([hidden_dim, 1]))
            br = tf.Variable(tf.zeros([hidden_dim, 1]))
            b_ = tf.Variable(tf.zeros([hidden_dim, 1]))

            # Output projection: hidden state -> scalar prediction.
            self.V = tf.Variable(tf.random_uniform([1, hidden_dim], -np.sqrt(1. / hidden_dim), np.sqrt(1. / hidden_dim)))
            self.c = tf.Variable(tf.zeros([1, 1]))

            # Definition of the cell computation.
            def gru_cell(x_t, h_t_prev):
                z = tf.sigmoid(tf.matmul(Uz, x_t) + tf.matmul(Wz, h_t_prev) + bz)    # update gate
                r = tf.sigmoid(tf.matmul(Ur, x_t) + tf.matmul(Wr, h_t_prev) + br)    # reset gate
                h_ = tf.tanh(tf.matmul(U_, x_t) + tf.matmul(W_, r * h_t_prev) + b_)  # candidate state
                h = tf.multiply((1 - z), h_) + tf.multiply(z, h_t_prev)              # new hidden state
                output = tf.tanh(tf.matmul(self.V, h) + self.c)[0][0]                # scalar prediction
                return output, h

            # Input data: `period` input steps followed by one label step.
            self.train_data = list()
            for _ in range(period + 1):
                self.train_data.append(
                    tf.placeholder(tf.float32, shape=[input_dim, 1]))

            train_inputs = self.train_data[:period]
            train_labels = self.train_data[period]

            # Unrolled GRU loop; a non-trainable variable holds the
            # initial hidden state.
            outputs = list()
            state = tf.Variable(tf.zeros([hidden_dim, 1]), trainable=False)
            for i in train_inputs:
                output, state = gru_cell(i, state)
                outputs.append(output)

            self.logits = outputs
            # sqrt of the squared difference, i.e. the absolute error between
            # the last prediction and the last label component.
            self.loss = tf.sqrt(tf.squared_difference(self.logits[-1], train_labels[-1][0]))

            # Optimizer: plain SGD with gradients clipped by global norm.
            self.learning_rate = tf.placeholder(tf.float32, shape=[])
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
            # optimizer = tf.train.RMSPropOptimizer(self.learning_rate, epsilon=1e-6)
            gradients, v = zip(*optimizer.compute_gradients(self.loss))
            gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
            self.optimizer = optimizer.apply_gradients(
                zip(gradients, v))

            # Predictions.
            self.train_prediction = self.logits[-1]
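A side note on the loss above: tf.sqrt(tf.squared_difference(a, b)) is mathematically |a - b|, and the derivative of sqrt(x) grows without bound as x approaches 0, so a step where the prediction happens to match the label exactly yields an undefined gradient. A sketch of an equivalent formulation that avoids the sqrt (assuming plain absolute error is the intent; this line would replace the loss inside __init__):

    # Sketch: equivalent absolute-error loss without the sqrt.
    # d/dx sqrt(x) = 1 / (2 * sqrt(x)) is unbounded near x = 0, so the
    # original form can produce a NaN gradient when the error is 0.
    self.loss = tf.abs(self.logits[-1] - train_labels[-1][0])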

0 Answers:

No answers yet.