TensorFlow: ValueError: No gradients provided for any variable

Asked: 2020-08-24 15:42:38

Tags: python tensorflow

I am using TensorFlow 2.3, and when I try to run my model I get the following error:

ValueError: No gradients provided for any variable:

I looked at the other questions about this error but did not find anything that helped, so I am asking here in case anyone has an idea.

I am using graph mode, but I get the same error in eager mode as well.
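For reference, this is roughly how I toggle eager execution when debugging, as a minimal sketch (it assumes TF 2.3's tf.config.run_functions_eagerly; the model setup and the call to train are omitted):

import tensorflow as tf

# Force @tf.function-decorated code (like train below) to run eagerly,
# so intermediate values can be printed while debugging.
tf.config.run_functions_eagerly(True)

# ... build the agent and call agent.train(env) here ...

# Restore normal graph execution afterwards.
tf.config.run_functions_eagerly(False)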

The code is:

    self.loss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.SUM)
    self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    self.eps = np.finfo(np.float32).eps.item()

def run_episode(self, prepa, batch_sz, env):
    nb_action = np.zeros((2,1), dtype=np.int32)

    action_probs_black = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    values_black = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    rewards_black = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)

    action_probs_white = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    values_white = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    rewards_white = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)

    for step in range(batch_sz):
        state, current_player, invalid_move = prepa.give_donnee()
        nb_action_player = int(nb_action[current_player])
        action_logit, value = self.model(state)

        if current_player == 0:
            values_black.write(nb_action_player, tf.squeeze(value))
            action = env.uniform_random_action()
            action_prob = tf.nn.softmax(action_logit)
            action_probs_black.write(nb_action_player, action_prob[0, action])
        else:
            values_white.write(nb_action_player, tf.squeeze(value))
            action = env.uniform_random_action()
            action_prob = tf.nn.softmax(action_logit)
            action_probs_white.write(nb_action_player, action_prob[0, action])


        next_state, rewards, dones, _ = env.step(action)

        prepa.new_obs(next_state)
     
        nb_action[current_player] += 1

        if current_player == 1:
            rewards = rewards * -1
            rewards_white.write(nb_action_player, rewards)
        else:
            rewards_black.write(nb_action_player, rewards)

        if dones:
            break

    rewards_black = rewards_black.stack()
    rewards_white = rewards_white.stack()
    values_black = values_black.stack()
    values_white = values_white.stack()
    action_probs_black = action_probs_black.stack()
    action_probs_white = action_probs_white.stack()

    return rewards_black, values_black, action_probs_black, rewards_white, values_white, action_probs_white

@tf.function    
def train(self, env, batch_sz=1000, updates=10000):
    state = env.reset()
    prepa = PrepaGoObs(state)

    rewards_black, values_black, action_probs_black, rewards_white, values_white, action_probs_white = self.run_episode(prepa, batch_sz, env)

    with tf.GradientTape() as tape:
        returns = self.get_expected_return(rewards_black)

        action_probs, values, returns_tf = [tf.expand_dims(x, 1) for x in [action_probs_black, values_black, returns]]   
        loss = self.compute_loss(action_probs, values, returns_tf)
    grads = tape.gradient(loss, self.model.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))

    with tf.GradientTape() as tape:
        returns = self.get_expected_return(rewards_white)

        action_probs, values, returns = [tf.expand_dims(x, 1) for x in [action_probs_white, values_white, returns]]

        loss = self.compute_loss(action_probs, values, returns)
    grads = tape.gradient(loss, self.model.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
            
    return loss

def get_expected_return(self, rewards, standardize=True):
    n = tf.shape(rewards)[0]
    returns = tf.TensorArray(dtype=tf.float32, size=n)

    rewards = tf.cast(rewards[::-1], dtype=tf.float32)
    discounted_sum = tf.constant(0.0)
    discounted_sum_shape = discounted_sum.shape

    for i in tf.range(n):
        reward = rewards[i]
        discounted_sum = reward + self.gamma * discounted_sum
        discounted_sum.set_shape(discounted_sum_shape)
        returns = returns.write(i, discounted_sum)
    returns = returns.stack()[::-1]

    if standardize:
        returns = ((returns - tf.math.reduce_mean(returns)) /
                   (tf.math.reduce_std(returns) + self.eps))

    return returns

def compute_loss(self, action_probs, values, returns):
    print(f'returns : {returns}')
    print(f'values : {values}, shape : {values.shape}')
    print(f'action_probs : {action_probs}')
    advantage = returns - values
    print(f'advantage : {advantage}')
    action_log_probs = tf.math.log(action_probs)
    print(f'action_probs_log : {action_log_probs}')
    actor_loss = tf.math.reduce_sum(action_log_probs * advantage)

    critic_loss = self.loss(values, returns)

    print(f'actor_loss : {actor_loss}')
    print(f'critic_loss : {critic_loss}')

    return actor_loss + critic_loss

Can someone help me?

Edit: after trying print(grads), I realized that tape.gradient returns None.
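This is roughly what I printed, as a minimal sketch (same self.model, tape and loss as in train above):

grads = tape.gradient(loss, self.model.trainable_variables)
# Every entry comes back as None instead of a gradient tensor,
# which is what makes apply_gradients raise
# "No gradients provided for any variable".
for var, grad in zip(self.model.trainable_variables, grads):
    print(var.name, None if grad is None else grad.shape)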

0 Answers:

There are no answers yet.