I am using TensorFlow 2.3, and when I try to run my model I get this error:
ValueError: No gradients provided for any variable:
I have looked at other questions about this error but found nothing that helps, so I am asking here in case someone has an idea.
I use graph mode, but I get the same error in eager mode as well.
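For reference, a minimal sketch of how the two modes can be switched for this test (assuming the switch is done with tf.config.run_functions_eagerly; the @tf.function on train() below is what makes it run as a graph):

import tensorflow as tf

# train() is decorated with @tf.function, so by default it is traced and run as a graph.
# To repeat the test in eager mode, the tracing can be disabled globally:
tf.config.run_functions_eagerly(True)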
The code is:
self.loss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.SUM)
self.optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
self.eps = np.finfo(np.float32).eps.item()
def run_episode(self, prepa, batch_sz, env):
    nb_action = np.zeros((2, 1), dtype=np.int32)
    action_probs_black = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    values_black = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    rewards_black = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
    action_probs_white = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    values_white = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    rewards_white = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
    for step in range(batch_sz):
        state, current_player, invalid_move = prepa.give_donnee()
        nb_action_player = int(nb_action[current_player])
        action_logit, value = self.model(state)
        if current_player == 0:
            values_black.write(nb_action_player, tf.squeeze(value))
            action = env.uniform_random_action()
            action_prob = tf.nn.softmax(action_logit)
            action_probs_black.write(nb_action_player, action_prob[0, action])
        else:
            values_white.write(nb_action_player, tf.squeeze(value))
            action = env.uniform_random_action()
            action_prob = tf.nn.softmax(action_logit)
            action_probs_white.write(nb_action_player, action_prob[0, action])
        next_state, rewards, dones, _ = env.step(action)
        prepa.new_obs(next_state)
        nb_action[current_player] += 1
        if current_player == 1:
            rewards = rewards * -1
            rewards_white.write(nb_action_player, rewards)
        else:
            rewards_black.write(nb_action_player, rewards)
        if dones:
            break
    rewards_black = rewards_black.stack()
    rewards_white = rewards_white.stack()
    values_black = values_black.stack()
    values_white = values_white.stack()
    action_probs_black = action_probs_black.stack()
    action_probs_white = action_probs_white.stack()
    return rewards_black, values_black, action_probs_black, rewards_white, values_white, action_probs_white
@tf.function
def train(self, env, batch_sz=1000, updates=10000):
    state = env.reset()
    prepa = PrepaGoObs(state)
    rewards_black, values_black, action_probs_black, rewards_white, values_white, action_probs_white = self.run_episode(prepa, batch_sz, env)
    with tf.GradientTape() as tape:
        returns = self.get_expected_return(rewards_black)
        action_probs, values, returns_tf = [tf.expand_dims(x, 1) for x in [action_probs_black, values_black, returns]]
        loss = self.compute_loss(action_probs, values, returns_tf)
    grads = tape.gradient(loss, self.model.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
    with tf.GradientTape() as tape:
        returns = self.get_expected_return(rewards_white)
        action_probs, values, returns = [tf.expand_dims(x, 1) for x in [action_probs_white, values_white, returns]]
        loss = self.compute_loss(action_probs, values, returns)
    grads = tape.gradient(loss, self.model.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))
    return loss
def get_expected_return(self, rewards, standardize=True):
    n = tf.shape(rewards)[0]
    returns = tf.TensorArray(dtype=tf.float32, size=n)
    rewards = tf.cast(rewards[::-1], dtype=tf.float32)
    discounted_sum = tf.constant(0.0)
    discounted_sum_shape = discounted_sum.shape
    for i in tf.range(n):
        reward = rewards[i]
        discounted_sum = reward + self.gamma * discounted_sum
        discounted_sum.set_shape(discounted_sum_shape)
        returns = returns.write(i, discounted_sum)
    returns = returns.stack()[::-1]
    if standardize:
        returns = ((returns - tf.math.reduce_mean(returns)) /
                   (tf.math.reduce_std(returns) + self.eps))
    return returns
def compute_loss(self, action_probs, values, returns):
    print(f'returns : {returns}')
    print(f'values : {values}, shape : {values.shape}')
    print(f'action_probs : {action_probs}')
    advantage = returns - values
    print(f'advantage : {advantage}')
    action_log_probs = tf.math.log(action_probs)
    print(f'action_log_probs : {action_log_probs}')
    actor_loss = tf.math.reduce_sum(action_log_probs * advantage)
    critic_loss = self.loss(values, returns)
    print(f'actor_loss : {actor_loss}')
    print(f'critic_loss : {critic_loss}')
    return actor_loss + critic_loss
Can someone help me?
Edit: after trying print(grads), I realized that tape.gradient returns None.
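Roughly what that check looks like inside train() (just a diagnostic print added after the tape, nothing else changed):

grads = tape.gradient(loss, self.model.trainable_variables)
print(grads)  # every entry in the list comes back as None, one per trainable variable
self.optimizer.apply_gradients(zip(grads, self.model.trainable_variables))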