我在训练TensorFlow模型时遇到问题,如果有人可以诊断出训练功能中的任何问题,我将不胜感激。它也可能是超参数或模型本身的问题,所以我已将代码放在GitHub
def update(self, batch_size):
states, actions, rewards, next_states, _ = self.memory.sample(batch_size)
states = tf.convert_to_tensor(states)
actions = tf.convert_to_tensor(actions)
rewards = tf.convert_to_tensor(rewards)
next_states = tf.convert_to_tensor(next_states)
#Critic
with tf.GradientTape() as tape:
Qvals = self.critic([states, actions])
next_actions = self.actor_target(next_states)
next_Q = self.critic_target([next_states, next_actions])
Qprime = rewards + self.gamma * next_Q
critic_loss = tf.keras.losses.mean_squared_error(Qvals, Qprime)
gradients_of_critic = tape.gradient(critic_loss, self.critic.trainable_variables)
self.critic_optimizer.apply_gradients(zip(gradients_of_critic, self.critic.trainable_variables))
#Actor
with tf.GradientTape() as tape:
actions = self.actor(states)
q_gradients = -self.critic([states, actions])
parameter_gradients = tape.gradient(q_gradients, self.actor.trainable_variables)
self.actor_optimizer.apply_gradients(zip(parameter_gradients, self.actor.trainable_variables))
#Update target Critic
target_param = self.critic_target.get_weights()
param = self.critic.get_weights()
for layer in range(0, len(target_param)):
target_param[layer] = self.tau * param[layer] + (1.0 - self.tau) * target_param[layer]
self.critic_target.set_weights(target_param)
#update target Actor
target_param = self.actor_target.get_weights()
param = self.actor.get_weights()
for layer in range(0, len(target_param)):
target_param[layer] = self.tau * param[layer] + (1.0 - self.tau) * target_param[layer]
self.actor_target.set_weights(target_param)
谢谢