I'm trying to write a DDPG agent in PyTorch to play a soccer-like game. The agent does well at first (while exploration noise is present), but as training progresses and the noise is reduced, the actor network starts outputting only zeros, so the player stops moving.
I have checked the network's output with sample inputs and it appears to work correctly (it does not return zeros). Is there anything in PyTorch that could cause this, or is it a bug in my code? Actor network:
import torch
import torch.nn as nn

class Actor(nn.Module):
    def __init__(self, nb_states, nb_actions, hidden1=20, hidden2=30, init_w=5):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(nb_states, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, nb_actions)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.init_weights(init_w)

    def init_weights(self, init_w):
        # hidden layers use fan-in initialization, output layer a uniform range
        self.fc1.weight.data = fanin_init(self.fc1.weight.data.size())
        self.fc2.weight.data = fanin_init(self.fc2.weight.data.size())
        self.fc3.weight.data.uniform_(-init_w, init_w)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.tanh(out)   # actions squashed into [-1, 1]
        return out
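The fanin_init used above is the usual fan-in uniform initializer from the DDPG paper, and this is roughly how I sanity-check the actor on a sample input (a minimal sketch; nb_states=8 and nb_actions=2 are placeholder values, not my real dimensions):

def fanin_init(size, fanin=None):
    # uniform init in [-1/sqrt(fan_in), 1/sqrt(fan_in)];
    # for an nn.Linear weight of shape (out_features, in_features), fan-in is size[1]
    fanin = fanin or size[1]
    v = 1.0 / fanin ** 0.5
    return torch.empty(size).uniform_(-v, v)

actor = Actor(nb_states=8, nb_actions=2)
with torch.no_grad():
    sample_state = torch.rand(1, 8)
    print(actor(sample_state))   # prints non-zero actions in [-1, 1]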
Training:
def critic_train(self, s1, a1, r1, s2):
    # Bellman target from the target networks, detached so only the critic is updated
    a2 = self.trgt_actor.forward(s2).detach()
    next_val = torch.squeeze(self.trgt_critic.forward((s2, a2)).detach())
    y_expected = r1 + self.GAMMA * next_val
    y_predicted = torch.squeeze(self.critic.forward((s1, a1)))
    loss_critic = F.smooth_l1_loss(y_predicted, y_expected)
    self.critic_optim.zero_grad()
    loss_critic.backward()
    self.critic_optim.step()
    return None

def actor_train(self, s1, a1, r1, s2):
    # maximize Q(s, actor(s)) by minimizing its negative
    pred_a1 = self.actor.forward(s1)
    loss_actor = -1 * torch.sum(self.critic.forward((s1, pred_a1)))
    self.actor_optim.zero_grad()
    loss_actor.backward()
    self.actor_optim.step()
    soft_update(self.trgt_actor, self.actor, 0.01)
    soft_update(self.trgt_critic, self.critic, 0.01)
    return None
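For context, critic.forward takes the state-action pair as a tuple; the critic is roughly of this shape (a minimal sketch with placeholder hidden sizes, not my exact code):

class Critic(nn.Module):
    def __init__(self, nb_states, nb_actions, hidden1=20, hidden2=30):
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(nb_states, hidden1)
        # the action enters at the second layer, concatenated with the state features
        self.fc2 = nn.Linear(hidden1 + nb_actions, hidden2)
        self.fc3 = nn.Linear(hidden2, 1)
        self.relu = nn.ReLU()

    def forward(self, xs):
        x, a = xs                                         # (state, action) tuple
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(torch.cat([out, a], dim=1)))
        return self.fc3(out)                              # one Q(s, a) value per sample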
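soft_update is the usual Polyak averaging of the target networks toward the learned networks; roughly (a sketch of the standard helper, where tau is the mixing factor):

def soft_update(target, source, tau):
    # target <- (1 - tau) * target + tau * source, in place and without gradients
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.data.copy_((1.0 - tau) * t_param.data + tau * s_param.data)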
Thanks.