I have created a simple neural network with PyTorch that is used to compute a unit's movement inside a grid.
import torch
import torch.nn as nn
import torch.nn.functional as F

actions = [
    'none',
    'left',
    'right',
    'up',
    'down'
]

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        hl_dim = 32
        # initial (h, c) state for the 2-layer LSTM, batch size 1
        self.hidden = (
            torch.zeros(2, 1, hl_dim),
            torch.zeros(2, 1, hl_dim)
        )
        in_len = 4
        hl_len = 32
        ou_len = len(actions)
        self.in1 = nn.Linear(in_len, hl_len)
        self.hl1 = nn.LSTM(hl_len, hl_dim, 2, dropout=0.05)
        self.ou1 = nn.Linear(hl_len, ou_len)

    def forward(self, input):
        output = F.relu(self.in1(input.view(1, -1)).unsqueeze(1))
        output, self.hidden = self.hl1(output, self.hidden)
        output = F.relu(self.ou1(output))
        return output

model = Network()
model.zero_grad()
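Calling it with a made-up state gives one score per action (the values here are just placeholders I picked for a quick shape check):

# Quick shape check with a dummy state (pos_x, pos_y, goal_x, goal_y)
with torch.no_grad():
    scores = model(torch.tensor([6.0, 6.0, 10.0, 10.0]))
print(scores.shape)  # torch.Size([1, 1, 5]) -- one score per action in `actions`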
After each step/decision, a reward is computed for that action and the world is updated.
# initial world
pos_x = 6
pos_y = 6
goal_x = 10
goal_y = 10

while True:
    old_state = torch.tensor([pos_x, pos_y, goal_x, goal_y], dtype=torch.float)
    result = model(old_state)
    action = torch.argmax(result)
    action_str = actions[action]
    # calculate world updates...
    terminal = ...  # goal found, 0 or 1
    reward = ...    # in range [-1, +1]
    new_state = torch.tensor([new_pos_x, new_pos_y, goal_x, goal_y], dtype=torch.float)
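The elided world update inside the loop looks roughly like the sketch below; the move table and the distance-based reward are just the simplified version I am experimenting with, not a fixed part of the problem:

# Simplified world update: move the unit, then reward it for getting closer to the goal
moves = {'none': (0, 0), 'left': (-1, 0), 'right': (1, 0), 'up': (0, -1), 'down': (0, 1)}
dx, dy = moves[action_str]
new_pos_x, new_pos_y = pos_x + dx, pos_y + dy

old_dist = abs(goal_x - pos_x) + abs(goal_y - pos_y)
new_dist = abs(goal_x - new_pos_x) + abs(goal_y - new_pos_y)

terminal = 1 if (new_pos_x, new_pos_y) == (goal_x, goal_y) else 0
reward = 1.0 if terminal else (0.1 if new_dist < old_dist else -0.1)

pos_x, pos_y = new_pos_x, new_pos_y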
Based on the decision that was made, I now have old_state, result, new_state, terminal and reward available.
Now I want to compute the loss and run the optimizer on these values. I assume I can use PyTorch's regular loss and optimizer facilities for this, which I have tentatively declared as follows.
import torch.optim as optim

learningrate = 0.01
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learningrate)
Is this the right optimization approach in this case, and how do I generate the arguments needed to call the loss function?
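My current guess at how the loss inputs could be formed is something like the sketch below, based on a Q-learning-style target with a discount factor gamma that I introduced myself; I am not sure this is the intended way:

# Guess: Q-learning-style update using the values collected above (gamma is my own assumption)
gamma = 0.9

# predicted value of the action that was actually taken
predicted_q = result.view(-1)[action]

# target: reward, plus the discounted best value of the next state if it was not terminal
with torch.no_grad():
    next_q = model(new_state).view(-1).max()
target_q = reward + gamma * next_q * (1 - terminal)

loss = loss_function(predicted_q, target_q)

optimizer.zero_grad()
loss.backward()
optimizer.step()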