我正在尝试调整我在 github 上找到的代码,但一直在某一部分崩溃并收到 TypeError: only size-1 arrays can be converted to Python scalars
错误。我已经尝试解决它两天了。如果我正确理解了问题,我的张量 dtypes 不兼容,但我不知道如何解决这个问题。每次我尝试将张量类型更改为 DoubleTensor 或 float64 时,我都会遇到其他错误。我现在根本不知道哪个部分需要更改以及如何更改。
这是我定义的模型:
class Policy(nn.Module):
def __init__(self):
super(Policy, self).__init__()
self.input_layer = nn.Linear(11, 128)
self.hidden_1 = nn.Linear(128, 128)
self.hidden_2 = nn.Linear(32,31)
self.hidden_state = torch.tensor(torch.zeros(2,1,32)).cuda()
self.rnn = nn.GRU(128, 32, 2)
self.action_head = nn.Linear(31, 5)
self.value_head = nn.Linear(31, 1)
self.saved_actions = []
self.rewards = []
def reset_hidden(self):
self.hidden_state = torch.tensor(torch.zeros(2,1,32)).cuda()
def forward(self, x):
x = torch.tensor(x).cuda()
x = torch.sigmoid(self.input_layer(x))
x = torch.tanh(self.hidden_1(x))
x, self.hidden_state = self.rnn(x.view(1,-1,128), self.hidden_state.data)
x = F.relu(self.hidden_2(x.squeeze()))
action_scores = self.action_head(x)
state_values = self.value_head(x)
return F.softmax(action_scores, dim=-1), state_values
def forward(self, x):
conv_out = self.conv(x).view(x.size()[0], -1)
val = self.fc_val(conv_out)
adv = self.fc_adv(conv_out)
return val + (adv - adv.mean(dim=1, keepdim=True))
def act(self, state):
probs, state_value = self.forward(state)
m = Categorical(probs)
action = m.sample()
if action == 1 and env.state[0] < 1: action = torch.LongTensor([2]).squeeze().cuda.DoubleTensor()
if action == 4 and env.state[1] < 1: action = torch.LongTensor([2]).squeeze().cuda.DoubleTensor()
if action == 6 and env.state[2] < 1: action = torch.LongTensor([2]).squeeze().cuda.DoubleTensor()
self.saved_actions.append((m.log_prob(action), state_value))
return action.item()
这是它崩溃的地方
env.reset()
# In case you're running this a second time with the same model, delete the gradients
del model.rewards[:]
del model.saved_actions[:]
gamma = 0.9
log_interval = 60
def finish_episode():
R = 0
saved_actions = model.saved_actions
policy_losses = []
value_losses = []
rewards = []
for r in model.rewards[::-1]:
R = r + (gamma * R)
rewards.insert(0, R)
rewards = torch.tensor(rewards)
epsilon = (torch.rand(1) / 1e4) - 5e-5
# With different architectures, I found the following standardization step sometimes
# helpful, sometimes unhelpful.
#
rewards = (rewards - rewards.mean()) / (rewards.std(unbiased=False) + epsilon)
# Alternatively, comment it out and use the following line instead:
rewards += epsilon
for (log_prob, value), r in zip(saved_actions, rewards):
reward = torch.tensor(r - value.item()).cuda()
policy_losses.append(-log_prob * reward)
value_losses.append(F.smooth_l1_loss(value, torch.tensor([r]).cuda()))
optimizer.zero_grad()
loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
loss = torch.clamp(loss, -1e-5, 1e5)
loss.backward()
optimizer.step()
del model.rewards[:]
del model.saved_actions[:]
running_reward = 0
for episode in range(0, 4000):
state = env.reset()
state = state.type(torch.float32)
reward = 0
done = False
msg = None
while not done:
action = model.act(state)
state, reward, done, msg = env.step(action)
model.rewards.append(reward)
if done:
break
running_reward = running_reward * (1 - 1/log_interval) + reward * (1/log_interval)
finish_episode()
# Resetting the hidden state seems unnecessary - it's effectively random from the previous
# episode anyway, more random than a bunch of zeros.
# model.reset_hidden()
if msg["msg"] == "done" and env.portfolio_value() > env.starting_portfolio_value * 1.1 and running_reward > 500:
print("Early Stopping: " + str(int(reward)))
break
if episode % log_interval == 0:
print("""Episode {}: started at {:.1f}, finished at {:.1f} because {} @ t={}, \
last reward {:.1f}, running reward {:.1f}""".format(episode, env.starting_portfolio_value, \
env.portfolio_value(), msg["msg"], env.cur_timestep, reward, running_reward))
这是我得到的错误
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-74-21b617d2e36f> in <module>()
46 msg = None
47 while not done:
---> 48 action = model.act(state)
49 state, reward, done, msg = env.step(action)
50 model.rewards.append(reward)
4 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1751 if has_torch_function_variadic(input, weight):
1752 return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753 return torch._C._nn.linear(input, weight, bias)
1754
1755
RuntimeError: expected scalar type Double but found Float
这是我使用的github: https://github.com/tomgrek/RL-stocktrading/blob/master/Finance%20final.ipynb
有人建议我这个问题是因为当我使用“state”作为参数时,torch会抛出一个错误,因为它需要一个数字类型,但我不能将状态更改为任何浮点数,因为我得到另一个列表不能的错误改为 float32。
如果你能证明我做错了什么,我将不胜感激。
答案 0 :(得分:0)
首先,您是否使用相同的环境“env”和/或数据集?
其次,您添加了这一行 state = state.type(torch.float32) 并且它没有抛出错误,所以我认为 state 已经是一个张量(这有点奇怪)。如果您必须将类型更改为 float32,那么在下一个 while 循环中您可能忘记更改类型。
while not done:
action = model.act(state)
state, reward, done, msg = env.step(action)
state = state.float() # To add as I think env.step(action) returns a long tensor for some reason
model.rewards.append(reward)
if done:
break
祝你好运。