我要使用大量格式正确的时间序列数据,以用于预测问题。数据的结构形式如下:
2017-09-26 10:11:57 0.073648
2017-09-26 10:12:57 0.137538
2017-09-26 10:13:57 0.200694
2017-09-26 10:14:57 0.254013
...
将此表示为np.array:
array([0.07364818, 0.13753798, 0.20069409, ..., 0.47428173, 0.47428599,
0.45940167])
到目前为止,我的模型如下:
序列
class Sequence(nn.Module):
def __init__(self):
super(Sequence, self).__init__()
self.lstm1 = nn.LSTMCell(1, 51)
self.lstm2 = nn.LSTMCell(51, 51)
self.linear = nn.Linear(51, 1)
def forward(self, input, future = 0):
outputs = []
h_t = torch.zeros(input.size(0), 51, dtype=torch.double)
c_t = torch.zeros(input.size(0), 51, dtype=torch.double)
h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)
for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
h_t, c_t = self.lstm1(input_t, (h_t, c_t))
h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
output = self.linear(h_t2)
outputs += [output]
for i in range(future):# if we should predict the future
h_t, c_t = self.lstm1(output, (h_t, c_t))
h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
output = self.linear(h_t2)
outputs += [output]
outputs = torch.stack(outputs, 1).squeeze(2)
return outputs
火车
# set random seed to 0
np.random.seed(0)
torch.manual_seed(0)
split = int(len(CO) * 0.7)
train = torch.from_numpy(CO[:split])
valid = torch.from_numpy(CO[split:])
# build the model
seq = Sequence()
seq.double()
criterion = nn.MSELoss()
# use LBFGS as optimizer since we can load the whole data to train
optimizer = optim.LBFGS(seq.parameters(), lr=0.8)
for i in range(15):
print('STEP: ', i)
def closure():
optimizer.zero_grad()
out = seq(input)
loss = criterion(out, target)
print('loss:', loss.item())
loss.backward()
return loss
optimizer.step(closure)
with torch.no_grad():
future = 1000
pred = seq(test_input, future=future)
loss = criterion(pred[:, :-future], test_target)
print('test loss:', loss.item())
y = pred.detach().numpy()
我收到的错误指出:
RuntimeError:尺寸超出范围(预计在[-1,0]范围内,但得到1)
我不确定我理解这是什么原因或如何解决。任何建议将不胜感激。
谢谢