I have a problem with MQRNN, the Multi-Horizon Quantile Recurrent Forecaster described here: https://arxiv.org/abs/1711.11053
This is my code (short version):
import torch
from torch import nn
import torch.nn.functional as F
# Structure of neural network
class MQRNN(nn.Module):
    def __init__(self, device, output_horizon=5, n_products=100, hidden_dim=200, n_layers=2):
        super(MQRNN, self).__init__()
        self.device = device
        # encoded = hidden_dim*2
        encoded = hidden_dim
        self.output_horizon = output_horizon
        self.n_products = n_products
        self.output = output_horizon * n_products
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        # LAYERS
        self.LSTM_encoding = nn.LSTM(self.n_products, self.hidden_dim, self.n_layers, batch_first=True).to(self.device)
        self.MLPglob = nn.Linear(encoded, self.output + 1).to(self.device)
        self.MLPlocs = nn.ModuleList([nn.Linear(2, 3) for i in range(self.output)]).to(self.device)

    def init_hidden(self, batch_size):
        # The initial hidden states must live on the same device as the input.
        h0, c0 = (torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(self.device),
                  torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(self.device))
        return h0, c0
    def forward(self, x):
        batch_size = x.size(0)
        _, (h, c) = self.LSTM_encoding(x, self.init_hidden(batch_size))
        # h, c = h[-1], c[-1]
        # hc = torch.cat((h, c), 1)  # Concatenate the matrices into one.
        # hc = F.relu(hc)
        # C = self.MLPglob(hc)
        c = F.relu(h[-1])
        C = self.MLPglob(c)
        C = F.relu(C)
        C, Ca = C[:, :-1], C[:, -1:]
        C = C.view(-1, self.n_products, self.output_horizon)
        output = torch.rand(self.n_products, self.output_horizon, batch_size, 3).to(self.device)  # 3 quantiles -> hence the trailing 3.
        # output[0,0] = self.MLPlocs[0](torch.cat((C[:, 0, 0].view(-1,1), Ca),1))
        for i in range(output.size(0)):
            for j in range(output.size(1)):
                output[i, j] = self.MLPlocs[output.size(1) * i + j](torch.cat((C[:, i, j].view(-1, 1), Ca), 1))
        return output.permute(2, 1, 0, 3)
# Loss function
def quantile_loss(y_pred, y_real):
    """
    :param y_pred: 4 dimensions: batch_size, horizon, n_products, n_quantiles
    :param y_real: 3 dimensions: batch_size, horizon, n_products
    :return: quantile (pinball) loss, normalized by the sum of the targets
    """
    y_pred = y_pred.permute(3, 0, 1, 2)
    loss = torch.sum(2 * (0.1 * F.relu(y_real - y_pred[0]) + 0.9 * F.relu(y_pred[0] - y_real) +
                          0.5 * F.relu(y_real - y_pred[1]) + 0.5 * F.relu(y_pred[1] - y_real) +
                          0.9 * F.relu(y_real - y_pred[2]) + 0.1 * F.relu(y_pred[2] - y_real)))
    loss = loss / torch.sum(y_real)
    return loss
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Sample data
x_1 = torch.FloatTensor([[[5000],[5043],[5015],[5100],[5109],[5150],[5183],[5222],[5243],[5259],[5298],
                          [5350],[5340],[5392],[5422],[5465],[5492],[5520],[5589],[5643]]]).to(device)
y_1 = torch.FloatTensor([[[5700],[5743],[5798],[5782],[5834]]]).to(device)
x_2 = torch.FloatTensor([[[5000],[5050],[5100],[5150],[5200],[5250],[5300],[5350],[5400],[5450],[5500],
                          [5550],[5600],[5650],[5700],[5750],[5800],[5850],[5900],[5950]]]).to(device)
y_2 = torch.FloatTensor([[[6000],[6050],[6100],[6150],[6200]]]).to(device)
x_3 = torch.FloatTensor([[[423],[413],[400],[392],[379],[354],[359],[352],[320],[298],[250],
                          [254],[243],[212],[2140],[201],[204],[254],[214],[355]]]).to(device)
y_3 = torch.FloatTensor([[[241],[231],[231],[221],[150]]]).to(device)
xx = torch.cat((x_1, x_3)).permute(2,1,0)
yy = torch.cat((y_1, y_3)).permute(2,1,0)
xx_2 = xx*10
yy_2 = yy*10
hidden_dim = 10
n_output = 5
n_products = xx.size(2)
model = MQRNN(device=device, n_products=n_products, n_layers=1,
              output_horizon=n_output, hidden_dim=hidden_dim)
model.to(device)
ADAM = torch.optim.Adam(model.parameters(), lr=0.0126) # lr=100.0126
n_epochs = 1600
for epoch in range(n_epochs):
    pred = model(xx)
    loss = quantile_loss(pred, yy)
    # ADAM.zero_grad()
    # loss.backward()
    # ADAM.step()
    pred = model(xx_2)
    loss += quantile_loss(pred, yy_2)
    ADAM.zero_grad()
    loss.backward()
    ADAM.step()
    if epoch % 100 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
Of course I use different, real data, but this example makes my problems clear.
The first problem is that the model learns very slowly: with a normal learning rate (e.g. 0.0126) there is no real drop in the loss at the start. In practice I have to set a very high learning rate, around 100 at the beginning, to make learning faster. With a normal learning rate the results are similar, it just takes much longer.
From the console:
Epoch: 0/1600............. Loss: 6.0000
Epoch: 100/1600............. Loss: 2.7139
Epoch: 200/1600............. Loss: 2.7096
Epoch: 300/1600............. Loss: 2.7103
Epoch: 400/1600............. Loss: 2.7096
Epoch: 500/1600............. Loss: 2.7085
Epoch: 600/1600............. Loss: 2.7090
Epoch: 700/1600............. Loss: 2.7099
Epoch: 800/1600............. Loss: 2.7082
Epoch: 900/1600............. Loss: 2.7098
Epoch: 1000/1600............. Loss: 2.7119
Epoch: 1100/1600............. Loss: 2.7107
Epoch: 1200/1600............. Loss: 2.7097
Epoch: 1300/1600............. Loss: 2.7105
Epoch: 1400/1600............. Loss: 2.7088
Epoch: 1500/1600............. Loss: 2.7108
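A plausible contributor to this plateau is data scale: the pinball loss is piecewise linear, and quantile_loss divides it by the sum of the targets (tens of thousands here), which shrinks every gradient by the same factor; that would explain why a learning rate near 100 appears necessary. A minimal normalization sketch, assuming standard scaling is acceptable for this data (the scaler variables are illustrative, not part of the original code):

x_mean, x_std = xx.mean(), xx.std()   # statistics from the training inputs only
xx_norm = (xx - x_mean) / x_std
yy_norm = (yy - x_mean) / x_std       # same scaler, so predictions map back consistently
pred = model(xx_norm)
loss = quantile_loss(pred, yy_norm)
# To read predictions in the original units: pred * x_std + x_mean.
# Note: quantile_loss divides by torch.sum(y_real), which can be near zero after
# centering; that division may need to be dropped when training on normalized data.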
The second, more important problem is that the network does not react to the structure of the input data. I feed it two tensors, where the second is the first multiplied by 10, and as you can see the network settles into some local minimum and returns the same output for every input.
model(xx)
tensor([[[[5665.4424, 5699.3140, 6114.7104],
[ 215.9684, 236.8552, 655.0918]],
[[5719.0410, 5759.0107, 6109.4790],
[ 121.5060, 241.7687, 653.9695]],
[[5693.7803, 5769.5596, 6224.1328],
[ 237.4870, 241.9645, 652.4545]],
[[5752.0298, 5783.4038, 6215.9785],
[ 183.8642, 209.5887, 656.7449]],
[[5815.0850, 5814.8887, 6209.8477],
[ 120.6066, 148.9004, 677.6846]]]], grad_fn=<PermuteBackward>)
model(xx_2)
tensor([[[[5665.4424, 5699.3140, 6114.7104],
[ 215.9684, 236.8552, 655.0918]],
[[5719.0410, 5759.0107, 6109.4790],
[ 121.5060, 241.7687, 653.9695]],
[[5693.7803, 5769.5596, 6224.1328],
[ 237.4870, 241.9645, 652.4545]],
[[5752.0298, 5783.4038, 6215.9785],
[ 183.8642, 209.5887, 656.7449]],
[[5815.0850, 5814.8887, 6209.8477],
[ 120.6066, 148.9004, 677.6846]]]], grad_fn=<PermuteBackward>)
yy
tensor([[[5700., 241.],
[5743., 231.],
[5798., 231.],
[5782., 221.],
[5834., 150.]]])
yy_2
tensor([[[57000., 2410.],
[57430., 2310.],
[57980., 2310.],
[57820., 2210.],
[58340., 1500.]]])
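A common remedy for this scale-insensitivity, sketched here as one option (the helper below is illustrative, not from the post): scale each input window by its own statistic and rescale the predictions, so that xx_2 = xx * 10 produces outputs ten times larger.

def scale_window(x):
    # One scale per (batch, product): mean absolute value over the time axis.
    s = x.abs().mean(dim=1, keepdim=True) + 1e-8   # shape (batch, 1, n_products)
    return x / s, s

xx_s, s = scale_window(xx)
pred = model(xx_s) * s.unsqueeze(-1)   # broadcast the scale over horizon and quantiles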
How can I fix this to get reasonable predictions?
Answer 0 (score: 0)
Did you find a solution? I found a working implementation here: https://github.com/awslabs/gluon-ts/blob/0d963f7dc55ef866d86e33633a28d57dfab33adb/src/gluonts/model/seq2seq/_mq_dnn_estimator.py but would love a simpler/lighter version in PyTorch.
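For what it's worth, the core of a lighter PyTorch version is just a vectorized pinball loss; a minimal sketch, assuming predictions shaped (batch, horizon, n_products, n_quantiles):

import torch

def pinball_loss(y_pred, y_real, quantiles=(0.1, 0.5, 0.9)):
    # y_pred: (batch, horizon, n_products, n_quantiles); y_real: (batch, horizon, n_products)
    q = torch.tensor(quantiles, dtype=y_pred.dtype, device=y_pred.device)
    diff = y_real.unsqueeze(-1) - y_pred   # broadcast targets over the quantile axis
    return torch.mean(torch.maximum(q * diff, (q - 1) * diff))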