我正在使用 PyTorch 的 LSTM 单元实现一个循环神经网络(RNN),目标是预测多元时间序列。网络的输入是来自同一过程的不同运行的数据窗口。
这是我实施网络的方式:
# Training loop: every epoch draws a random batch of runs, sweeps it window by
# window (back-propagating through one window at a time), and accumulates the
# validation loss over the same time indices without tracking gradients.
for t in range(nepochs):
    # Sample a random subset of the runs reserved for training and use it as
    # this epoch's batch. random.sample accepts a range directly.
    exp_number = random.sample(range(int(train_set * nexp)), nbatch)
    X_train = np.zeros((time_points, nbatch, nx))
    Y_train = np.zeros((time_points, nbatch, ny))
    for k, ex in enumerate(exp_number):
        X_train[:, k, :] = train_input[:, ex, :]
        Y_train[:, k, :] = train_output[:, ex, :]
    X_train = torch.tensor(X_train, device=model.device, dtype=model.dtype)
    Y_train = torch.tensor(Y_train, device=model.device, dtype=model.dtype)

    # Initialise the hidden and cell states at 0 for both data streams.
    h = torch.zeros(nbatch, nhidden, device=model.device, dtype=model.dtype)
    c = torch.zeros(nbatch, nhidden, device=model.device, dtype=model.dtype)
    h_val = torch.zeros(nbatch_val, nhidden, device=model.device, dtype=model.dtype)
    c_val = torch.zeros(nbatch_val, nhidden, device=model.device, dtype=model.dtype)

    # Walk through the whole horizon in consecutive windows of length
    # time_window. The "+ 1" keeps the last full window when time_points is an
    # exact multiple of time_window; i + j still never exceeds time_points - 1.
    offset = time_window
    for ii, i in enumerate(range(0, time_points - time_window + 1, offset)):
        model.zero_grad()
        loss_train = 0
        loss_val = 0
        r2 = 0

        # Within the window, predict step by step and accumulate the losses.
        for j in range(time_window):
            Y_pred, (h, c) = model(X_train[i + j, :, :], (h, c), train=True)
            loss_train += MyLoss(Y_pred, Y_train[i + j, :, :])

            # Validation pass: no gradients, dropout disabled (train=False).
            # NOTE(review): inputs come from X_test while targets come from
            # Y_val -- confirm both refer to the same validation split.
            with torch.no_grad():
                Y_val_pred, (h_val, c_val) = model(X_test[i + j, :, :], (h_val, c_val))
                # Score the validation prediction produced just above.
                loss_val += MyLoss(Y_val_pred, Y_val[i + j, :, :])

            y_train_pred_array[i + j, :, :] = Y_pred.detach().numpy()

        # Back-propagate through the current window only, then update.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # Cut the graph so the next window does not back-propagate into this
        # one (its buffers have already been freed by backward()).
        h = h.detach()
        c = c.detach()

    # Report the losses of the last window, each normalised by the batch size
    # of the data it was computed on (nbatch_val matches h_val / c_val).
    loss_array_train.append(loss_train.item()/nbatch)
    loss_array_test.append(loss_val.item()/nbatch_val)
    print('==> Training ... {:.1f}%'.format((t + 1) / nepochs * 100))
    print('==> Loss Train... {:.4f}'.format(loss_train.item()/nbatch))
    print('==> Loss Validation... {:.4f}'.format(loss_val.item()/nbatch_val))
这是LSTM类:
class MyLSTM(nn.Module):
    """Stack of ``nn.LSTMCell`` layers with a tanh-squashed linear read-out.

    The module advances a single time step per call: it consumes one batch of
    inputs together with the current ``(h, c)`` state and returns the
    prediction plus the updated state.

    Args:
        ni: Number of input features.
        no: Number of output features.
        nh: Hidden size shared by every LSTM cell.
        nlayers: Number of stacked LSTM cells.
    """

    def __init__(self, ni=6, no=3, nh=10, nlayers=1):
        super(MyLSTM, self).__init__()
        self.ni = ni
        self.no = no
        self.nh = nh
        self.nlayers = nlayers
        # The first cell maps ni -> nh; every further cell maps nh -> nh.
        self.lstms = nn.ModuleList(
            [nn.LSTMCell(self.ni, self.nh)]
            + [nn.LSTMCell(self.nh, self.nh) for _ in range(nlayers - 1)]
        )
        self.out = nn.Linear(self.nh, self.no)
        self.do = nn.Dropout(p=0.2)
        self.actfn = nn.Tanh()
        # Device and dtype that callers read when they build tensors for the model.
        self.device = torch.device('cpu')
        self.dtype = torch.float

    def forward(self, x, h0=None, train=False):
        """Advance the network by one time step.

        Args:
            x: Input batch whose first dimension is the batch size.
            h0: ``(h, c)`` tuple with the current hidden and cell state, or
                ``None`` to start from an all-zero state.
            train: When true, dropout is applied to each cell's output. Note
                that ``nn.Dropout`` only drops values while the module is in
                training mode.

        Returns:
            ``(y, (h, c))``: the tanh-activated output and the updated state.
        """
        hs = x  # input to the first cell
        if h0 is None:
            # Allocate the zero state from the input so that it always lives
            # on the same device and has the same dtype as x.
            h = x.new_zeros(x.shape[0], self.nh)
            c = x.new_zeros(x.shape[0], self.nh)
        else:
            h, c = h0

        # LSTM cells.
        # NOTE(review): a single (h, c) pair is threaded through all cells, so
        # with nlayers > 1 each cell starts from the state just produced by
        # the cell below it -- confirm this is intended.
        for cell in self.lstms:
            h, c = cell(hs, (h, c))
            hs = self.do(h) if train else h

        # Output layer
        y = self.actfn(self.out(hs))
        return y, (h, c)
我想我在验证损失的计算上做错了,因为验证集比测试集大20倍。
使用 PyTorch 实现 LSTM 的示例非常稀少,我找不到计算验证损失的示例。我上面展示的做法正确吗?