我正在使用Pytorch解决一个简单的回归问题。我正在尝试做的事情似乎有些棘手。所以我有两个模型,一个粗糙模型和一个差异模型。我知道我的简单回归问题只能使用一个模型来解决,但是我正在研究如何减少神经网络回归模型错误,这就是为什么我使用这种复杂的体系结构。该架构由两个MLP组成,第一个称为粗模型,用于学习输入输出关系,它实际上应该足以学习和拟合数据并做出良好的预测,但是我要添加一个额外的神经网络,称为差异模型它应该学习输入错误关系,并将这些额外的知识加回到粗略模型中。 我知道要理解它可能很复杂,因此我将在此处编写一些架构工作方式的步骤:
1。训练粗略模型
2.得到粗略模型的预测,我们称其为y_coarse
3. 计算粗略模型的误差(我指的不是 MSE,而是模型预测相对于真实目标值的偏差),换句话说,我计算的是 y_true - y_coarse
4. 训练差异模型(输入与粗略模型相同,但这里的输出目标是我在第 3 步中计算出的误差)
5。获取差异模型的预测,我们将其称为y_diff
6。最终预测为y_coarse + y_diff
所有这些的目的是通过将差模型学习到的偏差添加到粗模型的预测中,最终获得更好的预测。优点是这样的体系结构可以对嘈杂的数据做出更好的预测,但是我在 Pytorch 中实现它时遇到了问题:粗模型训练得很好并给出了不错的结果,但差模型的训练结果很差,差模型的神经网络无法拟合误差数据。我尝试调整超参数,但没有效果。这是我的代码:from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import r2_score  # used below (fit / final evaluation) but was never imported
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
# create random dataset for regression
dataset = make_regression(n_samples=1000, n_features=1, noise=3, random_state=0)
X, y = dataset
# implementation using pytorch
# transform data to Tensors and split testset and validation set
y = y.reshape(-1, 1)  # MSELoss expects targets shaped (n_samples, 1), matching the model output
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
X_train_torch = torch.from_numpy(X_train.astype(np.float32))
y_train_torch = torch.from_numpy(y_train.astype(np.float32))
X_test_torch = torch.from_numpy(X_test.astype(np.float32))
y_test_torch = torch.from_numpy(y_test.astype(np.float32))
# normalize data
# BUG FIX: the test split must be standardized with the *training* statistics.
# The original normalized each split with its own mean/std, so the model saw
# test inputs on a slightly different scale than the one it was trained on.
train_mean = torch.mean(X_train_torch)
train_std = torch.std(X_train_torch)
X_train_torch = (X_train_torch - train_mean) / train_std
X_test_torch = (X_test_torch - train_mean) / train_std
print(X_train_torch.shape, y_train_torch.shape)
# create a TensorDataset from the actual dataset
coarse_trainset = TensorDataset(X_train_torch, y_train_torch)
coarse_testset = TensorDataset(X_test_torch, y_test_torch)
# create the loader of the dataset
coarse_train_loader = DataLoader(coarse_trainset, batch_size=32, shuffle=True)
coarse_test_loader = DataLoader(coarse_testset, batch_size=32, shuffle=False)
# definition of the Coarse Model
class CoarseModel(nn.Module):
    """Main neural network that learns the input/position relationship.

    A two-layer perceptron: Linear -> ReLU -> Linear.
    """

    def __init__(self, n_features, n_hidden, n_out):
        super().__init__()
        # keep the attribute names fc1/fc2 so state_dict keys stay stable
        self.fc1 = nn.Linear(n_features, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_out)

    def forward(self, x):
        # hidden representation with ReLU non-linearity, then a linear read-out
        hidden = self.fc1(x).relu()
        return self.fc2(hidden)
# definition of the Difference Model
class DifferenceModel(nn.Module):
    """Difference model: predicts the input/error relationship to refine
    the coarse model's output (same Linear -> ReLU -> Linear shape)."""

    def __init__(self, n_features, n_hidden, n_out):
        super().__init__()
        # attribute names fc1/fc2 preserved for checkpoint compatibility
        self.fc1 = nn.Linear(n_features, n_hidden)
        self.fc2 = nn.Linear(n_hidden, n_out)

    def forward(self, x):
        activation = torch.relu(self.fc1(x))
        out = self.fc2(activation)
        return out
# instantiate the coarse network: X.shape[1] input features, 10 hidden units, 1 output
coarse_model = CoarseModel(n_features=X.shape[1], n_hidden=10, n_out=1)
# training function => will perform a single training step given a batch
def training_func(model, criterion, optimizer):
    """Build a closure that performs one optimization step per call.

    Returns a function (sample, target) -> float: the scalar loss of
    that single batch, after the parameter update has been applied.
    """
    def train_step(sample, target):
        model.train()             # ensure layers are in training mode
        optimizer.zero_grad()     # drop gradients from the previous step
        prediction = model(sample)
        batch_loss = criterion(prediction, target)
        batch_loss.backward()     # back-propagate
        optimizer.step()          # apply the parameter update
        return batch_loss.item()
    return train_step
# fit function will train the given Model over defined epochs
def fit(epochs, model, lr, train_loader, test_loader):
    """Train `model` with SGD + momentum and evaluate it after every epoch.

    Args:
        epochs: number of passes over `train_loader`.
        model: the nn.Module to optimize.
        lr: learning rate for SGD (momentum is fixed at 0.9).
        train_loader: DataLoader yielding (input, target) training batches.
        test_loader: DataLoader yielding (input, target) evaluation batches.

    Returns:
        (train_losses, test_losses, accuracy_list): per-batch training losses,
        per-batch test losses and per-batch R^2 scores, accumulated over ALL
        epochs (so the printed numbers are running averages of the whole run).
    """
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    train_losses, test_losses, accuracy_list = ([] for _ in range(3))
    train = training_func(model=model, criterion=criterion, optimizer=optimizer)

    def r2(target, pred):
        # BUG FIX: the original called sklearn's r2_score, which was never
        # imported, raising NameError at runtime.  Compute R^2 directly:
        # R^2 = 1 - SS_res / SS_tot, the same value sklearn returns for a
        # single-output regression batch (assumes the batch targets are not
        # all identical, which would make SS_tot zero).
        ss_res = torch.sum((target - pred) ** 2)
        ss_tot = torch.sum((target - torch.mean(target)) ** 2)
        return (1.0 - ss_res / ss_tot).item()

    for e in range(epochs):
        for x, y in train_loader:
            train_losses.append(train(x, y))
        # evaluate once per epoch, with gradients disabled
        # (the original used `for ... else`, which always runs since there
        # is no `break` — plain sequencing is equivalent and clearer)
        with torch.no_grad():
            model.eval()
            for x_test, y_test in test_loader:
                pred = model(x_test)
                test_losses.append(criterion(pred, y_test).item())
                accuracy_list.append(r2(y_test, pred))
        print(f"Epoch: {e} => average training loss= {sum(train_losses)/ len(train_losses)} average test losses= {sum(test_losses)/ len(test_losses)} and accuracy={sum(accuracy_list)/ len(accuracy_list)}")
    return train_losses, test_losses, accuracy_list
# plot the results of the training and testing loss values over the iterations
def plot_results(train_losses, test_losses, accs):
    """Show three sequential plots: training loss, test loss and accuracy."""
    curves = [
        (train_losses, "train losses over time"),
        (test_losses, "test losses over time"),
        (accs, "accuracy over time"),
    ]
    for values, title in curves:
        plt.plot(values, color="b")
        plt.title(title)
        plt.show()
# train the Coarse Model
# train the coarse model and report its final metrics
train_losses, test_losses, accs = fit(
    epochs=1000,
    model=coarse_model,
    lr=1e-4,
    train_loader=coarse_train_loader,
    test_loader=coarse_test_loader,
)
plot_results(train_losses, test_losses, accs)
print("final train loss value = ", train_losses[-1])
print("final test loss value = ", test_losses[-1])
print("final accuracy of the model => ", accs[-1])
#### and now I train the difference Model, My problem is that this Model is not fitting the data #####
# training process of the Difference Model: => the desired value is the difference between the desired output and the the coarse Model Predictions
# training process of the Difference Model: its target is the residual,
# i.e. the true target minus the (frozen) coarse model's prediction
diff_model = DifferenceModel(X.shape[1], 10, 1)

def _residual_targets(inputs, targets):
    """Return targets - coarse_model(inputs), detached from autograd.

    torch.no_grad() avoids building an (unused) autograd graph for the
    coarse model's forward pass — the original built the graph and then
    detached, which wastes memory.  The residual is what the difference
    model is trained to predict.
    """
    with torch.no_grad():
        coarse_pred = coarse_model(inputs)
    return targets - coarse_pred

d_diff_train = _residual_targets(X_train_torch, y_train_torch)
d_diff_test = _residual_targets(X_test_torch, y_test_torch)

diff_trainset = TensorDataset(X_train_torch, d_diff_train)
diff_testset = TensorDataset(X_test_torch, d_diff_test)
diff_train_loader = DataLoader(diff_trainset, batch_size=32, shuffle=True)
diff_test_loader = DataLoader(diff_testset, batch_size=32, shuffle=False)
train_losses, test_losses, accs = fit(epochs=1000, model=diff_model, lr=1e-3, train_loader=diff_train_loader, test_loader=diff_test_loader)
plot_results(train_losses, test_losses, accs)
print("final train loss value = ", train_losses[-1])
print("final test loss value = ", test_losses[-1])
print("final accuracy = ", accs[-1])
我还使用此代码来测试整个模型,并将整个预测与粗略模型预测进行比较:
# compare the combined (coarse + difference) prediction against coarse alone
with torch.no_grad():
    coarse_model.eval()
    diff_model.eval()
    criterion = nn.MSELoss()
    # final prediction = coarse prediction + predicted residual
    coarse_preds = coarse_model(X_test_torch)
    diff_model_preds = diff_model(X_test_torch)
    y_final = coarse_preds + diff_model_preds
    coarse_loss = criterion(coarse_preds, y_test_torch)
    y_final_loss = criterion(y_final, y_test_torch)
    print(f"coarse Model loss = {coarse_loss.item()} / Final Loss = {y_final_loss.item()}")
    # r2_score comes from sklearn.metrics (import added at the top of the
    # file — the original referenced it without importing it).  The tensors
    # carry no grad inside no_grad(), so .detach() before .numpy() is redundant.
    coarse_score = r2_score(y_test_torch.numpy(), coarse_preds.numpy())
    y_final_score = r2_score(y_test_torch.numpy(), y_final.numpy())
    print(f"coarse Model score = {coarse_score} / y_final score = {y_final_score}")
您可以在 google colab 上运行这段代码,以便了解我想做的事情。奇怪的是,如果我注释掉 coarse_model 的训练,差值模型就能够拟合数据并给出良好的预测,但是当我取消注释 coarse_model 时,差值模型就失去了拟合数据的能力。如您所见,我在一开始就对数据进行了标准化,所以我不知道为什么会这样。当我单独使用每个模型时,它都工作得很好,但是如果我像这样组合它们,差异模型就无法拟合数据。有人知道为什么吗?我希望我对这个问题的解释足够清楚,我知道这个问题有些棘手,希望这里有人可以帮助我。预先感谢