I am trying to add the final weights of each trained model to a list, using the following code:
%reset -f
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
import torchvision.datasets as dset
import os
import torch.nn.functional as F
import time
import random
import pickle
from sklearn.metrics import confusion_matrix
import pandas as pd
import sklearn
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
root = './data'
if not os.path.exists(root):
    os.mkdir(root)
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
batch_size = 64
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
    shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=batch_size,
    shuffle=True)
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(28*28, 500)
        self.fc2 = nn.Linear(500, 256)
        self.fc3 = nn.Linear(256, 2)

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
num_epochs = 2
random_sample_size = 200
values_0_or_1 = [t for t in train_set if (int(t[1]) == 0 or int(t[1]) == 1)]
values_0_or_1_testset = [t for t in test_set if (int(t[1]) == 0 or int(t[1]) == 1)]
print(len(values_0_or_1))
print(len(values_0_or_1_testset))
train_loader_subset = torch.utils.data.DataLoader(
    dataset=values_0_or_1,
    batch_size=batch_size,
    shuffle=True)
test_loader_subset = torch.utils.data.DataLoader(
    dataset=values_0_or_1_testset,
    batch_size=batch_size,
    shuffle=False)
train_loader = train_loader_subset
# Hyper-parameters
input_size = 100
hidden_size = 100
num_classes = 2
# learning_rate = 0.00001
learning_rate = .0001
# Device configuration
device = 'cpu'
print_progress_every_n_epochs = 1
model = NeuralNet().to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
N = len(train_loader)
# Train the model
total_step = len(train_loader)
most_recent_prediction = []
test_actual_predicted_dict = {}
rm = random.sample(list(values_0_or_1), random_sample_size)
train_loader_subset = data_utils.DataLoader(rm, batch_size=4)
weights_without_clone = []
weights_with_clone = []
for i in range(0, 2):
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader_subset):
            # Move tensors to the configured device
            images = images.reshape(-1, 2).to(device)
            labels = labels.to(device)
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if epoch % print_progress_every_n_epochs == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
    print('model fc2 weights ', model.fc2.weight.data)
    weights_without_clone.append(model.fc2.weight.data)
    weights_with_clone.append(model.fc2.weight.data.clone())
Model output:
12665
2115
Epoch [1/2], Step [50/198], Loss: 0.0968
Epoch [2/2], Step [50/198], Loss: 0.0082
model fc2 weights tensor([[-3.9507e-02, -4.0454e-02, 3.5576e-03, ..., 6.2181e-03,
4.1372e-02, -6.2960e-03],
[ 1.8778e-02, 2.7049e-02, -3.5624e-02, ..., 2.6797e-02,
2.2041e-03, -4.2284e-02],
[ 1.9571e-02, -3.2545e-02, 2.6618e-02, ..., -1.6139e-02,
4.1192e-02, -2.3458e-02],
...,
[-4.6123e-03, 2.6943e-02, 3.9979e-02, ..., -3.3848e-02,
3.6096e-02, 2.4211e-02],
[-1.4698e-02, 9.7528e-04, -2.5244e-03, ..., -3.3145e-02,
1.0888e-02, 3.1091e-02],
[-1.7451e-02, -2.1646e-02, 2.5885e-02, ..., 4.0453e-02,
-6.5324e-03, -3.5410e-02]])
Epoch [1/2], Step [50/198], Loss: 0.0025
Epoch [2/2], Step [50/198], Loss: 0.0013
model fc2 weights tensor(1.00000e-02 *
[[-3.9891, -4.0454, 0.3558, ..., 0.7168, 4.1902, -0.6253],
[ 1.8766, 2.7049, -3.5632, ..., 2.6785, 0.2192, -4.2297],
[ 2.1426, -3.2545, 2.6621, ..., -1.6285, 4.1196, -2.2653],
...,
[-0.4930, 2.6943, 3.9971, ..., -3.2940, 3.6641, 2.4248],
[-1.5160, 0.0975, -0.2524, ..., -3.1938, 1.1753, 3.1065],
[-1.8116, -2.1646, 2.5883, ..., 4.1355, -0.5921, -3.5416]])
Printing the values of weights_without_clone:
print(weights_without_clone[0])
print(weights_without_clone[1])
Output:
tensor(1.00000e-02 *
[[-3.9891, -4.0454, 0.3558, ..., 0.7168, 4.1902, -0.6253],
[ 1.8766, 2.7049, -3.5632, ..., 2.6785, 0.2192, -4.2297],
[ 2.1426, -3.2545, 2.6621, ..., -1.6285, 4.1196, -2.2653],
...,
[-0.4930, 2.6943, 3.9971, ..., -3.2940, 3.6641, 2.4248],
[-1.5160, 0.0975, -0.2524, ..., -3.1938, 1.1753, 3.1065],
[-1.8116, -2.1646, 2.5883, ..., 4.1355, -0.5921, -3.5416]])
tensor(1.00000e-02 *
[[-3.9891, -4.0454, 0.3558, ..., 0.7168, 4.1902, -0.6253],
[ 1.8766, 2.7049, -3.5632, ..., 2.6785, 0.2192, -4.2297],
[ 2.1426, -3.2545, 2.6621, ..., -1.6285, 4.1196, -2.2653],
...,
[-0.4930, 2.6943, 3.9971, ..., -3.2940, 3.6641, 2.4248],
[-1.5160, 0.0975, -0.2524, ..., -3.1938, 1.1753, 3.1065],
[-1.8116, -2.1646, 2.5883, ..., 4.1355, -0.5921, -3.5416]])
Printing the values of weights_with_clone:
print(weights_with_clone[0])
print(weights_with_clone[1])
Output:
tensor([[-3.9507e-02, -4.0454e-02, 3.5576e-03, ..., 6.2181e-03,
4.1372e-02, -6.2960e-03],
[ 1.8778e-02, 2.7049e-02, -3.5624e-02, ..., 2.6797e-02,
2.2041e-03, -4.2284e-02],
[ 1.9571e-02, -3.2545e-02, 2.6618e-02, ..., -1.6139e-02,
4.1192e-02, -2.3458e-02],
...,
[-4.6123e-03, 2.6943e-02, 3.9979e-02, ..., -3.3848e-02,
3.6096e-02, 2.4211e-02],
[-1.4698e-02, 9.7528e-04, -2.5244e-03, ..., -3.3145e-02,
1.0888e-02, 3.1091e-02],
[-1.7451e-02, -2.1646e-02, 2.5885e-02, ..., 4.0453e-02,
-6.5324e-03, -3.5410e-02]])
tensor(1.00000e-02 *
[[-3.9891, -4.0454, 0.3558, ..., 0.7168, 4.1902, -0.6253],
[ 1.8766, 2.7049, -3.5632, ..., 2.6785, 0.2192, -4.2297],
[ 2.1426, -3.2545, 2.6621, ..., -1.6285, 4.1196, -2.2653],
...,
[-0.4930, 2.6943, 3.9971, ..., -3.2940, 3.6641, 2.4248],
[-1.5160, 0.0975, -0.2524, ..., -3.1938, 1.1753, 3.1065],
[-1.8116, -2.1646, 2.5883, ..., 4.1355, -0.5921, -3.5416]])
Why is the factor 1.00000e-02 * printed in front of the final weight values of the second model?

And why does appending with clone() store the final weights of each iteration, while appending without clone() stores the same weights for every iteration?
weights_without_clone.append(model.fc2.weight.data)
weights_with_clone.append(model.fc2.weight.data.clone())
Answer 0 (score: 1)
First, I will restate your situation using a very simple model:

Code:
import torch
import torch.nn as nn
import torch.optim as optim
torch.manual_seed(42)
# Some dummy data:
X = torch.randn(100, 5, requires_grad=True, dtype=torch.float)
Y = torch.randn(100, 5, requires_grad=True, dtype=torch.float)
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(5, 5, bias=False)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(5, 5, bias=False)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

def train(model, x, y, loss_fn, optimizer, n_epochs=1000, print_loss=True):
    weights = []
    for i in range(n_epochs):
        y_hat = model(x)
        loss = loss_fn(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        if print_loss:
            print(f'| {i+1} | Loss: {loss.item():.4f}')
        optimizer.step()
        print('W:\n', model.fc2.weight.data)
        weights.append(model.fc2.weight.data)
    return weights
torch.manual_seed(42)
model = Model()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 2
weights = train(model=model,
                x=X,
                y=Y,
                loss_fn=loss_fn,
                optimizer=optimizer,
                n_epochs=n_epochs,
                print_loss=True)
Output:
| 1 | Loss: 1.0285
W:
tensor([[-0.2052, -0.1257, -0.2684, 0.0425, -0.4413],
[ 0.4034, -0.3797, 0.3448, 0.0741, -0.1450],
[ 0.2759, 0.0695, 0.3608, 0.0487, -0.1411],
[ 0.1201, -0.1213, 0.1881, 0.3990, 0.2583],
[-0.1956, 0.2581, 0.0798, 0.2270, -0.2725]])
| 2 | Loss: 1.0279
W:
tensor([[-0.2041, -0.1251, -0.2679, 0.0428, -0.4410],
[ 0.4030, -0.3795, 0.3444, 0.0738, -0.1447],
[ 0.2755, 0.0693, 0.3603, 0.0484, -0.1411],
[ 0.1200, -0.1213, 0.1879, 0.3987, 0.2580],
[-0.1958, 0.2580, 0.0796, 0.2269, -0.2725]])
OK, that works fine. Now let's take a look at weights:

Code:
print(*weights, sep='\n')
Output:
tensor([[-0.2041, -0.1251, -0.2679, 0.0428, -0.4410],
[ 0.4030, -0.3795, 0.3444, 0.0738, -0.1447],
[ 0.2755, 0.0693, 0.3603, 0.0484, -0.1411],
[ 0.1200, -0.1213, 0.1879, 0.3987, 0.2580],
[-0.1958, 0.2580, 0.0796, 0.2269, -0.2725]])
tensor([[-0.2041, -0.1251, -0.2679, 0.0428, -0.4410],
[ 0.4030, -0.3795, 0.3444, 0.0738, -0.1447],
[ 0.2755, 0.0693, 0.3603, 0.0484, -0.1411],
[ 0.1200, -0.1213, 0.1879, 0.3987, 0.2580],
[-0.1958, 0.2580, 0.0796, 0.2269, -0.2725]])
Well, this is not what we wanted, but it is actually the expected behavior. If you look again, you will see that both values in the list correspond to the weights from the second epoch. This means we did not append new tensors; we appended references to the storage where the weights actually live, which is why both entries show the same final result.

In other words, a plain append yields identical values because the appended "weight tensor" points to the very same tensor that the model keeps updating in place during training.
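Here is a minimal, self-contained sketch of that aliasing (separate from the models above; the tensor w merely stands in for model.fc2.weight.data):

import torch

w = torch.zeros(2, 2)        # stands in for model.fc2.weight.data
snapshots = []
for step in range(2):
    w.add_(1.0)              # in-place update, like an optimizer step
    snapshots.append(w)      # appends a reference to the same storage, not a copy

print(snapshots[0])          # tensor of 2s
print(snapshots[1])          # tensor of 2s -- both entries alias the same tensor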
This is why you need clone() to create a new tensor. That said, it is recommended to use tensor.clone().detach() instead, because clone() is recorded in the computation graph: if you backpropagate through the cloned tensor, the gradients flowing into the clone will also propagate to the original tensor (see the clone docs).
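A small sketch of that difference (illustrative variable names, not from the original answer): backpropagating through a clone() still reaches the original tensor, whereas clone().detach() produces a copy that is cut off from the graph:

import torch

original = torch.ones(3, requires_grad=True)

cloned = original.clone()             # clone stays connected to the graph
cloned.sum().backward()
print(original.grad)                  # tensor([1., 1., 1.]) -- gradient reached the original

detached = original.clone().detach()  # independent copy, no graph connection
print(detached.requires_grad)         # False -- backprop through it cannot touch original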
So if you want to append the weights safely, use this instead:
weights.append(model.fc2.weight.data.clone().detach())
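As a side note (this alternative is my own suggestion, not part of the original answer): if the goal is to keep a full snapshot of each trained model's final weights, deep-copying the state_dict gives the same isolation from later updates:

import copy
import torch.nn as nn

model = nn.Linear(5, 5)  # stand-in for the Model above
weight_snapshots = []
# copy.deepcopy creates independent copies of every parameter tensor,
# so later training steps cannot change the stored snapshot
weight_snapshots.append(copy.deepcopy(model.state_dict()))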