我想在潜在空间中计算矩阵乘法的乘积,并通过优化器来优化权重矩阵。我尝试了多种实现方式,但以下代码中 “pi_” 的值始终不变。我该怎么办?
我尝试了不同的函数来计算乘积,例如 torch.mm()、torch.matmul() 和 @ 运算符,但权重矩阵 “pi_” 始终没有变化。
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
#from torchvision import transforms
from torchvision.datasets import MNIST
def get_mnist(data_dir='./data/mnist/', batch_size=128):
    """Build a shuffled DataLoader over the MNIST train and test splits combined.

    Every image is flattened to a 784-element float vector and divided by
    255 (presumably mapping 8-bit pixel values into [0, 1]).

    Args:
        data_dir: Directory passed to torchvision's ``MNIST`` as ``root``;
            the data is downloaded there if it is missing.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` yielding ``(X, Y)`` batches, where ``X`` holds the
        flattened images and ``Y`` the corresponding targets.
    """
    train = MNIST(root=data_dir, train=True, download=True)
    test = MNIST(root=data_dir, train=False, download=True)

    def flatten(split):
        # (N, 28, 28) image tensor -> (N, 784) float tensor scaled by 1/255.
        return split.data.float().view(-1, 784) / 255.

    X = torch.cat([flatten(train), flatten(test)], 0)
    Y = torch.cat([train.targets, test.targets], 0)
    return DataLoader(TensorDataset(X, Y), batch_size=batch_size, shuffle=True)
class tests(torch.nn.Module):
    """Toy autoencoder: 784 -> 10 -> 1 -> 784 with a learnable projection.

    The 10-dimensional hidden activation is multiplied by the ``(10, 1)``
    parameter ``pi_`` to obtain a single latent value per sample, which is
    then expanded back to 784 values.
    """

    def __init__(self):
        super().__init__()
        # Learnable (10, 1) projection, initialised to all ones.
        self.pi_ = torch.nn.Parameter(torch.ones(10, 1), requires_grad=True)
        self.linear0 = torch.nn.Linear(784, 10)
        self.linear1 = torch.nn.Linear(1, 784)

    def forward(self, data):
        """Return the ReLU-activated reconstruction for ``data``.

        Args:
            data: Tensor whose last dimension is 784.

        Returns:
            Tensor with the same leading dimensions and a last dimension
            of 784.
        """
        hidden = torch.nn.functional.relu(self.linear0(data))
        # For 2-D input this is the same product as hidden.mm(self.pi_)
        # or hidden @ self.pi_; the choice of call does not affect gradients.
        latent = torch.matmul(hidden, self.pi_)
        return torch.nn.functional.relu(self.linear1(latent))
if __name__ == '__main__':
    # Train the autoencoder to reconstruct its input and print pi_ once per
    # epoch so that changes to the parameter can be observed.

    # Use the GPU when present, but fall back to CPU instead of crashing.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    DL = get_mnist()
    t = tests().to(device)
    optimizer = torch.optim.Adam(t.parameters(), lr=2e-3)
    for i in range(100):
        # Labels are not needed for reconstruction.
        for inputs, _ in DL:
            inputs = inputs.to(device)
            res = t(inputs)
            loss = torch.nn.functional.mse_loss(res, inputs)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("Epoch:", i, "pi:", t.pi_)
答案 0 :(得分:1)
TL;DR:您的神经网络中参数过多,其中一些参数变得无用,因此不再更新。请更改网络结构以减少无用的参数。
完整说明:
权重矩阵 pi_ 确实发生了变化。您将 pi_ 初始化为全 1,在运行完第一个 epoch 之后,权重矩阵 pi_ 变为
output >>>
tensor([[0.9879],
[0.9874],
[0.9878],
[0.9880],
[0.9876],
[0.9878],
[0.9878],
[0.9873],
[0.9877],
[0.9871]], device='cuda:0', requires_grad=True)
因此,它只更改了一次。其背后的真正原因涉及一些数学;用非数学的方式来说,这意味着该层对损失(loss)的影响很小,因此网络不再更新这一层。也就是说,pi_ 在该网络中是多余的。
如果要观察 pi_ 的变化,则应修改神经网络,使 pi_ 不再多余。
一种可能的修改是将您的重构问题改为分类问题(下面的代码去掉了解码层,并用 MSE 损失直接拟合类别标签):
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
#from torchvision import transforms
from torchvision.datasets import MNIST
def get_mnist(data_dir='./data/mnist/', batch_size=128):
    """Build a shuffled DataLoader over the MNIST train and test splits combined.

    Every image is flattened to a 784-element float vector and divided by
    255 (presumably mapping 8-bit pixel values into [0, 1]).

    Args:
        data_dir: Directory passed to torchvision's ``MNIST`` as ``root``;
            the data is downloaded there if it is missing.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` yielding ``(X, Y)`` batches, where ``X`` holds the
        flattened images and ``Y`` the corresponding targets.
    """
    train = MNIST(root=data_dir, train=True, download=True)
    test = MNIST(root=data_dir, train=False, download=True)

    def flatten(split):
        # (N, 28, 28) image tensor -> (N, 784) float tensor scaled by 1/255.
        return split.data.float().view(-1, 784) / 255.

    X = torch.cat([flatten(train), flatten(test)], 0)
    Y = torch.cat([train.targets, test.targets], 0)
    return DataLoader(TensorDataset(X, Y), batch_size=batch_size, shuffle=True)
class tests(torch.nn.Module):
    """Toy regressor: 784 -> 10 -> 1 with a learnable output projection.

    The 10-dimensional hidden activation is multiplied by the ``(10, 1)``
    parameter ``pi_`` and the product is returned directly. There is no
    layer after ``pi_``, so ``pi_`` alone maps the hidden features to the
    output.
    """

    def __init__(self):
        super().__init__()
        # Learnable (10, 1) projection, initialised to all ones.
        self.pi_ = torch.nn.Parameter(torch.ones(10, 1), requires_grad=True)
        self.linear0 = torch.nn.Linear(784, 10)

    def forward(self, data):
        """Return one scalar prediction per sample.

        Args:
            data: Tensor whose last dimension is 784.

        Returns:
            Tensor with the same leading dimensions and a last dimension
            of 1.
        """
        hidden = torch.nn.functional.relu(self.linear0(data))
        # For 2-D input this is the same product as hidden.mm(self.pi_)
        # or hidden @ self.pi_.
        return torch.matmul(hidden, self.pi_)
if __name__ == '__main__':
    # Fit the model's scalar output to the digit labels with an MSE loss and
    # print pi_ once per epoch so that changes to the parameter can be
    # observed.

    # Use the GPU when present, but fall back to CPU instead of crashing.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    DL = get_mnist()
    t = tests().to(device)
    optimizer = torch.optim.Adam(t.parameters(), lr=2e-3)
    for i in range(100):
        for inputs, classes in DL:
            inputs = inputs.to(device)
            # mse_loss needs floating-point targets.
            classes = classes.to(device).float()
            output = t(inputs)
            # Flatten (batch, 1) predictions to (batch,) to match the targets.
            loss = torch.nn.functional.mse_loss(output.view(-1), classes)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print("Epoch:", i, "pi:", t.pi_)
现在,pi_ 在每个 epoch 都会发生变化。
output >>>
Epoch: 0 pi: Parameter containing:
tensor([[1.3429],
[1.0644],
[0.9817],
[0.9767],
[0.9715],
[1.1110],
[1.1139],
[0.9759],
[1.2424],
[1.2632]], device='cuda:0', requires_grad=True)
Epoch: 1 pi: Parameter containing:
tensor([[1.4413],
[1.1977],
[0.9588],
[1.0325],
[0.9241],
[1.1988],
[1.1690],
[0.9248],
[1.2892],
[1.3427]], device='cuda:0', requires_grad=True)
Epoch: 2 pi: Parameter containing:
tensor([[1.4653],
[1.2351],
[0.9539],
[1.1588],
[0.8670],
[1.2739],
[1.2058],
[0.8648],
[1.2848],
[1.3891]], device='cuda:0', requires_grad=True)
Epoch: 3 pi: Parameter containing:
tensor([[1.4375],
[1.2256],
[0.9580],
[1.2293],
[0.8174],
[1.3471],
[1.2035],
[0.8102],
[1.2505],
[1.4201]], device='cuda:0', requires_grad=True)