I built the following simple model in PyTorch, but the size-mismatch error I get makes no sense to me, since each layer's out_features always equals the next layer's in_features...
import numpy as np
import torch
from torch import nn

class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        #first linear block
        self.fc1=nn.Linear(32,1024)
        self.b1=nn.BatchNorm1d(1024)
        #Two Linear 1
        self.fc2=nn.Linear(1024,1024)
        self.b2=nn.BatchNorm1d(1024)
        self.fc3=nn.Linear(1024,1024)
        self.b3=nn.BatchNorm1d(1024)
        #Two Linear 2
        self.fc4=nn.Linear(1024,1024)
        self.b4=nn.BatchNorm1d(1024)
        self.fc5=nn.Linear(1024,1024)
        self.b5=nn.BatchNorm1d(1024)
        #Final Linear Layer
        self.fc6=nn.Linear(1024,48)

    def forward(self,x):
        x1=self.fc1(x)
        x1=self.b1(x1)
        x1=nn.ReLU(x1)
        x2=self.fc2(x1)
        x2=self.b2(x2)
        x2=nn.ReLU(x2)
        x2=self.fc3(x2)
        x2=self.b3(x2)
        x2=nn.ReLU(x2)
        x3=x1+x2
        x4=self.fc4(x3)
        x4=self.b4(x4)
        x4=nn.ReLU(x4)
        x4=self.fc5(x4)
        x4=self.b5(x4)
        x4=nn.ReLU(x4)
        x5=x3+x4
        x6=self.fc6(x5)
        return x6

model=Network()
zeros=np.zeros((1,32))
outputs=model(torch.FloatTensor(zeros))
RuntimeError: size mismatch, m1: [1 x 32], m2: [1024 x 32] at ..\aten\src\TH/generic/THTensorMath.cpp:41
I don't understand how this error can occur when every dimension matches. Here is the model summary:
=================================================================
Layer (type:depth-idx) Param #
=================================================================
├─Linear: 1-1 33,792
├─BatchNorm1d: 1-2 4,096
├─Linear: 1-3 1,049,600
├─BatchNorm1d: 1-4 4,096
├─Linear: 1-5 1,049,600
├─BatchNorm1d: 1-6 4,096
├─Linear: 1-7 1,049,600
├─BatchNorm1d: 1-8 4,096
├─Linear: 1-9 1,049,600
├─BatchNorm1d: 1-10 4,096
├─Linear: 1-11 49,200
=================================================================
Total params: 4,301,872
Trainable params: 4,301,872
Non-trainable params: 0
Answer (score: 2)
Batch normalization only works when the batch size is greater than 1, so an input of shape (1, 32) will not work. Try a larger batch size, e.g. 2.
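For instance, here is a minimal sketch of the failure (assuming training mode, which is the default for a freshly constructed module):

import torch
from torch import nn

bn=nn.BatchNorm1d(4)  # normalizes each of the 4 features over the batch
# In training mode, a single-sample batch has no batch statistics:
# bn(torch.zeros(1,4)) raises
# ValueError: Expected more than 1 value per channel when training
out=bn(torch.zeros(2,4))  # batch size 2 works
print(out.shape)  # torch.Size([2, 4])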
Additionally, you are applying ReLU as `x=nn.ReLU(x)`. This is wrong, because `nn.ReLU` is a layer: that line returns the ReLU layer itself, not a tensor. Either define an `nn.ReLU()` layer in your `__init__` method, or call `F.relu(x)` or `nn.ReLU()(x)` in `forward`.
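To see the difference directly, here is a quick illustrative check:

import torch
from torch import nn
import torch.nn.functional as F

x=torch.zeros(2,8)
print(type(nn.ReLU(x)))    # <class 'torch.nn.modules.activation.ReLU'> -- a layer, not a tensor
print(type(F.relu(x)))     # <class 'torch.Tensor'>
print(type(nn.ReLU()(x)))  # <class 'torch.Tensor'>

Here is the corrected model: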
import torch
from torch import nn
import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        #first linear block
        self.fc1=nn.Linear(32,1024)
        self.b1=nn.BatchNorm1d(1024)
        #Two Linear 1
        self.fc2=nn.Linear(1024,1024)
        self.b2=nn.BatchNorm1d(1024)
        self.fc3=nn.Linear(1024,1024)
        self.b3=nn.BatchNorm1d(1024)
        #Two Linear 2
        self.fc4=nn.Linear(1024,1024)
        self.b4=nn.BatchNorm1d(1024)
        self.fc5=nn.Linear(1024,1024)
        self.b5=nn.BatchNorm1d(1024)
        #Final Linear Layer
        self.fc6=nn.Linear(1024,48)

    def forward(self,x):
        x1=self.fc1(x)
        x1=self.b1(x1)
        x1=F.relu(x1)
        x2=self.fc2(x1)
        x2=self.b2(x2)
        x2=F.relu(x2)
        x2=self.fc3(x2)
        x2=self.b3(x2)
        x2=F.relu(x2)
        x3=x1+x2
        x4=self.fc4(x3)
        x4=self.b4(x4)
        x4=F.relu(x4)
        x4=self.fc5(x4)
        x4=self.b5(x4)
        x4=F.relu(x4)
        x5=x3+x4
        x6=self.fc6(x5)
        return x6

model=Network()
zeros=torch.zeros((10, 32))
outputs=model(zeros)
print(outputs.shape)
# torch.Size([10, 48])
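As a side note, the batch-size restriction only applies in training mode. After calling model.eval(), BatchNorm layers use their running statistics instead of batch statistics, so a batch of size 1 works at inference time:

model.eval()  # BatchNorm now uses running statistics
one=torch.zeros((1,32))
print(model(one).shape)  # torch.Size([1, 48])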