我试图通过对未标记的数据进行旋转(0、90、180,dn 270度:4个标记)模型的预训练(自我监督学习)来进行转移学习。 这是模型:
class RotNet1(nn.Module):
def __init__(self):
keep_prob = 0.9
super(RotNet1, self).__init__()
self.layer1 = nn.Sequential(nn.Conv2d(in_channels = 3,
out_channels = 80,
kernel_size = 7,
stride = 1,
padding = 0),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2,
stride = 2,
padding = 1),
nn.Dropout(p=1 - keep_prob)
)
self.bn1 = nn.BatchNorm2d(num_features = 80)
self.dropout1 = nn.Dropout2d(p=0.02)
self.layer2 = nn.Sequential(nn.Conv2d(in_channels = 80,
out_channels = 128,
kernel_size = 3,
stride = 1,
padding = 1),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2,
stride = 2,
padding = 1),
nn.Dropout(p=1 - keep_prob)
)
self.bn2 = nn.BatchNorm2d(num_features = 128)
self.layer3 = nn.Sequential(nn.Conv2d(in_channels = 128,
out_channels = 256,
kernel_size = 3,
stride = 1,
padding = 0),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2,
stride = 2,
padding = 1),
nn.Dropout(p=1 - keep_prob)
)
self.bn3 = nn.BatchNorm2d(num_features = 256)
self.layer4 = nn.Sequential(nn.Conv2d(in_channels = 256,
out_channels = 512,
kernel_size = 3,
stride = 1,
padding = 0),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2,
stride = 2,
padding = 1),
nn.Dropout(p=1 - keep_prob)
)
self.bn4 = nn.BatchNorm2d(num_features = 512)
self.layer5 = nn.Sequential(nn.Conv2d(in_channels = 512,
out_channels = 512,
kernel_size = 3,
stride = 1,
padding = 0),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2,
stride = 2,
padding = 1),
nn.Dropout(p=1 - keep_prob)
)
self.bn5 = nn.BatchNorm2d(num_features = 512)
self.drop_out = nn.Dropout()
self.fc1 = nn.Linear(512* 2 * 2, 200)
self.fc2 = nn.Linear(200, 4)
#self.fc3 = nn.Linear(200, 100)
def forward(self, input):
out = self.layer1(input)
out = self.bn1(out)
out = self.dropout1(out)
out = self.layer2(out)
out = self.bn2(out)
out = self.layer3(out)
out = self.bn3(out)
out = self.layer4(out)
out = self.bn4(out)
out = self.layer5(out)
out = self.bn5(out)
out = out.reshape(out.size(0), -1)
out = self.drop_out(out)
out = self.fc1(out)
out = self.fc2(out)
#out = self.fc3(out)
return out
我在这4个标签上训练了该模型,并将模型命名为model_ssl
。然后,我复制了模型,并将最后一个完全连接的层的层数从4更改为200(这是标记的训练和验证集中的标签数,其中示例数受到限制:
model_a = copy.copy(model_ssl)
#model_a.classifier
num_classes = 200
model_a.fc2 = nn.Linear(256,num_classes).cuda()
model_a.to(device)
loss_fn = torch.nn.CrossEntropyLoss()
n_epochs_a = 20
learning_rate_a = 0.01
alpha_a = 1e-5
momentum_a = 0.9
optimizer = torch.optim.SGD(model_a.parameters(),
momentum = momentum_a,
nesterov=True,
weight_decay = alpha_a,
lr=learning_rate_a)
train_losses_a, val_losses_a, train_acc_a, val_acc_a = train(model_a,
train_dataloader_sl,
val_dataloader_sl,
optimizer,
n_epochs_a,
loss_fn)
这是错误消息:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-27-f6f362ba8c53> in <module>()
15 optimizer,
16 n_epochs_a,
---> 17 loss_fn)
6 frames
<ipython-input-23-df58f17c5135> in train(model, train_dataloader, val_dataloader, optimizer, n_epochs, loss_function)
57 for epoch in range(n_epochs):
58 model.train()
---> 59 train_loss, train_accuracy = train_epoch(model, train_dataloader, optimizer, loss_fn)
60 model.eval()
61 val_loss, val_accuracy = evaluate(model, val_dataloader, loss_fn)
<ipython-input-23-df58f17c5135> in train_epoch(model, train_dataloader, optimizer, loss_fn)
10 labels = labels.to(device=device, dtype=torch.int64)
11 # Run predictions
---> 12 output = model(images)
13 # Set gradients to zero
14 optimizer.zero_grad()
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
<ipython-input-11-2cd851b6d8e4> in forward(self, input)
85 out = self.drop_out(out)
86 out = self.fc1(out)
---> 87 out = self.fc2(out)
88 #out = self.fc3(out)
89 return out
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/linear.py in forward(self, input)
85
86 def forward(self, input):
---> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
1608 if input.dim() == 2 and bias is not None:
1609 # fused op is marginally faster
-> 1610 ret = torch.addmm(bias, input, weight.t())
1611 else:
1612 output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [256 x 200], m2: [256 x 200] at /pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:283
矩阵m1
和m2
的大小似乎匹配,但是仍然存在该错误消息。我该怎么办?
答案 0 :(得分:1)
fc1
的输出形状的输出大小为200,因此fc2
的输入大小应为200而不是256,num_classes
和256应该切换:
num_classes = 200
model_a.fc2 = nn.Linear(num_classes, 256).cuda()