ValueError: Expected input batch_size (8) to match target batch_size (32)

Date: 2021-01-10 17:31:20

Tags: deep-learning neural-network pytorch

The task is a simple classification of the MNIST dataset. With the architecture shown below I can run the training step; it used to throw a similar error, but that turned out to be an architecture problem which I fixed. Now the same error appears during the test step instead. I have tried changing the batch size and checking the tensor sizes at every step, but I cannot figure out what is going wrong.

Thanks for your help!

LeNet5 architecture:

class LeNet5(nn.Module):

    def __init__(self, n_classes):
        super(LeNet5, self).__init__()

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=4, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=4, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=4, stride=1),
            nn.Tanh()
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=120*2*2, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=n_classes),
        )

    def forward(self, x):
        # print(x.shape)
        x = self.feature_extractor(x)
        # print(x.shape)
        x = x.view(-1, 120*2*2)
        # print(x.shape)
        logits = self.classifier(x)
        # print(logits.shape)
        return logits
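
For reference, here is a minimal sketch (assuming a dummy 1×28×28 MNIST batch of 32 and the class above) of the shape check that the commented-out prints in forward perform:

import torch

model = LeNet5(n_classes=10)
x = torch.randn(32, 1, 28, 28)           # dummy MNIST-sized batch of 32

feats = model.feature_extractor(x)
print(feats.shape)                        # torch.Size([32, 120, 1, 1])

flat = feats.view(-1, 120*2*2)            # 32*120 values regrouped into rows of 480
print(flat.shape)                         # torch.Size([8, 480]) -- the batch axis shrinks to 8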

Training function:

def train(model, epoch, delta=1e-5):
  global best_train_acc, best_test_acc
  device = 'cuda'
  model.to(device)
  print('\nEpoch: %d' % epoch)
  model.train()
  train_loss = 0
  correct = 0
  total = 0
  total_acc = 0
  for batch_idx, (inputs, targets) in enumerate(trainloader):
      inputs, targets = inputs.to(device), targets.to(device)
      
      # for params in model.feature_extractor.parameters():
      #   params.requires_grad = False

      # model.feature_extractor[6].parameters().requires_grad = True
      optimizer.zero_grad()
      outputs = model(inputs)
      print(outputs.shape)
      loss = criterion(outputs, targets)
      loss.backward()
      optimizer.step()

      train_loss += loss.item()
      _, predicted = outputs.max(1)
      total += targets.size(0)
      correct += predicted.eq(targets).sum().item()
      epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(delta)

      print(f'Epoch {epoch} Step {batch_idx}/{len(trainloader)}', 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                    % (train_loss/(batch_idx+1), 100.*correct/total, correct, total),  f"(ε = {epsilon:.2f}, δ = {delta}) for α = {best_alpha}")
      
      acc = 100.*correct/total
      total_acc += acc
      if acc>best_train_acc:
          best_train_acc = acc
    
  return total_acc/len(trainloader)

Test function:

def test(model, epoch, model_name, lr):
  global best_train_acc, best_test_acc
  device = 'cuda'
  model.eval()
  test_loss = 0
  correct = 0
  total = 0
  total_acc = 0
  with torch.no_grad():
      for batch_idx, (inputs, targets) in enumerate(testloader):
          # print(inputs.shape, inputs[0].shape)
          inputs, targets = inputs.to(device), targets.to(device)
          outputs = model(inputs)
          loss = criterion(outputs, targets)

          test_loss += loss.item()
          _, predicted = outputs.max(1)
          total += targets.size(0)
          correct += predicted.eq(targets).sum().item()

          print(f'Epoch {epoch} Step {batch_idx}/{len(testloader)}', 'Test Loss: %.3f | Test Acc: %.3f%% (%d/%d)'
                        % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

  # Save checkpoint.
  acc = 100.*correct/total
  total_acc += acc
  if acc > best_test_acc:
      print('Saving..')
      state = {
          'model': model.state_dict(),
          'acc': acc,
          'epoch': epoch,
      }
      if not os.path.isdir('/content/drive/My Drive/checkit'):
          os.mkdir('/content/drive/My Drive/checkit')
      torch.save(state, f'/content/drive/My Drive/checkit/ckpt_{model_name}_{lr}_MNIST.pth')
      best_test_acc = acc
      
  return total_acc

Error:

    ValueError                                Traceback (most recent call last)
<ipython-input-25-5c2bb9b3b3d1> in <module>()
     22 for epoch in range(start_epoch, start_epoch+50):
     23     LeNet5_train_acc.append([train(model, epoch), epoch+1])
---> 24     LeNet5_test_acc.append([test(model, epoch, 'LeNet5', lr), epoch+1])
     25 
     26 best_acc.append([best_train_acc, best_test_acc])

4 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2260     if input.size(0) != target.size(0):
   2261         raise ValueError('Expected input batch_size ({}) to match target batch_size ({}).'
-> 2262                          .format(input.size(0), target.size(0)))
   2263     if dim == 2:
   2264         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)

ValueError: Expected input batch_size (8) to match target batch_size (32).

1 Answer:

Answer 0 (score: 0):

Two things:

  • The output shape of the feature extractor is (batch_size, 120, 1, 1), so the input dimension of the classifier should be 120*1*1, i.e. 120, rather than 120*2*2.

  • As you have experienced here, putting -1 on the first axis of view can backfire quickly. Ideally you would flatten with

    x.view(x.size(0), -1)

    which keeps the first axis intact and flattens all the other axes. Or, better yet, add an nn.Flatten to the classifier to do that job, as in the sketch below.
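
A minimal sketch of how the fixed model could look (the class name LeNet5Fixed is only for illustration; the sizes assume the 28×28 MNIST input from the question):

import torch
import torch.nn as nn

class LeNet5Fixed(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        # Same feature extractor as in the question: output is (N, 120, 1, 1) for 28x28 input.
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=4), nn.Tanh(), nn.AvgPool2d(2),
            nn.Conv2d(6, 16, kernel_size=4), nn.Tanh(), nn.AvgPool2d(2),
            nn.Conv2d(16, 120, kernel_size=4), nn.Tanh(),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),              # (N, 120, 1, 1) -> (N, 120); the batch axis is untouched
            nn.Linear(120, 84),        # 120*1*1 features per sample, not 120*2*2
            nn.Tanh(),
            nn.Linear(84, n_classes),
        )

    def forward(self, x):
        return self.classifier(self.feature_extractor(x))

# Quick check with a dummy batch of 32 MNIST-sized images:
logits = LeNet5Fixed(n_classes=10)(torch.randn(32, 1, 28, 28))
print(logits.shape)  # torch.Size([32, 10]) -- batch size preserved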