I'm still fairly new to PyTorch and machine learning in general, and I'm trying to build a simple convolutional neural network that classifies MNIST handwritten digits. Unfortunately, when I try to train it, I get the following error:
ValueError: Expected input batch_size (288) to match target batch_size (64).
Here is the network code:
from torch import nn
from torch.nn.functional import relu, log_softmax

class MNIST_SimpleConv(nn.Module):
    def __init__(self):
        super(MNIST_SimpleConv, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.dense1 = nn.Linear(4*4*64, 100)
        self.dense2 = nn.Linear(100, 10)

    def forward(self, x):
        x = relu(self.conv1(x))
        x = relu(self.conv2(x))
        x = self.pool1(x)
        x = x.view(-1, 4*4*64)
        x = relu(self.dense1(x))
        return log_softmax(self.dense2(x), dim=1)
The training code is as follows:
from nets.conv import MNIST_SimpleConv
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.nn.functional import nll_loss
import torch.optim as optim
import torch
from torch import nn

MNIST_ROOT = "data/MNIST"

# prepare dataset
mnist_train_ds = datasets.ImageFolder(root=MNIST_ROOT+"/train", transform=transforms.Compose([
    transforms.ToTensor()]))
mnist_test_ds = datasets.ImageFolder(root=MNIST_ROOT+"/test", transform=transforms.Compose([
    transforms.ToTensor()]))

mnist_train = DataLoader(mnist_train_ds, batch_size=64, shuffle=True, num_workers=6)
mnist_test = DataLoader(mnist_test_ds, batch_size=64, shuffle=True, num_workers=6)

criterion = nn.CrossEntropyLoss()

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader, 0):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100. * batch_idx / len(train_loader), loss.item()))

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MNIST_SimpleConv().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

for epoch in range(1, 10):
    train(model, device, mnist_train, optimizer, epoch)
So far, I have traced how the shape of x changes as it is forwarded through the network.
Input: torch.Size([64, 3, 28, 28])
After x = relu(self.conv1(x)): torch.Size([64, 32, 26, 26])
After x = relu(self.conv2(x)): torch.Size([64, 64, 24, 24])
After x = self.pool1(x): torch.Size([64, 64, 12, 12])
After x = x.view(-1, 4*4*64): torch.Size([576, 1024])
After x = relu(self.dense1(x)): torch.Size([576, 100])
After x = log_softmax(self.dense2(x), dim=1): torch.Size([576, 10])
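(For reference, the trace above can be reproduced with a quick probe like this; it is only a minimal sketch, and the dummy all-zero batch is just for illustration:)

# Minimal sketch: push one dummy batch through the layers and print the shapes.
import torch
from nets.conv import MNIST_SimpleConv

model = MNIST_SimpleConv()
x = torch.zeros(64, 3, 28, 28)   # dummy batch: 64 three-channel 28x28 images
x = torch.relu(model.conv1(x))
print(x.shape)                   # torch.Size([64, 32, 26, 26])
x = torch.relu(model.conv2(x))
print(x.shape)                   # torch.Size([64, 64, 24, 24])
x = model.pool1(x)
print(x.shape)                   # torch.Size([64, 64, 12, 12])
x = x.view(-1, 4*4*64)
print(x.shape)                   # torch.Size([576, 1024])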
The error is probably caused by x = x.view(-1, 4*4*64), which for some reason produces a tensor of shape [576, 1024] instead of [64, 1024]. (If I understand this correctly, the first dimension should equal the batch size, which in my case is 64.) What am I doing wrong?
Answer 0 (score: 0)
Passing -1 for any dimension in view means that the value of that dimension will be inferred from the remaining dimensions. For example:

x = torch.rand(1, 10)  # x.shape = [1, 10]
x = x.view(-1, 5)      # x.shape = [2, 5]

In your case, pool1's output holds 64 x 64 x 12 x 12 = 589,824 elements, so view(-1, 4*4*64) reshapes them into 589,824 / 1024 = 576 rows, which is exactly the mismatched batch size in the error. If you want to flatten everything except the batch dimension of pool1's output, it should instead be:

x = x.view(-1, 64*12*12)  # x.shape = [64, 9216]
In that case, we also have to update the number of input features of self.dense1:

self.dense1 = nn.Linear(64*12*12, 100)
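Putting the two changes together, the forward pass would then look roughly like this (a sketch; the shape comments assume 28x28 inputs, and x.view(x.size(0), -1) or torch.flatten(x, 1) would do the same job while keeping the batch dimension explicit):

def forward(self, x):
    x = relu(self.conv1(x))     # [N, 32, 26, 26]
    x = relu(self.conv2(x))     # [N, 64, 24, 24]
    x = self.pool1(x)           # [N, 64, 12, 12]
    x = x.view(-1, 64*12*12)    # [N, 9216]; x.view(x.size(0), -1) also works
    x = relu(self.dense1(x))    # [N, 100]
    return log_softmax(self.dense2(x), dim=1)  # [N, 10]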
One thing we do need to make sure of, however, is that the output of self.pool1 always has shape batch_size x 64 x 12 x 12; in particular, the last two dimensions must stay at 12 throughout. This can be ensured by fixing the input image size across the whole dataset.
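One way to pin that down is a Resize transform in front of ToTensor (just a sketch of one option; MNIST images are already 28x28, so this mainly guards against stray images of a different size):

from torchvision import transforms

# Force every image to 28x28 before converting it to a tensor,
# so the conv/pool output sizes (and thus 64*12*12) never change.
transform = transforms.Compose([
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
])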