就AI和神经网络而言,我仍然是一个初学者,在这段学习时间里,我正在尝试举一些例子,但是我有一个问题,不知道如何解决。如果你们中的任何一个能够帮助我,我将非常感激。
我要做什么?
使用PyTorch和一些自定义数据集对10种动物进行简单的图像分类。
我的图片
每个图像的形状都将为(3,200,200)
我在每个文件夹(训练和测试)上都有40张图片
我的数据文件夹看起来如何?
火车
测试
我的模型
class NetModel(nn.Module):
def __init__(self):
super(NetModel, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.view(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
def train_model(model, train_loader, optimizer, criterion, epochs, save_path):
for epoch in range(epochs): # loop over the dataset multiple times
running_loss = 0.0
for i, data in enumerate(train_loader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
if i % 2000 == 1999: # print every 2000 mini-batches
print('[%d, %5d] loss: %.3f' %
(epoch + 1, i + 1, running_loss / 2000))
running_loss = 0.0
print('Finished Training')
# PATH = './cifar_net.pth'
torch.save(model.state_dict(), save_path)
print('Saved in: {}'.format(save_path))
def test(model, test_loader, classes, saved_path):
model.load_state_dict(torch.load(saved_path))
dataiter = iter(test_loader)
images, labels = dataiter.next()
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))
outputs = model(images)
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
charset = string.ascii_letters + "-' "
trainset = TESNamesDataset('../data/train', charset, 10)
testset = TESNamesDataset('../data/test', charset, 10)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
shuffle=False, num_workers=2)
model = NetModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
path = "../check_point/saved_model.pth"
train_model(model, trainloader, optimizer, criterion, 100, path)
test(model, testloader, animals, path)
我的自定义数据集
class TESNamesDataset(Dataset):
def __init__(self, data_root, charset, length):
self.data_root = data_root
self.charset = charset + '\0'
self.length = length
self.samples = []
self.char_codec = LabelEncoder()
self._init_dataset()
def __len__(self):
return len(self.samples)
def __getitem__(self, idx):
name, pixels = self.samples[idx]
return self.one_hot_sample(name), pixels
def _init_dataset(self):
names = set()
# self.samples = []
for animal in os.listdir(self.data_root):
animal_filepath = os.path.join(self.data_root, animal)
names.add(animal)
for img_name in os.listdir(animal_filepath):
img_path = os.path.join(animal_filepath, img_name)
im = cv2.imread(img_path)
if len(animal) < self.length:
animal += '\0' * (self.length - len(animal))
else:
animal = animal[:self.length - 1] + '\0'
self.samples.append((animal, im))
self.char_codec.fit(list(self.charset))
def to_one_hot(self, codec, values):
value_idxs = codec.transform(values)
return torch.eye(len(codec.classes_))[value_idxs]
def one_hot_sample(self, name):
t_name = self.to_one_hot(self.char_codec, list(name))
return t_name
错误消息
RuntimeError:预期4维权重为6的4维输入 3 5 5,但改为输入尺寸为[4,10,56]的3维输入
任何人都可以告诉我我做错了什么吗?如果这个问题不合时宜,请告诉我在哪里可以问到。
谢谢