The problem is that my training loss and test loss are identical, and neither the loss nor the accuracy changes from epoch to epoch. What is wrong with my CNN structure or training process?
Training results:
Epoch: 1/30 .. Training Loss: 2.306 .. Test Loss: 2.306 .. Test Accuracy: 0.100
Epoch: 2/30 .. Training Loss: 2.306 .. Test Loss: 2.306 .. Test Accuracy: 0.100
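For what it's worth, 2.306 is almost exactly the loss a 10-class classifier gets by guessing uniformly at random, which suggests the network isn't learning at all:

import math
print(math.log(10))  # 2.3026 -- cross-entropy of a uniform guess over 10 classes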
The class code:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
        # the output will be 0~9 (10 classes)
Here are my CNN's forward pass and my training process:
def forward(self, t):
    # implement the forward pass
    # (1) input layer
    t = t
    # (2) hidden conv layer
    t = self.conv1(t)
    t = F.relu(t)
    t = F.max_pool2d(t, kernel_size=2, stride=2)
    # (3) hidden conv layer
    t = self.conv2(t)
    t = F.relu(t)
    t = F.max_pool2d(t, kernel_size=2, stride=2)
    # (4) hidden linear layer
    t = t.reshape(-1, 12 * 4 * 4)
    t = self.fc1(t)
    t = F.relu(t)
    # (5) hidden linear layer
    t = self.fc2(t)
    t = F.relu(t)
    # (6) output layer
    t = self.out(t)
    # t = F.softmax(t, dim=1)
    return t
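For reference, the 12 * 4 * 4 in the reshape assumes 1×28×28 grayscale inputs such as MNIST/FashionMNIST: 28 → 24 after conv1 (kernel 5), → 12 after pooling, → 8 after conv2, → 4 after pooling. A quick shape check:

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(1, 1, 28, 28)  # dummy batch; the 28x28 input size is an assumption
x = F.max_pool2d(F.relu(nn.Conv2d(1, 6, 5)(x)), 2)   # -> (1, 6, 12, 12)
x = F.max_pool2d(F.relu(nn.Conv2d(6, 12, 5)(x)), 2)  # -> (1, 12, 4, 4)
print(x.shape)  # torch.Size([1, 12, 4, 4]); flattens to 12 * 4 * 4 = 192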
epoch = 30
train_losses, test_losses = [], []
for e in range(epoch):
    train_loss = 0
    test_loss = 0
    accuracy = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        op = model(images)  # output
        loss = criterion(op, labels)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
    else:  # for-else: runs once the inner loop finishes without a break
        with torch.no_grad():
            model.eval()
            for images, labels in testloader:
                log_ps = model(images)
                prob = torch.exp(log_ps)
                top_probs, top_classes = prob.topk(1, dim=1)
                equals = labels == top_classes.view(labels.shape)
                accuracy += equals.type(torch.FloatTensor).mean()
                test_loss += criterion(log_ps, labels)
        model.train()

        print("Epoch: {}/{}.. ".format(e+1, epoch),
              "Training Loss: {:.3f}.. ".format(train_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))
        train_losses.append(train_loss/len(train_loader))
        test_losses.append(test_loss/len(testloader))
Answer 0 (score: 1):
Be careful when using nn.CrossEntropyLoss and nn.NLLLoss; don't mix them up. nn.CrossEntropyLoss expects raw logits and applies log-softmax internally, while nn.NLLLoss expects log-probabilities as input. The sketch below illustrates the equivalence.
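A minimal sketch (not from the original post) showing that CrossEntropyLoss on logits equals LogSoftmax followed by NLLLoss:

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 10)          # a dummy batch of raw network outputs
labels = torch.randint(0, 10, (4,))

ce = nn.CrossEntropyLoss()(logits, labels)                # expects logits
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), labels)  # expects log-probs
print(torch.allclose(ce, nll))  # True: CrossEntropyLoss = LogSoftmax + NLLLoss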
I don't see a problem with your code; I tried running it exactly the way you defined it. Perhaps you haven't shown us the code that initializes the other parts (model, criterion, optimizer), and the issue could be hiding there, as in the sketch below.
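As a purely hypothetical example of what that missing setup usually looks like (these exact lines are not from the original post), an unusual choice here, such as a very large learning rate, can leave the loss pinned at chance level:

model = Model()
criterion = nn.CrossEntropyLoss()
# a very large lr (e.g. 1.0 with plain SGD) can keep the loss stuck near ln(10)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)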
log_ps is supposed to hold log_softmax values, but your network only produces raw logits (since, as you said, you use CrossEntropyLoss). These lines can be modified as follows:
log_ps = model(images)
prob = torch.exp(log_ps)
top_probs, top_classes = prob.topk(1, dim=1)

# Change into this simpler version:
logits = model(images)
output = logits.argmax(dim=-1)  # the index of the predicted class
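If you do want actual probabilities (e.g. as confidence scores), note that torch.exp of raw logits is not a probability; apply a softmax instead, as in this small sketch:

probs = F.softmax(logits, dim=1)  # proper probabilities, rows sum to 1
top_probs, top_classes = probs.topk(1, dim=1)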
I just put together a very similar version of your code, and it works well:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
        # the output will be 0~9 (10 classes)

    def forward(self, t):
        # implement the forward pass
        # (1) input layer
        t = t
        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        # (4) hidden linear layer
        t = t.reshape(-1, 12 * 4 * 4)
        t = self.fc1(t)
        t = F.relu(t)
        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)
        # (6) output layer
        t = self.out(t)
        return t

import torchvision
import torchvision.transforms as T

train_dataset = torchvision.datasets.FashionMNIST('./data', train=True,
                                                  transform=T.ToTensor(),
                                                  download=True)
test_dataset = torchvision.datasets.FashionMNIST('./data', train=False,
                                                 transform=T.ToTensor(),
                                                 download=True)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)

epoch = 5
model = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

train_losses, test_losses = [], []
for e in range(epoch):
    train_loss = 0
    test_loss = 0
    accuracy = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        logits = model(images)  # output
        loss = criterion(logits, labels)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
    else:
        with torch.no_grad():
            model.eval()
            for images, labels in test_loader:
                logits = model(images)
                output = logits.argmax(dim=-1)
                equals = (labels == output)
                accuracy += equals.to(torch.float).mean()
                test_loss += criterion(logits, labels)
        model.train()

        print("Epoch: {}/{}.. ".format(e+1, epoch),
              "Training Loss: {:.3f}.. ".format(train_loss/len(train_loader)),
              "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
              "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))
        train_losses.append(train_loss/len(train_loader))
        test_losses.append(test_loss/len(test_loader))
Here is the result; it does converge, at least:
Epoch: 1/5.. Training Loss: 0.721.. Test Loss: 0.525.. Test Accuracy: 0.809
Epoch: 2/5.. Training Loss: 0.473.. Test Loss: 0.464.. Test Accuracy: 0.829
Epoch: 3/5.. Training Loss: 0.408.. Test Loss: 0.391.. Test Accuracy: 0.858
Epoch: 4/5.. Training Loss: 0.370.. Test Loss: 0.396.. Test Accuracy: 0.858
Epoch: 5/5.. Training Loss: 0.348.. Test Loss: 0.376.. Test Accuracy: 0.858
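One small tidy-up, optional and not affecting the results above: the evaluation loop accumulates criterion(logits, labels), which is a zero-dimensional tensor, into test_loss. Calling .item() keeps test_loss a plain Python float, matching how train_loss is accumulated:

test_loss += criterion(logits, labels).item()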