Question

我是 pytorch 的新手，我已经被这个问题困住了一段时间。我已经训练了一个 CNN 来对 X 射线图像进行分类。这些图像可以在这个 Kaggle 页面 https://www.kaggle.com/prashant268/chest-xray-covid19-pneumonia/ 中找到。我设法在训练和测试数据上都获得了良好的准确性，但是当我尝试对新图像进行预测时，我对每张图像都得到了相同的（错误的类）输出。这是我的模型的详细信息。

import os 
import matplotlib.pyplot as plt
import numpy as np 
import torch
import glob 
import torch.nn.functional as F 
import torch.nn as nn 
from torchvision.transforms import transforms 
from torch.utils.data import DataLoader 
from torch.optim import Adam 
from torch.autograd import Variable 
import torchvision 
import pathlib 
from google.colab import drive 
drive.mount('/content/drive')

epochs = 20
batch_size = 128
learning_rate = 0.001

#Data Transformation
# Training-time preprocessing: resize, random horizontal flip (augmentation),
# tensor conversion and per-channel normalization with mean 0.5 / std 0.5.
transformer = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

# Evaluation-time preprocessing: identical to the training pipeline except
# that the random flip is left out, so test accuracy is reproducible and is
# measured on the same kind of input the model sees at prediction time.
test_transformer = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5], [0.5,0.5,0.5])
])

#Load data with DataLoader
train_path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Data/train'
test_path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Data/test'

train_loader = DataLoader(torchvision.datasets.ImageFolder(train_path,transform = transformer), batch_size= batch_size, shuffle= True)
test_loader = DataLoader(torchvision.datasets.ImageFolder(test_path,transform = test_transformer), batch_size= batch_size, shuffle= False)

# Class names are the sub-directory names of the training folder, sorted.
# Only directories are kept: a stray file in the folder (e.g. a hidden
# metadata file) would otherwise be counted as a class and shift the
# index -> name mapping used when printing predictions.
root = pathlib.Path(train_path)
classes = sorted(j.name for j in root.iterdir() if j.is_dir())
print(classes)

# Number of image files one directory level below each split folder,
# counted for the three extensions the script looks for.
train_count = sum(len(glob.glob(train_path+'/**/*.'+ext)) for ext in ('jpg', 'png', 'jpeg'))
test_count = sum(len(glob.glob(test_path+'/**/*.'+ext)) for ext in ('jpg', 'png', 'jpeg'))
print(train_count,test_count)

#Create the CNN 
class CNN(nn.Module):
  """Two conv/pool stages followed by three fully connected layers.

  The first fully connected layer is sized for 3-channel 224x224 inputs
  (24 feature maps of 53x53 after the second pooling stage).

  Args:
    num_classes: number of output logits. When omitted, the length of the
      module-level ``classes`` list is used, which is what the constructor
      did before this parameter existed.
  """

  def __init__(self, num_classes=None):
    super(CNN,self).__init__()
    if num_classes is None:
      # backward-compatible default: size the output layer from the global class list
      num_classes = len(classes)
    # output size of a conv/pool layer:
    #   nout = floor((width + 2*padding - kernel_size) / stride) + 1
    # Shapes below are for a batch of N images.
    # [N,3,224,224]
    self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 12, kernel_size = 5)
    # [N,12,220,220]
    self.pool1 = nn.MaxPool2d(2,2) #halves height and width
    # [N,12,110,110]
    self.conv2 = nn.Conv2d(in_channels = 12, out_channels = 24, kernel_size = 5)
    # [N,24,106,106]
    self.pool2 = nn.MaxPool2d(2,2)
    # [N,24,53,53] which becomes the input of the fully connected layer
    self.fc1 = nn.Linear(in_features = (24 * 53 * 53), out_features = 120)
    self.fc2 = nn.Linear(in_features = 120, out_features = 84)
    self.fc3 = nn.Linear(in_features = 84, out_features = num_classes) #final layer, one logit per class

  def forward(self, x):
    """Return raw class logits of shape [N, num_classes] for a batch x."""
    x = self.pool1(F.relu(self.conv1(x)))
    x = self.pool2(F.relu(self.conv2(x)))
    # Flatten everything except the batch dimension. Unlike view(-1, ...),
    # this can never fold a wrongly sized input into a different batch
    # size; a size mismatch surfaces as an error in fc1 instead.
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x


# Training the model
model = CNN()
# CrossEntropyLoss works on raw logits (the softmax step is part of the loss)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

n_total_steps = len(train_loader)
for epoch in range(epochs):
  # running totals for this epoch's training accuracy
  n_correct, n_samples = 0, 0
  for i, (images, labels) in enumerate(train_loader):
    # drop the gradients accumulated by the previous step
    optimizer.zero_grad()

    # forward pass: logits and loss for the current batch
    outputs = model(images)
    loss = loss_function(outputs, labels)

    # accuracy bookkeeping: predicted class = index of the largest logit
    predicted = torch.max(outputs, 1)[1]
    n_correct += (predicted == labels).sum().item()
    n_samples += labels.size(0)

    # backward pass and parameter update
    loss.backward()
    optimizer.step()

    acc = 100.0 * n_correct / n_samples

  # one summary line per epoch; the loss shown is the one of the last batch
  print(f'Epoch [{epoch+1}/{epochs}], Step [{i+1}/{n_total_steps}], Accuracy: {round(acc,2)} %, Loss: {loss.item():.4f}')
print('Done!!')

# Testing the model
# Put the model in inference mode. This is a no-op for the layer types the
# CNN above uses, but keeps evaluation correct if dropout or batch-norm
# layers are added later.
model.eval()

# Class names in label-index order, taken from the dataset itself so the
# per-class counters always line up with the integer labels it yields.
test_class_names = test_loader.dataset.classes

with torch.no_grad():
  n_correct = 0
  n_samples = 0
  # one counter per class instead of a hard-coded 3
  n_class_correct = [0 for _ in test_class_names]
  n_class_samples = [0 for _ in test_class_names]
  for images, labels in test_loader:
    outputs = model(images)
    # max returns (value ,index)
    _, predicted = torch.max(outputs, 1)
    n_samples += labels.size(0)
    n_correct += (predicted == labels).sum().item()
    # per-class tally: exposes a model that only ever predicts one class,
    # which overall accuracy can hide on an imbalanced test set
    for label, pred in zip(labels.tolist(), predicted.tolist()):
      n_class_samples[label] += 1
      if label == pred:
        n_class_correct[label] += 1

  acc = 100.0 * n_correct / n_samples
  print(f'Accuracy of the network: {acc} %')
  for name, hits, total in zip(test_class_names, n_class_correct, n_class_samples):
    if total:  # skip classes that have no test images
      print(f'Accuracy of {name}: {100.0 * hits / total} %')

torch.save(model.state_dict(),'/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/model.model')

加载模型并尝试对新图像进行预测，代码如下：

# Rebuild the network, then restore the trained weights saved after training
model = CNN()
checkpoint = torch.load('/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/model.model')
model.load_state_dict(checkpoint)
model.eval()  # inference mode

# Preprocessing for prediction: resize, tensor conversion and the same
# mean 0.5 / std 0.5 normalization as in training, without the random flip
transformer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

#Making predictions on new data
from PIL import Image
def prediction(img_path,transformer):
  """Classify one image file and return the predicted class name.

  Uses the module-level ``model`` for inference and the module-level
  ``classes`` list to turn the winning index into a name.

  Args:
    img_path: path of the image file to classify.
    transformer: callable applied to the RGB PIL image; its result is
      treated as a single image tensor.

  Returns:
    The entry of ``classes`` at the index of the largest model output.
  """
  # the context manager closes the file handle once the pixels are loaded
  with Image.open(img_path) as img:
    image = img.convert('RGB')
  image_tensor = transformer(image)
  # add a leading batch dimension: the model is fed a batch of size 1
  input_img = image_tensor.unsqueeze(0)
  # inference only, so no autograd graph needs to be recorded
  with torch.no_grad():
    output = model(input_img)
  #print(output)
  index = output.argmax(dim=1).item()
  pred = classes[index]
  return pred

# Folder holding the new, unlabeled images
pred_path = '/content/drive/MyDrive/Chest X-ray (Covid-19 & Pneumonia)/Test_images/Data/'
# every entry directly inside that folder
test_imgs = glob.glob(f'{pred_path}/*')

# print the predicted class name of each image, one per line
for img_file in test_imgs:
    label_name = prediction(img_file, transformer)
    print(label_name)

我猜问题一定出在我预处理数据的方式上，但我找不到错误所在。我已经被这个问题卡住一段时间了，任何帮助都将不胜感激。附言：如果有帮助，我也可以分享我的笔记本（notebook）。

Answer 1

关于您的问题，我有一个很好的调试方法，可以定位最有可能出问题的位置，从而让您更容易解决问题。

我的调试过程基于这样一个事实：您的 CNN 在测试集上表现良好。首先，将测试 DataLoader 的批量大小（batch size）暂时设置为 1。然后，在测试循环中计算预测正确数量的位置，运行以下代码：

# Meant to be pasted into the body of the test loop, with the test loader's
# batch size set to 1.

#Your code
outputs = model(images) # Really only one image and 1 output.
_, predicted = torch.max(outputs, 1) # index of the largest output = predicted class

#Altered Code:
correct =  (predicted == labels).sum().item() # This will be either 1 or 0 since you have only one image per batch

# My new code:
if correct:
   # if value is 1 instead of 0 then turn value into a single image with no batch size
   single_correct_image = images.squeeze(0)
   # Undo Normalize(mean 0.5, std 0.5) from the data transform so the pixel
   # values are back in [0, 1], which ToPILImage expects for float tensors
   single_correct_image = single_correct_image * 0.5 + 0.5
   # Then convert tensor image into PIL image
   pil_image = transforms.ToPILImage()(single_correct_image)
   # Save the pil image. The path must be a file name with an image
   # extension, not just a directory.
   pil_image.save("/content/correct_image.png")

   #Terminate testing process. Ignore Value Error if it says terminating process
   raise ValueError("terminating process")

现在您已将一张图片保存到磁盘，并且知道模型在测试集中对该图片的预测是正确的。下一步是打开这张图片，并用您的预测函数对其进行预测。可能出现以下两种情况，它们都能为您的问题提供线索：

如果您的模型返回了错误的答案，那么问题出在预测代码与测试代码的差异上：测试代码使用 torch.max 和 .sum()，而预测代码使用 numpy 的 argmax。您可以使用打印语句来调试那里发生的事情。原因可能是某个转换步骤出错，或者您对输出格式的预期与实际不符。
如果您的代码返回了正确的答案，那么说明您的模型无法泛化到新图像。我建议用上述流程多测试几张图片。

作为补充参考，如果您仍然卡住、觉得自己无法解决，我建议参考下面这个笔记本，它至少可以提示您应该检查哪些代码。

https://www.kaggle.com/salvation23/xray-cnn-pytorch

萨萨克耆那教

在 pytorch 中使用 CNN 对新图像进行预测

1 个答案: