Question

我使用 PyTorch 创建了一个用于字母分类的 CNN 模型，现在想用该模型对一张此前从未见过的图像进行测试。我已经使用 OpenCV 从手写图像中提取出了边界框，但不知道如何把边界框中的图像输入模型。

这是自定义数据集

class CustomDatasetFromCSV(Dataset):
    """Grayscale image dataset backed by a CSV of flattened pixel rows.

    Column 0 of every row is the label; the remaining columns are the
    ``height * width`` pixel values of one image.
    """

    def __init__(self, csv_path, height, width, transforms=None):
        """
        Args:
            csv_path (string): path to csv file
            height (int): image height
            width (int): image width
            transforms: optional callable applied to the PIL image of each
                sample (e.g. tensor conversion); ``None`` returns the PIL
                image unchanged
        """
        self.data = pd.read_csv(csv_path)
        self.labels = np.asarray(self.data.iloc[:, 0])
        self.height = height
        self.width = width
        self.transforms = transforms

    def _pixels(self, index):
        """Return the pixels of row ``index`` as a (height, width) uint8 array."""
        flat = np.asarray(self.data.iloc[index][1:])
        # Use the configured size instead of a hard-coded 28x28 so the
        # height/width constructor arguments are actually honoured.
        return flat.reshape(self.height, self.width).astype('uint8')

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        single_image_label = self.labels[index]
        # Convert the numpy array to a PIL image; mode 'L' is grayscale
        image = Image.fromarray(self._pixels(index)).convert('L')
        # Without transforms the PIL image itself is returned; previously
        # this path failed because the result variable was never assigned.
        if self.transforms is not None:
            image = self.transforms(image)
        return (image, single_image_label)

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.data.index)

    transformations = transforms.Compose([
                            transforms.ToTensor()
                        ])

    alphabet_from_csv = CustomDatasetFromCSV("/content/drive/My Drive/A_Z Handwritten Data.csv",
                                                28, 28, transformations)

# Reproducible 80/20 train/test split over the dataset indices.
random_seed = 50
data_size = len(alphabet_from_csv)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))

# Seed first so the shuffle (and therefore the split) is the same every run.
np.random.seed(random_seed)
np.random.shuffle(indices)

# The first `split` shuffled indices form the test set, the rest the train set.
train_indices = indices[split:]
test_indices = indices[:split]

# Despite the *_dataset names these are samplers; both loaders read from the
# same underlying dataset and differ only in which indices they draw.
train_dataset = SubsetRandomSampler(train_indices)
test_dataset = SubsetRandomSampler(test_indices)

train_loader = torch.utils.data.DataLoader(
    dataset=alphabet_from_csv,
    batch_size=batch_size,
    sampler=train_dataset,
)
test_loader = torch.utils.data.DataLoader(
    dataset=alphabet_from_csv,
    batch_size=batch_size,
    sampler=test_dataset,
)

这是我的模型

class ConvNet3(nn.Module):
    """Two conv blocks followed by a fully connected classifier.

    The first conv takes 1 input channel, and the classifier's input size
    (56 * 7 * 7) together with the two 2x2 poolings implies 28x28 inputs,
    i.e. batches shaped ``(N, 1, 28, 28)``.

    Args:
        num_classes (int): number of output logits per sample.
    """

    def __init__(self, num_classes=26):
        super().__init__()

        # 1x28x28 -> 28x14x14
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(28),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # 28x14x14 -> 56x7x7
        self.layer2 = nn.Sequential(
            nn.Conv2d(28, 56, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(56),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.fc = nn.Sequential(
            nn.Dropout(p = 0.5),
            nn.Linear(56 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p = 0.5),
            # Honour num_classes; the output width used to be fixed at 26
            # regardless of the constructor argument.
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return raw class scores (logits) of shape ``(N, num_classes)``."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the classifier
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
# Build the network and move its parameters to the target device.
model = ConvNet3(num_classes)
model = model.to(device)

# Cross-entropy loss over the class scores, optimised with Adam.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Uses the module-level ``model``, ``loss_func``, ``optimizer`` and
    ``device``. After each batch the progress monitor is given the batch size
    and the mean loss of all batches seen so far in this pass.
    """
    # Enable training behaviour (dropout, batch-norm statistics updates)
    model.train()

    # Progress reporting is sized by the number of training samples
    progress = ProgressMonitor(length=len(train_dataset))
    losses = []

    for inputs, labels in train_loader:
        # Move the batch to the same device as the model
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and loss
        loss = loss_func(model(inputs), labels)

        # Drop stale gradients, backpropagate, then apply the update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the mean loss over the batches processed so far
        losses.append(loss.item())
        mean_loss = sum(losses) / len(losses)
        progress.update(inputs.shape[0], mean_loss)

def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    Uses the module-level ``model`` and ``device``; the denominator is the
    length of ``test_dataset``.
    """
    # Switch to evaluation behaviour (no dropout, fixed batch-norm statistics)
    model.eval()

    n_correct = 0

    # Gradients are not needed for evaluation, which saves memory
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Predicted class = index of the highest score per sample
            predicted = model(inputs).argmax(dim=1)
            n_correct += (predicted == labels).sum().item()

    n_total = len(test_dataset)
    accuracy = 100 * float(n_correct) / n_total
    print('Test accuracy: {}/{} ({:.2f}%)'.format(n_correct, n_total, accuracy))

# One training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
    # Epochs are reported 1-based
    print("{}'s try".format(epoch + 1))
    train()
    test()
    print("-----------------------------------------------------------------------------")

这是我为图像提取边界框的代码

import cv2
import matplotlib.image as mpimg
# Load the handwriting photo and locate a bounding box for every glyph.
im = cv2.imread('/content/drive/My Drive/my_handwritten.jpg')
if im is None:
    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError(
        'could not read /content/drive/My Drive/my_handwritten.jpg')

# Grayscale -> blur -> inverted adaptive threshold for contour detection.
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
# Named flags carry the same values as the previous literals 1 and 1.
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 11, 2)

# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x/4.x; the contours are always second from last.
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
rects = []
red = (0, 0, 255)

for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    # Skip boxes shorter than 20 px
    if h < 20:
        continue
    cv2.rectangle(im, (x, y), (x + w, y + h), red, 2)
    rects.append((x, y, w, h))

# Save the annotated image for inspection
cv2.imwrite('my_handwritten_bounding.png', im)

# Cut one padded crop per detected rectangle.
img_result = []
# NOTE(review): `im` already carries the rectangles drawn earlier in this
# script, so the crops include those outlines - confirm whether the crops
# should come from an unannotated copy instead.
img_for_class = im.copy()

# Padding (in pixels) added on every side of a bounding box
margin_pixel = 60

for rect in rects:
    bx, by, bw, bh = rect
    # Clamp the start of the window at 0: a negative slice start would wrap
    # around to the far edge and give an empty or wrong crop. Ends that run
    # past the image are clipped by slicing itself.
    top = max(by - margin_pixel, 0)
    left = max(bx - margin_pixel, 0)
    # [y:y+h, x:x+w]
    img_result.append(
        img_for_class[top : by + bh + margin_pixel,
                      left : bx + bw + margin_pixel])

    # Draw the rectangles
    cv2.rectangle(im, (bx, by), (bx + bw, by + bh), (0, 0, 255), 2)

# Show every crop, resized to 28x28, in a grid of subplots.
count = 0
ncols = 7
# Keep the original 4x7 layout, but add rows when there are more than 28
# crops; plt.subplot raises once the index exceeds nrows * ncols.
nrows = max(4, (len(img_result) + ncols - 1) // ncols)

plt.figure(figsize=(12,8))

for n in img_result:
    # cv2.resize cannot handle an empty crop, so leave those out
    if n.size == 0:
        continue
    count += 1
    plt.subplot(nrows, ncols, count)
    # NOTE(review): the crops look like 3-channel slices of the loaded image,
    # for which `cmap` presumably has no effect - confirm.
    plt.imshow(cv2.resize(n,(28,28)), cmap='Greys', interpolation='nearest')

plt.tight_layout()
plt.show()

Answer 1

您已经编写了用于测试网络的函数 test。您唯一需要做的，就是用一张经过与数据集图像相同预处理的图像创建一个批次（batch）。

def test_one_image(I, model):
    '''
    Classify a single grayscale image with ``model``.

    I - 28x28 uint8 numpy array
    model - trained network taking float batches shaped (N, 1, H, W)

    Returns a tensor of shape (1,) holding the predicted class index.
    '''

    # test phase
    model.eval()

    # Run on whatever device the model lives on (CPU if it has no parameters)
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')

    # Scale to [0, 1] as float32 (numpy's `I / 255` would give float64, which
    # does not match float32 weights) and add both the batch and the channel
    # dimension: (H, W) -> (1, 1, H, W).
    batch = torch.as_tensor(I, dtype=torch.float32).div(255)
    batch = batch.unsqueeze(0).unsqueeze(0)

    # We don't need gradients for test, so wrap in
    # no_grad to save memory
    with torch.no_grad():
        batch = batch.to(target)

        # forward propagation
        output = model( batch )

        # get prediction
        output = torch.argmax(output, 1)

    return output


# The name starts with "test", so keep pytest from collecting this helper as
# a test case if the module is ever scanned.
test_one_image.__test__ = False

在Pytorch中，如何使用加载的模型测试简单图像？

1 个答案: