ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([1800, 800])

时间:2020-11-10 21:43:20

标签: python deep-learning computer-vision pytorch tensor

这是我用于评估的代码单元(notebook cell):

# Evaluation cell: load the trained landmark network, run it on one batch and
# plot predicted (red) against ground-truth (green) landmarks, timing the run.
start_time = time.time()

with torch.no_grad():

    # Rebuild the model on the GPU, restore the saved weights and switch to
    # inference mode.
    best_network = Network()
    best_network.cuda()
    best_network.load_state_dict(torch.load('../moth_landmarks.pth')) 
    best_network.eval()
    
    # NOTE(review): the batch is drawn from train_loader, while the summary
    # print at the end reports len(test_dataset) -- confirm which is intended.
    batch = next(iter(train_loader))
    images, landmarks = batch['image'], batch['landmarks']
    #images = images.unsqueeze_(1)

    # Concatenates three copies of the batch along dim 1. Going by the shape
    # note further down ([8, 600, 800]) the batch appears to have no channel
    # axis at this point, so dim 1 is the image height and the copies are
    # stacked vertically -- which matches the [1800, 800] in the reported error.
    images = torch.cat((images,images,images),1)
    images = images.cuda()

    norm_image = transforms.Normalize(0.3812, 0.1123) 
    for image in images:
        image = image.float()
        ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
        # Iterating over the batch yields one slice per image; per the
        # traceback that slice is 2-D, and Normalize raises ValueError unless
        # it is given a 3-D (C, H, W) tensor.
        # NOTE(review): the result is only bound to the loop variable and is
        # never written back into `images`.
        image = norm_image(image)
    # Rescales the landmarks as (x + 0.5) * 224.
    # presumably they are stored centred around 0 relative to a 224-pixel
    # image -- TODO confirm against the dataset (images here look 600x800).
    landmarks = (landmarks + 0.5) * 224
    
    ##[8, 600, 800] --> [8,3,600,800]
    # Adds an axis at dim 1 and repeats the batch three times along it.
    # NOTE(review): the shape note above only holds if the earlier torch.cat
    # on dim 1 has not already been applied -- confirm.
    images = images.unsqueeze(1)
    images = torch.cat((images, images, images), 1)

    # Network output is moved to the CPU, rescaled the same way as the
    # landmarks, and reshaped to 4 points x 2 coordinates per image.
    predictions = (best_network(images).cpu() + 0.5) * 224
    predictions = predictions.view(-1,4,2)
    
    plt.figure(figsize=(10,40))
    
    # One subplot per image; the hard-coded 8 assumes the batch holds at
    # least 8 images.
    for img_num in range(8):
        plt.subplot(8,1,img_num+1)
        # transpose(1,2,0) moves the leading axis last (C,H,W -> H,W,C) for imshow.
        plt.imshow(images[img_num].cpu().numpy().transpose(1,2,0).squeeze(), cmap='gray')
        plt.scatter(predictions[img_num,:,0], predictions[img_num,:,1], c = 'r')
        plt.scatter(landmarks[img_num,:,0], landmarks[img_num,:,1], c = 'g')

print('Total number of test images: {}'.format(len(test_dataset)))

end_time = time.time()
print("Elapsed Time : {}".format(end_time - start_time)) 

我该如何解决以下错误?

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-59-e4aa0ace8c75> in <module>
     19         image = image.float()
     20         ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
---> 21         image = norm_image(image)
     22     landmarks = (landmarks + 0.5) * 224
     23 

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py in __call__(self, tensor)
    210             Tensor: Normalized Tensor image.
    211         """
--> 212         return F.normalize(tensor, self.mean, self.std, self.inplace)
    213 
    214     def __repr__(self):

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/functional.py in normalize(tensor, mean, std, inplace)
    282     if tensor.ndimension() != 3:
    283         raise ValueError('Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = '
--> 284                          '{}.'.format(tensor.size()))
    285 
    286     if not inplace:

ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([1800, 800]).

如果我删除 for 循环之前的以下这一行:

images = torch.cat((images,images,images),1)

我会收到这个新错误:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-61-3e892b69015c> in <module>
     19         image = image.float()
     20         ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
---> 21         image = norm_image(image)
     22     landmarks = (landmarks + 0.5) * 224
     23 

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/transforms.py in __call__(self, tensor)
    210             Tensor: Normalized Tensor image.
    211         """
--> 212         return F.normalize(tensor, self.mean, self.std, self.inplace)
    213 
    214     def __repr__(self):

~/anaconda3/lib/python3.7/site-packages/torchvision/transforms/functional.py in normalize(tensor, mean, std, inplace)
    282     if tensor.ndimension() != 3:
    283         raise ValueError('Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = '
--> 284                          '{}.'.format(tensor.size()))
    285 
    286     if not inplace:

ValueError: Expected tensor to be a tensor image of size (C, H, W). Got tensor.size() = torch.Size([600, 800]).

1 个答案:

答案 0 :(得分:0)

以下是正确的:

# Evaluation cell (answer version): load the trained landmark network, run it
# on one batch, convert predictions to pixel coordinates and plot them (red)
# against the ground-truth landmarks (green), timing the whole run.
start_time = time.time()

with torch.no_grad():

    # Rebuild the model on the GPU, restore the saved weights and switch to
    # inference mode.
    best_network = Network()
    best_network.cuda()
    best_network.load_state_dict(torch.load('../moth_landmarks.pth')) 
    best_network.eval()
    
    # NOTE(review): the batch is drawn from train_loader, while the summary
    # print at the end reports len(test_dataset) -- confirm which is intended.
    batch = next(iter(train_loader))
    images, landmarks = batch['image'], batch['landmarks']
    # Flatten each sample's landmarks into one row: (batch, -1).
    landmarks = landmarks.view(landmarks.size(0),-1).cuda()

    print(landmarks.shape)
    # Scale the first 8 columns: even columns by 1/800, odd columns by 1/600.
    # presumably even = x / image width and odd = y / image height for
    # 600x800 images -- TODO confirm the column layout.
    for i in range(8):
        if(i%2==0):
            landmarks[:,i] = landmarks[:,i]/800
        else:
            landmarks[:,i] = landmarks[:,i]/600
    # `x != x` is true only for NaN, so this replaces NaN entries with 0.
    landmarks [landmarks != landmarks] = 0
    #landmarks = landmarks.unsqueeze_(0)

    images = images.cuda()
    
    print('*, ', landmarks.shape)

    norm_image = transforms.Normalize(0.3812, 0.1123) 
    print('images shape: ', images.shape)
    for image in images:
        
        # In-place insert of an axis at index 1 so the per-image slice is 3-D
        # and passes Normalize's dimension check.
        # NOTE(review): for an (H, W) slice this gives (H, 1, W), not
        # (1, H, W) -- confirm this is the intended channel layout.
        image = image.unsqueeze_(1)

        #images = torch.cat((images,images,images),1)
        image = image.float()
        ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
        # NOTE(review): the normalized result is only bound to the loop
        # variable and never stored back into `images`, so the batch passed to
        # the network below appears to be unnormalized -- confirm intent.
        image = norm_image(image)
    
    print('max: ', torch.max(landmarks))
    print('min: ', torch.min(landmarks))

    ##landmarks = (landmarks + 0.5) * 224 #?? why?
    print('**')
    print(images.shape, landmarks.shape)
    ##[8, 600, 800] --> [8,3,600,800]
    # Add an axis at dim 1 and repeat the batch three times along it.
    images = images.unsqueeze(1)
    images = torch.cat((images, images, images), 1)

    #predictions = (best_network(images).cpu() + 0.5) * 224
    predictions = best_network(images).cpu()  

    print('****', predictions.shape)
    # Scale the first 8 output columns back up: even columns by 800, odd by 600.
    # assumes the network emits coordinates as fractions of width/height --
    # TODO confirm against the training targets.
    for i in range(8):
        if(i%2==0):
            predictions[:,i] = predictions[:,i]*800
        else:
            predictions[:,i] = predictions[:,i]*600

    # Reshape to 4 points x 2 coordinates per image.
    predictions = predictions.view(-1,4,2)
    print('****', predictions.shape)
    
  
    
    # Undo the earlier division so the ground-truth landmarks use the same
    # scale as the rescaled predictions (entries that were NaN are now 0).
    for i in range(8):
        if(i%2==0):
            landmarks[:,i] = landmarks[:,i]*800
        else:
            landmarks[:,i] = landmarks[:,i]*600

    landmarks = landmarks.view(-1,4,2)
    plt.figure(figsize=(10,40))
    # Move to the CPU before handing the values to matplotlib.
    landmarks = landmarks.cpu()
    print(type(landmarks), landmarks.shape)
    # One subplot per image; the hard-coded 8 assumes the batch holds at
    # least 8 images.
    for img_num in range(8):
        plt.subplot(8,1,img_num+1)
        # transpose(1,2,0) moves the leading axis last (C,H,W -> H,W,C) for imshow.
        plt.imshow(images[img_num].cpu().numpy().transpose(1,2,0).squeeze(), cmap='gray')
        plt.scatter(predictions[img_num,:,0], predictions[img_num,:,1], c = 'r')
        plt.scatter(landmarks[img_num,:,0], landmarks[img_num,:,1], c = 'g')

print('Total number of test images: {}'.format(len(test_dataset)))

end_time = time.time()
print("Elapsed Time : {}".format(end_time - start_time))