PyTorch: getting the correct dimensions for the final layers

Asked: 2018-10-24 08:08:56

Tags: machine-learning conv-neural-network pytorch

PyTorch newbie here! I'm trying to fine-tune a VGG16 model to predict 3 different classes. Part of the work involves converting the FC layers to CONV layers, but my predicted values do not fall between 0 and 2 (the 3 classes).

Can someone point me to a good resource on how to calculate the correct dimensions for the final layers?

Here are the original FC layers of VGG16:

(classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace)
    (2): Dropout(p=0.5)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace)
    (5): Dropout(p=0.5)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
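
For reference, the 25088 in the first Linear layer is just the flattened 512 x 7 x 7 feature map that VGG16's convolutional part produces for a 224 x 224 input. A quick sanity check of my own (assuming torchvision's stock vgg16):

    import torch
    from torchvision.models import vgg16

    m = vgg16()
    x = torch.zeros(1, 3, 224, 224)      # dummy 224x224 RGB batch of size 1
    feat = m.features(x)
    print(feat.shape)                    # torch.Size([1, 512, 7, 7])
    print(512 * 7 * 7)                   # 25088 == classifier[0].in_features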

My code for converting the FC layers to CONV layers:

    def convert_fc_to_conv(self, fc_layers):
        # Replace first FC layer with a CONV layer
        fc = fc_layers[0].state_dict()
        in_ch = 512
        out_ch = fc["weight"].size(0)
        first_conv = nn.Conv2d(512, out_ch, kernel_size=(1, 1), stride=(1, 1))

        conv_list = [first_conv]
        for idx, layer in enumerate(fc_layers[1:]):
            if isinstance(layer, nn.Linear):
                fc = layer.state_dict()
                in_ch = fc["weight"].size(1)
                out_ch = fc["weight"].size(0)
                if idx == len(fc_layers) - 4:
                    in_ch = 3
                conv = nn.Conv2d(out_ch, in_ch, kernel_size=(1, 1), stride=(1, 1))
                conv_list += [conv]
            else:
                conv_list += [layer]
            gc.collect()

        avg_pool = nn.AvgPool2d(kernel_size=2, stride=1, ceil_mode=False)
        conv_list += [avg_pool, nn.Softmax()]
        top_layers = nn.Sequential(*conv_list)
        return top_layers

The final model architecture:

    Model(
    (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False))

    (classifier): Sequential(
    (0): Conv2d(512, 4096, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(inplace)
    (2): Dropout(p=0.5)
    (3): Conv2d(4096, 3, kernel_size=(1, 1), stride=(1, 1))
    (4): ReLU(inplace)
    (5): Dropout(p=0.5)
    (6): AvgPool2d(kernel_size=2, stride=1, padding=0)
    (7): Softmax()
  )
)

Model summary:

            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256, 56, 56]               0
           Conv2d-15          [-1, 256, 56, 56]         590,080
             ReLU-16          [-1, 256, 56, 56]               0
        MaxPool2d-17          [-1, 256, 28, 28]               0
           Conv2d-18          [-1, 512, 28, 28]       1,180,160
             ReLU-19          [-1, 512, 28, 28]               0
           Conv2d-20          [-1, 512, 28, 28]       2,359,808
             ReLU-21          [-1, 512, 28, 28]               0
           Conv2d-22          [-1, 512, 28, 28]       2,359,808
             ReLU-23          [-1, 512, 28, 28]               0
        MaxPool2d-24          [-1, 512, 14, 14]               0
           Conv2d-25          [-1, 512, 14, 14]       2,359,808
             ReLU-26          [-1, 512, 14, 14]               0
           Conv2d-27          [-1, 512, 14, 14]       2,359,808
             ReLU-28          [-1, 512, 14, 14]               0
           Conv2d-29          [-1, 512, 14, 14]       2,359,808
             ReLU-30          [-1, 512, 14, 14]               0
        MaxPool2d-31            [-1, 512, 7, 7]               0
           Conv2d-32           [-1, 4096, 7, 7]       2,101,248
             ReLU-33           [-1, 4096, 7, 7]               0
          Dropout-34           [-1, 4096, 7, 7]               0
           Conv2d-35              [-1, 3, 7, 7]          12,291
             ReLU-36              [-1, 3, 7, 7]               0
          Dropout-37              [-1, 3, 7, 7]               0
        AvgPool2d-38              [-1, 3, 6, 6]               0
          Softmax-39              [-1, 3, 6, 6]               0
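
This shape trace is the issue: the classifier ends with a 3 x 6 x 6 map instead of a single score per class. The 6 comes straight from the pooling arithmetic (a quick check of my own, not part of the summary):

    # AvgPool2d(kernel_size=2, stride=1) over the 7x7 map produced by the 1x1 convs:
    k, s, h = 2, 1, 7
    print((h - k) // s + 1)   # 6 -> hence the [-1, 3, 6, 6] above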

1 Answer:

Answer 0 (score: 1):

I wrote a function that takes a PyTorch model as input and converts its classification layers to convolutional layers. It currently works for VGG and AlexNet, but you can extend it to other models as well.

import torch
import torch.nn as nn
from torchvision.models import alexnet, vgg16

def convolutionize(model, num_classes, input_size=(3, 224, 224)):
    '''Converts the classification layers of VGG & Alexnet to convolutions

    Input:
        model: torch.models
        num_classes: number of output classes
        input_size: size of input tensor to the model

    Returns:
        model: converted model with convolutions
    '''
    features = model.features
    classifier = model.classifier

    # create a dummy input tensor and add a dim for batch-size
    x = torch.zeros(input_size).unsqueeze_(dim=0)

    # change the last layer output to the num_classes
    classifier[-1] = nn.Linear(in_features=classifier[-1].in_features,
                               out_features=num_classes)

    # pass the dummy input tensor through the features layer to compute the output size
    for layer in features:
        x = layer(x)

    conv_classifier = []
    for layer in classifier:
        if isinstance(layer, nn.Linear):
            # create a convolution equivalent of linear layer
            conv_layer = nn.Conv2d(in_channels=x.size(1),
                                   out_channels=layer.weight.size(0),
                                   kernel_size=(x.size(2), x.size(3)))

            # transfer the weights
            conv_layer.weight.data.view(-1).copy_(layer.weight.data.view(-1))
            conv_layer.bias.data.view(-1).copy_(layer.bias.data.view(-1))
            layer = conv_layer

        x = layer(x)
        conv_classifier.append(layer)

    # replace the model.classifier with newly created convolution layers
    model.classifier = nn.Sequential(*conv_classifier)

    return model

def visualize(model, input_size=(3, 224, 224)):
    '''Visualize the input size though the layers of the model'''
    x = torch.zeros(input_size).unsqueeze_(dim=0)
    print(x.size())
    for layer in list(model.features) + list(model.classifier):
        x = layer(x)
        print(x.size())

This is what the tensor sizes look like as an input passes through the converted model:

_vgg = vgg16()
vgg = convolutionize(_vgg, 100)
print('\n\nVGG')
visualize(vgg)

...

VGG
torch.Size([1, 3, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 224, 224])
torch.Size([1, 64, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 112, 112])
torch.Size([1, 128, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 56, 56])
torch.Size([1, 256, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 28, 28])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 14, 14])
torch.Size([1, 512, 7, 7])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 4096, 1, 1])
torch.Size([1, 100, 1, 1])
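
Note that the classifier now collapses to 1 x 1 spatially, so the final convolution produces exactly one value per class. Keep in mind that VGG's own forward() flattens the features before the classifier, so with the converted model you pass the two stages explicitly, as visualize does. A rough sketch of how this would look for the original 3-class problem (the pretrained flag and the dummy input are my assumptions, not from the question):

_vgg3 = vgg16(pretrained=True)
vgg3 = convolutionize(_vgg3, num_classes=3)

img = torch.randn(1, 3, 224, 224)                 # dummy image batch
out = vgg3.classifier(vgg3.features(img))         # shape: [1, 3, 1, 1]
pred = out.view(out.size(0), -1).argmax(dim=1)    # class index in {0, 1, 2}
print(pred)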