嗨,我有一个基于 PyTorch 的 Grad-CAM 解决方案:

import torch
import torch.nn as nn
from torch.utils import data
from torchvision import transforms
from torchvision import datasets
import matplotlib.pyplot as plt
import numpy as np

# Preprocessing pipeline using the standard ImageNet statistics.
# Order matters: Resize works on the PIL image returned by ImageFolder's
# default loader, ToTensor converts it to a float tensor in [0, 1], and only
# then can Normalize be applied (Normalize rejects PIL images).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# define a 1 image dataset
# ImageFolder expects the images to sit inside a class sub-directory of root.
dataset = datasets.ImageFolder(root='./data/Elephant/', transform=transform)

# define the dataloader to load that single image
# shuffle=False + batch_size=1 so the first batch is always the first image.
dataloader = data.DataLoader(dataset=dataset, shuffle=False, batch_size=1)

# Placeholder: replace Mymodel() with your own network instance. The VGG
# wrapper below reads `.features` and `.classifier` from this object.
vgg19 = Mymodel()
class VGG(nn.Module):
    """Grad-CAM wrapper that exposes the last conv activations and their gradients.

    The wrapped network is split into a convolutional stem
    (``features[:36]``), a final max-pool, and the network's ``classifier``.
    During ``forward`` a tensor hook is attached to the stem's output so that
    a later ``backward()`` call stores the gradient of the score with respect
    to those activations.

    Args:
        model: Network to wrap. It must provide a sliceable ``features``
            container and a ``classifier`` module. When omitted, the
            module-level ``vgg19`` object is used.
    """

    def __init__(self, model=None):
        super().__init__()
        # get the pretrained network (falls back to the module-level `vgg19`)
        self.vgg = vgg19 if model is None else model
        # dissect the network to access its last convolutional layer
        # (the first 36 feature layers; the author's last conv block ends there)
        self.features_conv = self.vgg.features[:36]
        # the max pool that follows the sliced feature stem
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        # the classifier head of the wrapped network
        self.classifier = self.vgg.classifier
        # placeholder for the gradients captured by the hook
        self.gradients = None

    def activations_hook(self, grad):
        """Tensor hook: store the gradient flowing into the conv activations."""
        self.gradients = grad

    def forward(self, x):
        """Run the network and arm the gradient hook on the conv activations.

        Returns the classifier output for ``x``.
        """
        x = self.features_conv(x)
        # register_hook raises on tensors that do not require grad (e.g. when
        # called under torch.no_grad()), so only arm the hook when a backward
        # pass through these activations is actually possible.
        if x.requires_grad:
            x.register_hook(self.activations_hook)
        # apply the remaining pooling
        x = self.max_pool(x)
        # flatten per sample, keeping the batch dimension instead of assuming 1
        x = x.flatten(start_dim=1)
        x = self.classifier(x)
        return x

    def get_activations_gradient(self):
        """Return the gradient stored by the last backward pass (or None)."""
        return self.gradients

    def get_activations(self, x):
        """Return the conv-stem activations for ``x`` (before the max pool)."""
        return self.features_conv(x)

# Wrap the model so the gradients of the last conv activations get captured.
vgg = VGG()

# set the evaluation mode
# (without this, dropout stays active and repeated forward passes can disagree)
vgg.eval()

# get the image from the dataloader
img, _ = next(iter(dataloader))

# get the most likely prediction of the model
# A single forward pass serves both the argmax and the backward call.
pred = vgg(img)
pred_class = pred.argmax(dim=1).item()

# Backpropagate the winning class score; this fires the hook in VGG.forward.
pred[:, pred_class].backward()

# pull the gradients out of the model
gradients = vgg.get_activations_gradient()

# pool the gradients across the channels
# (mean over batch and spatial dims leaves one weight per channel)
pooled_gradients = torch.mean(gradients, dim=[0, 2, 3])

# get the activations of the last convolutional layer
# no_grad: these are only read, no autograd graph is needed.
with torch.no_grad():
    activations = vgg.get_activations(img)

# weight the channels by corresponding gradients
# Broadcasting over (batch, channel, H, W) works for any channel count.
activations = activations * pooled_gradients.view(1, -1, 1, 1)

# average the channels of the activations
heatmap = torch.mean(activations, dim=1).squeeze()

# relu on top of the heatmap
# expression (2) in https://arxiv.org/pdf/1610.02391.pdf
heatmap = torch.relu(heatmap)

# normalize the heatmap
# Skip the division when the map is all zeros, which would produce NaNs.
peak = torch.max(heatmap)
if peak > 0:
    heatmap = heatmap / peak
heatmap = heatmap.numpy()

import cv2

img = cv2.imread('./data/Elephant/data/05fig34.jpg')
# cv2.imread signals a missing/unreadable file by returning None.
if img is None:
    raise FileNotFoundError('./data/Elephant/data/05fig34.jpg')
# cv2.resize takes (width, height), hence the swapped shape indices.
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
# The blend can exceed 255, so round and clip explicitly before writing 8-bit.
superimposed_img = np.clip(np.rint(heatmap * 0.4 + img), 0, 255).astype(np.uint8)
cv2.imwrite('./map.jpg', superimposed_img)  ###saves gradcam visualization image