尽管我已经将模型和数据都移到了cuda,但是当我尝试删除一些过滤器然后重新训练网络时,我遇到了这个问题。当我调用loss.backward()
时,该错误似乎是在向后;这是我的代码:
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.01, momentum=0.9,
weight_decay=0.0005)
def train(model, train_loader, criterion, optimizer, device, epoch):
model.train()
for idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output,target)
loss.backward() #The problem is here
optimizer.step()
if idx % 5 == 0:
print("Epoch {} [{}/{} ({:.2f}%)]\tLoss: {:.6f}"
.format(epoch,idx*len(data),len(train_loader.dataset),
100.0*idx/len(train_loader),loss.item()))
def test(model,test_loader,criterion,device):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data,target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
loss = criterion(output,target)
test_loss += loss.item()
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{}
({:.0f}%)\n'
.format(test_loss, correct, len(test_loader.dataset),100.
* correct / len(test_loader.dataset)))
#This function returns the index of the filter to be pruned
def index_pruned(model, layer):
l = []
for i in range(model.features[layer].weight.size(0)):
l.append(abs(model.features[layer].weight[i,:,:,:]).sum())
return l.index(min(l))
def prune_filter(model, layer, next_layer, indexes):
#Making a new layer, with one less filter
new_conv =
torch.nn.Conv2d(in_channels=model.features[layer].in_channels,
out_channels=model.features[layer].out_channels-1,
kernel_size=model.features[layer].kernel_size,
stride=model.features[layer].stride,
padding=model.features[layer].padding)
#Pass the old weights to the new layer except in the pruned filter
new_conv.weight[0:indexes,:,:,:] =
model.features[layer].weight[0:indexes,:,:,:]
new_conv.weight[indexes:,:,:,:] =
model.features[layer].weight[indexes+1,:,:,:]
new_conv.bias[0:indexes] = model.features[layer].bias[0:indexes]
new_conv.bias[indexes:] = model.features[layer].bias[indexes+1:]
#replace the layer
model.features[layer] = new_conv
#When not pruning the last layer
if layer != 10:
#Reduce the 'thickness' of the consecutive layer
#as the previous one is pruned
next_new_conv = torch.nn.Conv2d(
in_channels=model.features[next_layer].in_channels-1,
out_channels=model.features[next_layer].out_channels,
kernel_size=model.features[next_layer].kernel_size,
stride=model.features[next_layer].stride,
padding=model.features[next_layer].padding)
next_new_conv.weight[:,0:indexes,:,:] =
model.features[next_layer].weight[:,0:indexes,:,:]
next_new_conv.weight[:,indexes:,:,:] =
model.features[next_layer].weight[:,indexes+1:,:,:]
model.features[next_layer] = next_new_conv
#Pruning the last layer affects the linear layer
elif layer == 10:
params = int(model.classifier[0].in_features /
(model.features[10].out_channels+1))
new_fc1 = torch.nn.Linear(
in_features=int(model.classifier[0].in_features-params),
out_features=int(model.classifier[0].out_features))
new_fc1.weight[:,0:indexes*params] =
model.classifier[0].weight[:,0:indexes*params]
new_fc1.weight[:,:params*indexes] =
model.classifier[0].weight[:,:(indexes+1)*params]
new_fc1.bias = model.classifier[0].bias
model.classifier[0]=new_fc1
return model
def main(model, train_loader,test_loader,criterion,optimizer,
pretrained=False,prune=False,save=False,pruneFilter=False):
device = 'cuda'
if pretrained == True:
model.load_state_dict(torch.load('AlexNet_pruned.pt'))
for params in model.parameters():
params.requires_grad = True
model.to(device)
if pruneFilter == True:
#conv0:
for num_filters_pruned in range(16):
model=prune_filter(model=model, layer=0, next_layer=3, indexes=index_pruned(model,0))
if num_filters_pruned %4 == 0:
model=model.cuda()
train(model, train_loader, criterion, optimizer, device, 1)
test(model,test_loader,criterion,device)
torch.save(model.state_dict(),'AlexNet_filers_pruned.pt')
if save == True:
torch.save(model.state_dict(),'AlexNet.pt')
if prune == True:
torch.save(model.state_dict(),'AlexNet_pruned.pt')
错误在loss.backward()
处。我试图检查每个参数是否都在cuda中
for num_filters_pruned in range(16):
model=prune_filter(model=model, layer=0, next_layer=3, indexes=index_pruned(model,0))
if num_filters_pruned %4 == 0:
model=model.cuda()
for name, param in model.named_parameters():
print(name,':',param.device)
train(model, train_loader, criterion, optimizer, device, 1)
test(model,test_loader,criterion,device)
And this is the result
features.0.weight : cuda:0
features.0.bias : cuda:0
features.3.weight : cuda:0
features.3.bias : cuda:0
features.6.weight : cuda:0
features.6.bias : cuda:0
features.8.weight : cuda:0
features.8.bias : cuda:0
features.10.weight : cuda:0
features.10.bias : cuda:0
classifier.fc1.weight : cuda:0
classifier.fc1.bias : cuda:0
classifier.fc2.weight : cuda:0
classifier.fc2.bias : cuda:0
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) <ipython-input-12-46a65917686e> in <module>
----> 1 main(model,train_loader,test_loader,criterion,optimizer,pretrained=True,prune=False,save=False,pruneFilter=True)
<ipython-input-11-8c11e06a650b> in main(model, train_loader, test_loader, criterion, optimizer, pretrained, prune, save, pruneFilter)
33 print(name,':',param.device)
34
---> 35 train(model, train_loader, criterion, optimizer, device, 1)
36 test(model,test_loader,criterion,device)
37 #conv2:
<ipython-input-8-f4fd4c83eff2> in train(model, train_loader, criterion, optimizer, device, epoch)
9 loss = criterion(output,target)
10 loss = loss.cuda()
---> 11 loss.backward()
12 optimizer.step()
13 if idx % 5 == 0:
/usr/local/lib/python3.5/dist-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
100 products. Defaults to ``False``.
101 """
--> 102 torch.autograd.backward(self, gradient, retain_graph, create_graph)
103
104 def register_hook(self, hook):
/usr/local/lib/python3.5/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
88 Variable._execution_engine.run_backward(
89 tensors, grad_tensors, retain_graph, create_graph,
---> 90 allow_unreachable=True) # allow_unreachable flag
91
92
RuntimeError: Function CudnnConvolutionBackward returned an invalid gradient at index 1 - expected type torch.FloatTensor but got torch.cuda.FloatTensor