我正在阅读Pytorch中的转学教程: https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
我从迁移学习教程中获取了大部分代码,
并进行了一些更改以更频繁地打印出val_acc
以适合我的数据集。
尽管原始的迁移学习教程代码有效(Densenet模型正在训练中),但带有更改的我的代码似乎不再起作用。 我不知道为什么。我一直在学习的速度,但没有改变。
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
def prepare_dataset_from_folder(data_dir, size, batch_size):
"""prepare datasets into dataloaders
args
------
data_dir = root_dir holding 'train_dir' and 'val_dir'
size = size of image
batch_size = batch_size
"""
data_transforms = {
'training': transforms.Compose([
transforms.Resize((size,size)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'validation': transforms.Compose([
transforms.Resize((size,size)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['training', 'validation']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size= batch_size,
shuffle=True, num_workers=1) #num_workers leads to errors
for x in ['training', 'validation']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['training', 'validation']}
class_names = image_datasets['training'].classes
return dataloaders, dataset_sizes, class_names
def train_model(model, criterion, optimizer, scheduler, num_epochs=3, best_acc = 80.0, batch_size = 5):
since = time.time()
dataloaders, dataset_sizes, class_names = prepare_dataset_from_folder(data_dir, size, batch_size)
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
running_loss = 0.0
running_corrects = 0
for num, data in enumerate(dataloaders["training"]):
if num % 100 == 0:
val_running_loss = 0.0
val_running_corrects = 0
model.eval()
for val_num, val_data in enumerate(dataloaders["validation"]):
inputs, labels = val_data
inputs = inputs.to(device)
labels = labels.to(device)
with torch.set_grad_enabled(False):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
val_running_loss += loss.item() * inputs.size(0)
val_running_corrects += torch.sum(preds == labels.data)
print(dataset_sizes['validation'])
val_loss = ( val_running_loss/dataset_sizes['validation'])
val_acc = (val_running_corrects.double() / dataset_sizes['validation'])#.cpu().numpy()
print('val_loss {:.4f} val_acc: {:.4f}'.format(val_loss, val_acc))
if val_acc > best_acc:
print("Saving due to high val accuracy")
x = datetime.datetime.now()
time_stamp_str = str(x).split(' ')[0] + "_"
save_path = os.path.join(SAVE_DIR, f"val_acc_{val_acc}.pt")
torch.save(model, save_path)
scheduler.step()
model.train()
inputs, labels = data
inputs = inputs.to(device)
labels = labels.to(device)
#what is this
optimizer.zero_grad()
with torch.set_grad_enabled(True):
#print(torch.is_grad_enabled())
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
if num % 100 == 0:
print("100 batches")
temp_running_corrects = (running_corrects.double() / ( len(inputs) * num + 1)).cpu().numpy()
print(temp_running_corrects)
epoch_loss = running_loss / dataset_sizes['training']
epoch_acc = running_corrects.double() / dataset_sizes['training']
print('Training Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
return model
if __name__=="__main__":
data_dir = r'TEST 10'
device = torch.device("cuda:0")
PATH =" "
size = 224
batch_size = 2
# pick your model
#model_ft = torch.load(PATH)
#model_ft = models.resnet18(pretrained=True)
model_ft = models.densenet201(pretrained=True)
#num_ftrs = model_ft.fc.in_features #in features
num_ftrs = 1920 #densenet 201
#https://discuss.pytorch.org/t/what-does-the-fc-in-feature-mean/4889
model_ft.fc = nn.Linear(num_ftrs, 2)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=2, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=3, batch_size = batch_size)
这是结果:
Training Loss: 8.5898 Acc: 0.0102
Epoch 1/2
val_loss 8.5163 val_acc: 0.0191
val_loss 8.9667 val_acc: 0.0255
val_loss 9.3078 val_acc: 0.0127
val_loss 8.7855 val_acc: 0.0318
val_loss 8.6217 val_acc: 0.0255
val_loss 9.0631 val_acc: 0.0191
val_loss 8.5167 val_acc: 0.0255
val_loss 9.0499 val_acc: 0.0191
val_loss 9.0549 val_acc: 0.0255
val_loss 8.8373 val_acc: 0.0191
val_loss 8.9288 val_acc: 0.0191
val_loss 8.9968 val_acc: 0.0127
val_loss 9.2790 val_acc: 0.0127
val_loss 9.4389 val_acc: 0.0191
val_loss 8.6907 val_acc: 0.0318
val_loss 9.0903 val_acc: 0.0191
val_loss 9.0093 val_acc: 0.0191
val_loss 9.4387 val_acc: 0.0127
val_loss 9.1059 val_acc: 0.0191
val_loss 9.3480 val_acc: 0.0127
val_loss 8.9435 val_acc: 0.0191
val_loss 8.4412 val_acc: 0.0318
val_loss 8.8712 val_acc: 0.0382
val_loss 8.9125 val_acc: 0.0191
val_loss 9.3815 val_acc: 0.0127
val_loss 9.0214 val_acc: 0.0191
val_loss 9.4234 val_acc: 0.0127
val_loss 9.1625 val_acc: 0.0191
但是,如果我使用原始教程, 我的模型似乎正在学习。
from __future__ import print_function, division
import pretrainedmodels as ptmodels
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
# Each epoch has a training and validation phase
for phase in ['training', 'validation']:
if phase == 'training':
scheduler.step() #what is this
model.train() # Set model to training mode
else:
model.eval() # Set model to evaluate mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for num, data in enumerate(dataloaders[phase]):
inputs, labels = data
inputs = inputs.to(device)
labels = labels.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'training'):
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'training':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
if num % 100 == 0:
print("500 batches")
temp_running_corrects = (running_corrects.double() / ( len(inputs) * num + 1)).cpu().numpy()
print(temp_running_corrects)
#print('running_corrects', running_corrects/( len(inputs) * num + 1).numpy())
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects.double() / dataset_sizes[phase]
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'validation' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best val Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
save_path = os.path.join(SAVE_DIR, f"val_acc_{val_acc}.pt")
torch.save(model, save_path)
return model
if __name__=="__main__":
img_size = 224
data_transforms = {
'training': transforms.Compose([
transforms.Resize((img_size,img_size)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'validation': transforms.Compose([
transforms.Resize((img_size,img_size)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
data_dir = r'TEST 10'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['training', 'validation']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=2,
shuffle=True, num_workers=4) #num_workers leads to errors
for x in ['training', 'validation']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['training', 'validation']}
class_names = image_datasets['training'].classes
device = torch.device("cuda:0")
#model_ft = models.resnet18(pretrained=True)
#model_ft = models.resnet152(pretrained = True)
#model_ft = ptmodels.__dict__['polynet'](num_classes=1000, pretrained='imagenet')
model_ft = models.densenet201(pretrained=True)
#model_ft = models.resnext101_32x8d(pretrained=True).fc.in_features
print(model_ft.features)
#num_ftrs = model_ft.fc.in_features #in features
num_ftrs = 1920 #densenet 201
#num_ftrs = 2048
#https://discuss.pytorch.org/t/what-does-the-fc-in-feature-mean/4889
model_ft.fc = nn.Linear(num_ftrs, 2)
#model_ft.last_linear = nn.Linear(num_ftrs, 2)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=2, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
num_epochs=3)
https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
Training Loss:
Epoch 0/2
500 batches
0.0
500 batches
0.681592039800995
500 batches
0.6359102244389028
500 batches
0.6422628951747088
500 batches
0.6541822721598003
500 batches
0.6763236763236763
500 batches
0.694421315570358
500 batches
0.7059243397573162
500 batches
0.7108057464084947
500 batches
0.7096057745696835
500 batches
0.7126436781609196
500 batches
0.7142208087233075
500 batches
0.7251145356101625
500 batches
0.7293348712033834
500 batches
0.729382363441628
500 batches
0.7340886371209597
500 batches
0.7363323961262105
我已经坚持了几天,无法解决。 我只是想更频繁地打印出验证准确性。
谢谢。