Pytorch模型(转移学习)未学习

时间:2019-06-16 05:56:59

标签: python deep-learning pytorch

我正在阅读Pytorch中的转学教程: https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html

我从迁移学习教程中获取了大部分代码,    并进行了一些更改以更频繁地打印出val_acc以适合我的数据集。

尽管原始的迁移学习教程代码有效(Densenet模型正在训练中),但带有更改的我的代码似乎不再起作用。   我不知道为什么。我一直在学习的速度,但没有改变。

 from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy




def prepare_dataset_from_folder(data_dir, size, batch_size):
    """prepare datasets into dataloaders
    args
    ------
    data_dir = root_dir holding 'train_dir' and 'val_dir'

    size = size of image

    batch_size = batch_size
    """

    data_transforms = {
        'training': transforms.Compose([
           transforms.Resize((size,size)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'validation': transforms.Compose([
            transforms.Resize((size,size)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }


    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                              data_transforms[x])
                      for x in ['training', 'validation']}


    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size= batch_size,
                                                 shuffle=True, num_workers=1) #num_workers leads to errors
                  for x in ['training', 'validation']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['training', 'validation']}
    class_names = image_datasets['training'].classes

    return dataloaders, dataset_sizes, class_names





def train_model(model, criterion, optimizer, scheduler, num_epochs=3, best_acc = 80.0, batch_size = 5):
    since = time.time()
    dataloaders, dataset_sizes, class_names =  prepare_dataset_from_folder(data_dir, size, batch_size)


    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        running_loss = 0.0
        running_corrects = 0

        for num, data in enumerate(dataloaders["training"]):

            if num % 100 == 0:
                   val_running_loss = 0.0
                   val_running_corrects = 0
                   model.eval()

                   for val_num, val_data in enumerate(dataloaders["validation"]):
                       inputs, labels = val_data
                       inputs = inputs.to(device)
                       labels = labels.to(device)
                       with torch.set_grad_enabled(False):
                           outputs = model(inputs)
                           _, preds = torch.max(outputs, 1)
                           loss = criterion(outputs, labels)

                       val_running_loss += loss.item() * inputs.size(0)
                       val_running_corrects += torch.sum(preds == labels.data)

                   print(dataset_sizes['validation']) 
                   val_loss = ( val_running_loss/dataset_sizes['validation'])
                   val_acc = (val_running_corrects.double() / dataset_sizes['validation'])#.cpu().numpy()
                   print('val_loss  {:.4f} val_acc: {:.4f}'.format(val_loss, val_acc))
                   if val_acc > best_acc:
                      print("Saving due to high val accuracy")
                      x = datetime.datetime.now()
                      time_stamp_str = str(x).split(' ')[0] + "_" 
                      save_path = os.path.join(SAVE_DIR, f"val_acc_{val_acc}.pt")
                      torch.save(model, save_path)

            scheduler.step()
            model.train()
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
             #what is this

            optimizer.zero_grad()

            with torch.set_grad_enabled(True):
                    #print(torch.is_grad_enabled())
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)


            if num % 100 == 0:
                    print("100 batches")
                    temp_running_corrects = (running_corrects.double() / ( len(inputs) *  num + 1)).cpu().numpy()
                    print(temp_running_corrects)

        epoch_loss = running_loss / dataset_sizes['training']
        epoch_acc = running_corrects.double() / dataset_sizes['training']


        print('Training Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60))


    return model








if __name__=="__main__":



    data_dir = r'TEST 10'
    device = torch.device("cuda:0")
    PATH =" "
    size = 224
    batch_size = 2



    # pick your model
    #model_ft = torch.load(PATH)
    #model_ft = models.resnet18(pretrained=True)
    model_ft = models.densenet201(pretrained=True)



    #num_ftrs = model_ft.fc.in_features #in features
    num_ftrs = 1920 #densenet 201
    #https://discuss.pytorch.org/t/what-does-the-fc-in-feature-mean/4889


    model_ft.fc = nn.Linear(num_ftrs, 2)

    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.0001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=2, gamma=0.1)


    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=3, batch_size = batch_size)

这是结果:

Training Loss: 8.5898 Acc: 0.0102
Epoch 1/2
val_loss 8.5163 val_acc: 0.0191
val_loss 8.9667 val_acc: 0.0255
val_loss 9.3078 val_acc: 0.0127
val_loss 8.7855 val_acc: 0.0318
val_loss 8.6217 val_acc: 0.0255
val_loss 9.0631 val_acc: 0.0191
val_loss 8.5167 val_acc: 0.0255
val_loss 9.0499 val_acc: 0.0191
val_loss 9.0549 val_acc: 0.0255
val_loss 8.8373 val_acc: 0.0191
val_loss 8.9288 val_acc: 0.0191
val_loss 8.9968 val_acc: 0.0127
val_loss 9.2790 val_acc: 0.0127
val_loss 9.4389 val_acc: 0.0191
val_loss 8.6907 val_acc: 0.0318
val_loss 9.0903 val_acc: 0.0191
val_loss 9.0093 val_acc: 0.0191
val_loss 9.4387 val_acc: 0.0127
val_loss 9.1059 val_acc: 0.0191
val_loss 9.3480 val_acc: 0.0127
val_loss 8.9435 val_acc: 0.0191
val_loss 8.4412 val_acc: 0.0318
val_loss 8.8712 val_acc: 0.0382
val_loss 8.9125 val_acc: 0.0191
val_loss 9.3815 val_acc: 0.0127
val_loss 9.0214 val_acc: 0.0191
val_loss 9.4234 val_acc: 0.0127
val_loss 9.1625 val_acc: 0.0191

但是,如果我使用原始教程, 我的模型似乎正在学习。

from __future__ import print_function, division
import pretrainedmodels as ptmodels

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy



def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['training', 'validation']:
            if phase == 'training':
                scheduler.step()  #what is this
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for num, data in enumerate(dataloaders[phase]):
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'training'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'training':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                if num % 100 == 0:
                    print("500 batches")
                    temp_running_corrects = (running_corrects.double() / ( len(inputs) *  num + 1)).cpu().numpy()
                    print(temp_running_corrects)
                    #print('running_corrects', running_corrects/( len(inputs) *  num + 1).numpy())

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'validation' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    save_path = os.path.join(SAVE_DIR, f"val_acc_{val_acc}.pt")
    torch.save(model, save_path)
    return model


if __name__=="__main__":

    img_size = 224      
    data_transforms = {
        'training': transforms.Compose([
           transforms.Resize((img_size,img_size)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'validation': transforms.Compose([
            transforms.Resize((img_size,img_size)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    data_dir = r'TEST 10'

    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                              data_transforms[x])
                      for x in ['training', 'validation']}
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=2,
                                                 shuffle=True, num_workers=4) #num_workers leads to errors
                  for x in ['training', 'validation']}
    dataset_sizes = {x: len(image_datasets[x]) for x in ['training', 'validation']}
    class_names = image_datasets['training'].classes

    device = torch.device("cuda:0")

    #model_ft = models.resnet18(pretrained=True)
    #model_ft = models.resnet152(pretrained = True)
    #model_ft = ptmodels.__dict__['polynet'](num_classes=1000, pretrained='imagenet')
    model_ft = models.densenet201(pretrained=True)
    #model_ft = models.resnext101_32x8d(pretrained=True).fc.in_features
    print(model_ft.features)

    #num_ftrs = model_ft.fc.in_features #in features
    num_ftrs = 1920 #densenet 201
    #num_ftrs = 2048
    #https://discuss.pytorch.org/t/what-does-the-fc-in-feature-mean/4889


    model_ft.fc = nn.Linear(num_ftrs, 2)
    #model_ft.last_linear = nn.Linear(num_ftrs, 2)

    model_ft = model_ft.to(device)

    criterion = nn.CrossEntropyLoss()

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=2, gamma=0.1)



    model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                           num_epochs=3)

https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html

Training Loss:
Epoch 0/2

500 batches
0.0
500 batches
0.681592039800995
500 batches
0.6359102244389028
500 batches
0.6422628951747088
500 batches
0.6541822721598003
500 batches
0.6763236763236763
500 batches
0.694421315570358
500 batches
0.7059243397573162
500 batches
0.7108057464084947
500 batches
0.7096057745696835
500 batches
0.7126436781609196
500 batches
0.7142208087233075
500 batches
0.7251145356101625
500 batches
0.7293348712033834
500 batches
0.729382363441628
500 batches
0.7340886371209597
500 batches
0.7363323961262105

我已经坚持了几天,无法解决。 我只是想更频繁地打印出验证准确性。

谢谢。

0 个答案:

没有答案