PyTorch runs much slower than TensorFlow, even for a shallow CNN

Asked: 2018-11-12 11:34:55

Tags: python tensorflow pytorch

The input data is EEG data converted to PSD (power spectral density) features. Its shape is [103600, 59, 51], where 103600 is the number of samples, i.e., the total number of samples in one epoch. The data is already loaded into memory.
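
For reference, a synthetic stand-in with the same layout (the real data is PSD features, not random noise; the names X and y are illustrative):

import numpy as np

# 103600 samples, each a 59 (time steps) x 51 (PSD length) map
X = np.random.randn(103600, 59, 51).astype(np.float32)
y = np.random.randint(0, 5, size=103600).astype(np.int64)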

The model I use is very simple, yet with PyTorch 0.4 and a GTX 1080 one training epoch takes 50 s, while inference over the whole val set takes only 0.3 s.

I also tested the same model with TensorFlow 1.2.0, where one epoch takes 8 s. I don't know whether I have made a mistake somewhere.

So I ran some tests. This screenshot shows the running times: [screenshot: per-operation times for one training epoch]

where each timer accumulates the wall-clock time of one statement in the training loop:

time_to_tensor: inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)

time_to_cuda: inputs, targets = inputs.to(self.device), targets.to(self.device)

time_model: out = self.model(inputs)

time_criterion: loss = self.criterion(out, targets)

time_backward: loss.backward()

time_step: self.optimizer.step()
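
A note on these measurements: CUDA operations are launched asynchronously, so timing a single statement with time.time() can attribute the cost of earlier, still-running kernels to whichever later line happens to block. A minimal sketch of a synchronized per-op timer (my assumption of how it should be measured; the helper name is illustrative):

import time
import torch

def timed(fn, device):
    # Synchronize before and after so the measured interval covers
    # only the kernels launched by fn itself.
    if device.type == 'cuda':
        torch.cuda.synchronize()
    t0 = time.time()
    out = fn()
    if device.type == 'cuda':
        torch.cuda.synchronize()
    return out, time.time() - t0

# usage: out, dt = timed(lambda: self.model(inputs), self.device)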

It seems that the tensor.to(self.device) (or tensor.cuda()) operations waste a lot of time. So I tried moving the whole dataset to the GPU once, up front:

self.X_train = torch.tensor(self.X_train, dtype=torch.float).to(self.device)
self.y_train = torch.tensor(self.y_train, dtype=torch.int64).to(self.device)

and commented out

inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
inputs, targets = inputs.to(self.device), targets.to(self.device)

But then I got the running times shown below: [screenshot: per-operation times after moving the data up front]. One epoch still takes about 50 s, and the time attributed to each operation has changed. I am confused.

Can anyone help me spot the problem? Thanks.

The code is below.

Here is the model I use:

import torch
import torch.nn as nn
import torch.nn.functional as F

class TxtCNN4(nn.Module):
    # Shallow CNN: three stacked convolutions over the time axis (the first
    # spans the full PSD width), each max-pooled over all remaining time
    # steps; the pooled maps are concatenated into a 320-dim feature vector
    # and fed to a small fully connected head.

    def __init__(self, n_classes=6, time_steps=59, psd_lenght=51, filter_numbers=128):
        super(TxtCNN4, self).__init__()
        self.n_classes = n_classes
        self.time_steps = time_steps
        self.psd_lenght = psd_lenght
        self.filters = filter_numbers

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=self.filters//2, 
                                kernel_size=(3, psd_lenght), stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(self.filters//2, momentum=0.05)
        self.pool1 = nn.MaxPool2d(kernel_size=(self.time_steps - 3 + 1, 1))

        self.conv2 = nn.Conv2d(in_channels=self.filters//2, out_channels=self.filters,
                                    kernel_size=(3, 1), stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(self.filters, momentum=0.05)
        self.pool2 = nn.MaxPool2d(kernel_size=(self.time_steps - 5 + 1, 1))

        self.conv3 = nn.Conv2d(in_channels=self.filters, out_channels=self.filters,
                                    kernel_size=(3, 1), stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(self.filters, momentum=0.05)
        self.pool3 = nn.MaxPool2d(kernel_size=(self.time_steps - 7 + 1, 1))

        self.fc = nn.Linear(in_features=2*self.filters+self.filters//2, out_features=128)
        self.cls = nn.Linear(in_features=128, out_features=self.n_classes)
        self.criterion = nn.CrossEntropyLoss(weight=None)  # note: unused here; the Trainer defines its own criterion

    def forward(self, inputs):
        inputs = inputs.view(-1, self.time_steps, self.psd_lenght, 1)  # [N, 59, 51, 1]
        inputs = inputs.permute(0, 3, 1, 2)                            # -> [N, 1, 59, 51] (NCHW)
        conv1 = F.relu(self.bn1(self.conv1(inputs)), inplace=True)
        conv2 = F.relu(self.bn2(self.conv2(conv1)), inplace=True)
        conv3 = F.relu(self.bn3(self.conv3(conv2)), inplace=True)

        x = torch.cat((self.pool1(conv1), self.pool2(conv2), self.pool3(conv3)), dim=1)
        x = x.view(-1, 2*self.filters+self.filters//2)
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc(x), inplace=True)
        x = self.cls(x)

        return x
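
For reference, a quick shape sanity check on random input (batch size 256 is arbitrary):

if __name__ == '__main__':
    net = TxtCNN4()
    dummy = torch.randn(256, 59, 51)   # [batch, time_steps, psd_lenght]
    print(net(dummy).shape)            # expected: torch.Size([256, 6])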

Here is the main Python file that trains the model:

import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import cohen_kappa_score, recall_score
from tensorboardX import SummaryWriter  # assuming tensorboardX provides SummaryWriter

# AvgMeter, PreGather, reformatInput, load_data and DeepSleepNet are my own
# helpers and are not shown here.

file_path = '/mnt/disk2/wy/SLEEP_EDF/'

batch_size = 32*4*2
dropout_rate = 0.5
nb_classes = 5
max_epochs = 100
early_stop_epoch = 10

learning_rate = 1e-3

model_type='TxtCNN4'
filter_numbers = 128

fs = 100    # sampling frequency (Hz)
n_Channels = 2
n_Samples = 30*100

load_flag = False
model_path = ''

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    input_len = inputs.shape[0]
    assert input_len == len(targets)

    if shuffle:
        indices = np.arange(input_len)  
        np.random.shuffle(indices)
    for start_idx in range(0, input_len, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

class Trainer():

    def __init__(self, X_inputs, labels, fold, subj_id, log_path, model_type=model_type, args=None):
        self.subj_id = subj_id
        self.model_type = model_type
        self.writer_root_path = os.path.abspath(
                                os.path.join(
                                    os.path.curdir,
                                    log_path+'_'+model_type,
                                    model_type + '_' + str(subj_id)))
        self.train_writer = SummaryWriter(os.path.join(self.writer_root_path, 'train'))
        self.val_writer = SummaryWriter(os.path.join(self.writer_root_path, 'val'))
        self.test_writer = SummaryWriter(os.path.join(self.writer_root_path, 'test'))

        self.curr_epcoh = 0
        self.curr_iter = 0

        self.train_end_flag = False

        (self.X_train, self.y_train), (self.X_val, self.y_val), (self.X_test, self.y_test) = reformatInput(X_inputs, labels, fold)
        print('Test set label and BiLi:\t', np.unique(self.y_test, return_counts=True))
        # normalization between all data
        X_mean = self.X_train.mean()
        X_std = self.X_train.std()
        self.X_train = (self.X_train - X_mean)/X_std
        self.X_val = (self.X_val - X_mean)/X_std
        self.X_test = (self.X_test - X_mean)/X_std


        if model_type == 'TxtCNN4':
            model = TxtCNN4(n_classes=nb_classes)
        else:
            model = DeepSleepNet()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.criterion = nn.CrossEntropyLoss(weight=None)

        # self.X_train = torch.tensor(self.X_train, dtype=torch.float).to(self.device)
        # self.y_train = torch.tensor(self.y_train, dtype=torch.int64).to(self.device)

        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)


    def train(self, epoch):
        train_loss = AvgMeter()
        train_acc = AvgMeter()
        start_time = time.time()

        time_to_tensor = 0
        time_to_cuda = 0
        time_model = 0
        time_criterion = 0
        time_backward = 0
        time_step = 0
        time_writer = 0
        self.curr_epcoh = epoch
        self.model.train()
        for (inputs, targets) in iterate_minibatches(self.X_train, self.y_train, batch_size, shuffle=False):
            self.curr_iter += 1
            time_1 = time.time()
            inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)  # dtype=torch.int64
            time_to_tensor += (time.time() - time_1)
            time_1 = time.time()
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            time_to_cuda += (time.time() - time_1)

            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            time_1 = time.time()
            out = self.model(inputs)
            time_model += (time.time() - time_1)

            time_1 = time.time()
            loss = self.criterion(out, targets)
            time_criterion += (time.time() - time_1)

            time_1 = time.time()
            loss.backward()
            time_backward += (time.time() - time_1)

            time_1 = time.time()
            self.optimizer.step()
            time_step += (time.time() - time_1)

            if self.curr_iter % 20 == 0:
                time_1 = time.time()
                pred = out.max(dim=1)[1].cpu().numpy()
                gt = targets.cpu().numpy()
                acc = np.mean(pred==gt)
                _loss = loss.detach().cpu().numpy()
                train_acc.update(acc)
                train_loss.update(_loss)

                self.train_writer.add_scalar('learning_rate', self.optimizer.param_groups[0]['lr'], self.curr_iter)
                self.train_writer.add_scalar('acc', acc, self.curr_iter)
                self.train_writer.add_scalar('loss', _loss, self.curr_iter)
                time_writer += (time.time() - time_1)


        fmt_str = "Train\tEpoch [{:d}/{:d}]  train_Loss: {:.4f}\ttrain_Acc: {:.2f}\tTime per Epoch: {:.4f}"
        print_str = fmt_str.format(self.curr_epcoh,
                                    max_epochs,
                                    train_loss.avg, 
                                    train_acc.avg*100, 
                                    time.time()-start_time)
        print(print_str)
        print('time_to_tensor: ',time_to_tensor)
        print('time_to_cuda: ',time_to_cuda)
        print('time_model: ',time_model)
        print('time_criterion: ',time_criterion)
        print('time_backward: ',time_backward)
        print('time_step: ',time_step)
        print('time_writer: ',time_writer)
        print(len(self.y_train))
        train_acc.reset()
        train_loss.reset()

    def validation(self):
        self.model.eval()
        with torch.no_grad():
            # val set
            loss_gather = AvgMeter()
            pred_gather = PreGather()
            tran_time = 0
            for (inputs, targets) in iterate_minibatches(self.X_val, self.y_val, batch_size, shuffle=False):
                time_1 = time.time()
                inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)  # dtype=torch.int64
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                tran_time += (time.time() - time_1)

                out = self.model(inputs)
                _loss = self.criterion(out, targets).detach().cpu().numpy()
                pred = out.max(dim=1)[1].cpu().numpy()
                loss_gather.update(_loss)
                pred_gather.update(pred)


            val_loss = loss_gather.avg
            pred = pred_gather.pred
            val_acc = np.mean(pred==self.y_val)
            val_kappa = cohen_kappa_score(self.y_val, pred)
            val_BCA = recall_score(self.y_val, pred, average='macro')
            loss_gather.reset()
            pred_gather.reset()

            # Then we print the results for this epoch:
            fmt_str = "VAL \tEpoch [{:d}/{:d}]  val_Loss: {:.4f}\tval_Acc: {:.2f}\tval_kappa: {:.2f}\tval_BCA: {:.2f}"
            print_str = fmt_str.format(self.curr_epcoh,
                                        max_epochs,
                                        val_loss,
                                        val_acc*100,
                                        val_kappa*100,
                                        val_BCA*100)
            print(print_str)
            print(tran_time)
            print(len(self.y_val))
            self.val_writer.add_scalar('acc', val_acc, self.curr_iter)
            self.val_writer.add_scalar('loss', val_loss, self.curr_iter)
            self.val_writer.add_scalar('kappa', val_kappa, self.curr_iter)
            self.val_writer.add_scalar('bca', val_BCA, self.curr_iter)


def train_all_subject(num_epochs=max_epochs, log_path=None):
    # Leave-Subject-Out cross validation
    subj_nums, fold_pairs, EEGs, labels = load_data(file_path, subj_nums=38, channel=1)  
    for subj_id in range(subj_nums):
        print('The subj_id', subj_id, '\t\t Training the ' + model_type + ' Model...')
        trainer = Trainer(EEGs, labels, fold_pairs[subj_id], subj_id, log_path)
        for epoch in range(trainer.curr_epcoh, max_epochs):
            if trainer.train_end_flag is False:
                trainer.train(epoch)
                trainer.validation()
                print('-'*50)
            else:
                break
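
For completeness, a variant I have not benchmarked yet would replace iterate_minibatches with a DataLoader using pinned host memory and non-blocking copies, so the host-to-device transfer can overlap with compute (untested sketch; X_train, y_train and device stand for the arrays and device above, and batch size and worker count are guesses):

from torch.utils.data import TensorDataset, DataLoader

# pin_memory=True keeps batches in page-locked host memory, which lets
# .to(device, non_blocking=True) overlap the copy with GPU computation.
train_ds = TensorDataset(torch.from_numpy(X_train).float(),
                         torch.from_numpy(y_train).long())
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,
                          pin_memory=True, num_workers=2)

for inputs, targets in train_loader:
    inputs = inputs.to(device, non_blocking=True)
    targets = targets.to(device, non_blocking=True)
    # ... forward / backward / step as above ...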

0 Answers

No answers yet.