The input data is EEG that has been converted to PSD features. Its shape is [103600, 59, 51], where 103600 is the number of samples, i.e., the total number of samples seen in one epoch. The data is already loaded into memory.
The model I use is very simple, yet with PyTorch 0.4 and a GTX 1080, training one epoch takes about 50 s, while inference over the whole validation set takes only about 0.3 s.
I also tested the same model with TensorFlow 1.2.0, where one epoch takes about 8 s. I don't know whether I made a mistake somewhere.
So I ran some measurements. The per-operation run times are shown in the screenshot below. [screenshot: per-operation run times, first run]
where
time_to_tensor measures: inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
time_to_cuda measures: inputs, targets = inputs.to(self.device), targets.to(self.device)
time_model measures: out = self.model(inputs)
time_criterion measures: loss = self.criterion(out, targets)
time_backward measures: loss.backward()
time_step measures: self.optimizer.step()
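Each of these numbers is the per-epoch sum of wall-clock deltas around the corresponding call, accumulated with the same pattern throughout train() below, for example:

time_1 = time.time()
out = self.model(inputs)
time_model += (time.time() - time_1)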
It looks like the tensor.to(self.device) (or tensor.cuda()) operation wastes a lot of time. So I tried moving all the data to the GPU once, up front:

self.X_train = torch.tensor(self.X_train, dtype=torch.float).to(self.device)
self.y_train = torch.tensor(self.y_train, dtype=torch.int64).to(self.device)

and commented out the per-batch conversion and transfer:

inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
inputs, targets = inputs.to(self.device), targets.to(self.device)

But I got the run times shown in the second screenshot. [screenshot: per-operation run times after moving the data to the GPU once] One epoch still takes about 50 s, and the time of each operation has changed. I am confused.
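One thing I am not sure about: CUDA kernels are launched asynchronously, so deltas of time.time() can attribute GPU work to whichever later call happens to block, which might explain why the per-operation times shifted between the two runs. A minimal sketch of synchronized timing, assuming a CUDA device is available:

torch.cuda.synchronize()  # drain pending GPU work before starting the clock
time_1 = time.time()
out = self.model(inputs)
torch.cuda.synchronize()  # wait until the forward pass has really finished
time_model += (time.time() - time_1)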
Can anyone help me find the problem? Thanks.
The full code follows.
import os
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import cohen_kappa_score, recall_score
from tensorboardX import SummaryWriter  # SummaryWriter as used with PyTorch 0.4

# AvgMeter, PreGather, reformatInput, load_data and DeepSleepNet are helpers
# from my own code base and are not shown here.
class TxtCNN4(nn.Module):
    def __init__(self, n_classes=6, time_steps=59, psd_lenght=51, filter_numbers=128):
        super(TxtCNN4, self).__init__()
        self.n_classes = n_classes
        self.time_steps = time_steps
        self.psd_lenght = psd_lenght
        self.filters = filter_numbers
        # three stacked convolutions over the time axis; conv1 also collapses the PSD axis
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=self.filters//2,
                               kernel_size=(3, psd_lenght), stride=1, padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(self.filters//2, momentum=0.05)
        self.pool1 = nn.MaxPool2d(kernel_size=(self.time_steps - 3 + 1, 1))
        self.conv2 = nn.Conv2d(in_channels=self.filters//2, out_channels=self.filters,
                               kernel_size=(3, 1), stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(self.filters, momentum=0.05)
        self.pool2 = nn.MaxPool2d(kernel_size=(self.time_steps - 5 + 1, 1))
        self.conv3 = nn.Conv2d(in_channels=self.filters, out_channels=self.filters,
                               kernel_size=(3, 1), stride=1, padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(self.filters, momentum=0.05)
        self.pool3 = nn.MaxPool2d(kernel_size=(self.time_steps - 7 + 1, 1))
        self.fc = nn.Linear(in_features=2*self.filters + self.filters//2, out_features=128)
        self.cls = nn.Linear(in_features=128, out_features=self.n_classes)
        self.criterion = nn.CrossEntropyLoss(weight=None)  # unused here; the Trainer builds its own

    def forward(self, inputs):
        # [batch, time_steps, psd_lenght] -> [batch, 1, time_steps, psd_lenght]
        inputs = inputs.view(-1, self.time_steps, self.psd_lenght, 1)
        inputs = inputs.permute(0, 3, 1, 2)
        conv1 = F.relu(self.bn1(self.conv1(inputs)), inplace=True)
        conv2 = F.relu(self.bn2(self.conv2(conv1)), inplace=True)
        conv3 = F.relu(self.bn3(self.conv3(conv2)), inplace=True)
        # max-pool each stage over its full time extent and concatenate the channels
        x = torch.cat((self.pool1(conv1), self.pool2(conv2), self.pool3(conv3)), dim=1)
        x = x.view(-1, 2*self.filters + self.filters//2)
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc(x), inplace=True)
        x = self.cls(x)
        return x
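# Quick shape sanity check (hypothetical standalone usage, not part of my
# training run): with the default sizes, a [batch, 59, 51] input gives a
# [batch, 6] output.
# model = TxtCNN4()
# dummy = torch.randn(4, 59, 51)
# print(model(dummy).shape)  # torch.Size([4, 6])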
file_path = '/mnt/disk2/wy/SLEEP_EDF/'
batch_size = 32*4*2
dropout_rate = 0.5
nb_classes = 5
max_epochs = 100
early_stop_epoch = 10
learning_rate = 1e-3
model_type='TxtCNN4'
filter_numbers = 128
fs = 100  # sampling frequency
n_Channels = 2
n_Samples = 30*100
load_flag = False
model_path = ''
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    input_len = inputs.shape[0]
    assert input_len == len(targets)
    if shuffle:
        indices = np.arange(input_len)
        np.random.shuffle(indices)
    for start_idx in range(0, input_len, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
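# A possible alternative input pipeline (sketch only, not the code that
# produced the timings above): wrap the numpy arrays in a TensorDataset so
# DataLoader does the batching with pinned host memory; pinned pages let
# .to(device, non_blocking=True) overlap the host-to-device copy with compute.
def make_loader(X, y, batchsize):
    from torch.utils.data import TensorDataset, DataLoader
    dataset = TensorDataset(torch.from_numpy(X).float(),
                            torch.from_numpy(y).long())
    return DataLoader(dataset, batch_size=batchsize, shuffle=True,
                      pin_memory=True, num_workers=2)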
class Trainer():
    def __init__(self, X_inputs, labels, fold, subj_id, log_path, model_type=model_type, args=None):
        self.subj_id = subj_id
        self.model_type = model_type
        self.writer_root_path = os.path.abspath(
            os.path.join(
                os.path.curdir,
                log_path + '_' + model_type,
                model_type + '_' + str(subj_id)))
        self.train_writer = SummaryWriter(os.path.join(self.writer_root_path, 'train'))
        self.val_writer = SummaryWriter(os.path.join(self.writer_root_path, 'val'))
        self.test_writer = SummaryWriter(os.path.join(self.writer_root_path, 'test'))
        self.curr_epoch = 0
        self.curr_iter = 0
        self.train_end_flag = False
        (self.X_train, self.y_train), (self.X_val, self.y_val), (self.X_test, self.y_test) = reformatInput(X_inputs, labels, fold)
        print('Test set label and BiLi:\t', np.unique(self.y_test, return_counts=True))
        # normalize all splits with the training-set statistics
        X_mean = self.X_train.mean()
        X_std = self.X_train.std()
        self.X_train = (self.X_train - X_mean)/X_std
        self.X_val = (self.X_val - X_mean)/X_std
        self.X_test = (self.X_test - X_mean)/X_std
        if model_type == 'TxtCNN4':
            model = TxtCNN4(n_classes=nb_classes)
        else:
            model = DeepSleepNet()
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.criterion = nn.CrossEntropyLoss(weight=None)
        # variant described above: move the whole training set to the GPU once
        # self.X_train = torch.tensor(self.X_train, dtype=torch.float).to(self.device)
        # self.y_train = torch.tensor(self.y_train, dtype=torch.int64).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
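        # Rough memory check for the "move everything to the GPU once" variant:
        # the full dataset is 103600 * 59 * 51 float32 values, roughly 1.25 GB,
        # so even the whole thing (let alone the training split) fits in the
        # GTX 1080's 8 GB.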
    def train(self, epoch):
        train_loss = AvgMeter()
        train_acc = AvgMeter()
        start_time = time.time()
        time_to_tensor = 0
        time_to_cuda = 0
        time_model = 0
        time_criterion = 0
        time_backward = 0
        time_step = 0
        time_writer = 0
        self.curr_epoch = epoch
        self.model.train()
        for (inputs, targets) in iterate_minibatches(self.X_train, self.y_train, batch_size, shuffle=False):
            self.curr_iter += 1
            time_1 = time.time()
            inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
            time_to_tensor += (time.time() - time_1)
            time_1 = time.time()
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            time_to_cuda += (time.time() - time_1)
            # zero the parameter gradients
            self.optimizer.zero_grad()
            # forward + backward + optimize
            time_1 = time.time()
            out = self.model(inputs)
            time_model += (time.time() - time_1)
            time_1 = time.time()
            loss = self.criterion(out, targets)
            time_criterion += (time.time() - time_1)
            time_1 = time.time()
            loss.backward()
            time_backward += (time.time() - time_1)
            time_1 = time.time()
            self.optimizer.step()
            time_step += (time.time() - time_1)
            if self.curr_iter % 20 == 0:
                time_1 = time.time()
                pred = out.max(dim=1)[1].cpu().numpy()
                gt = targets.cpu().numpy()
                acc = np.mean(pred == gt)
                _loss = loss.detach().cpu().numpy()
                train_acc.update(acc)
                train_loss.update(_loss)
                curr_lr = self.optimizer.param_groups[0]['lr']  # current learning rate for logging
                self.train_writer.add_scalar('learning_rate', curr_lr, self.curr_iter)
                self.train_writer.add_scalar('acc', acc, self.curr_iter)
                self.train_writer.add_scalar('loss', _loss, self.curr_iter)
                time_writer += (time.time() - time_1)
        fmt_str = "Train\tEpoch [{:d}/{:d}] train_Loss: {:.4f}\ttrain_Acc: {:.2f}\tTime per Epoch: {:.4f}"
        print_str = fmt_str.format(self.curr_epoch,
                                   max_epochs,
                                   train_loss.avg,
                                   train_acc.avg*100,
                                   time.time() - start_time)
        print(print_str)
        print('time_to_tensor: ', time_to_tensor)
        print('time_to_cuda: ', time_to_cuda)
        print('time_model: ', time_model)
        print('time_criterion: ', time_criterion)
        print('time_backward: ', time_backward)
        print('time_step: ', time_step)
        print('time_writer: ', time_writer)
        print(len(self.y_train))
        train_acc.reset()
        train_loss.reset()
    def validation(self):
        self.model.eval()
        with torch.no_grad():
            # val set
            loss_gather = AvgMeter()
            pred_gather = PreGather()
            tran_time = 0
            for (inputs, targets) in iterate_minibatches(self.X_val, self.y_val, batch_size, shuffle=False):
                time_1 = time.time()
                inputs, targets = torch.tensor(inputs, dtype=torch.float), torch.tensor(targets, dtype=torch.int64)
                inputs, targets = inputs.to(self.device), targets.to(self.device)
                tran_time += (time.time() - time_1)
                out = self.model(inputs)
                _loss = self.criterion(out, targets).detach().cpu().numpy()
                pred = out.max(dim=1)[1].cpu().numpy()
                loss_gather.update(_loss)
                pred_gather.update(pred)
            val_loss = loss_gather.avg
            pred = pred_gather.pred
            val_acc = np.mean(pred == self.y_val)
            val_kappa = cohen_kappa_score(self.y_val, pred)
            val_BCA = recall_score(self.y_val, pred, average='macro')
            loss_gather.reset()
            pred_gather.reset()
            # Then we print the results for this epoch:
            fmt_str = "VAL \tEpoch [{:d}/{:d}] val_Loss: {:.4f}\tval_Acc: {:.2f}\tval_kappa: {:.2f}\tval_BCA: {:.2f}"
            print_str = fmt_str.format(self.curr_epoch,
                                       max_epochs,
                                       val_loss,
                                       val_acc*100,
                                       val_kappa*100,
                                       val_BCA*100)
            print(print_str)
            print(tran_time)
            print(len(self.y_val))
            self.val_writer.add_scalar('acc', val_acc, self.curr_iter)
            self.val_writer.add_scalar('loss', val_loss, self.curr_iter)
            self.val_writer.add_scalar('kappa', val_kappa, self.curr_iter)
            self.val_writer.add_scalar('bca', val_BCA, self.curr_iter)
def train_all_subject(num_epochs=max_epochs, log_path=None):
    # Leave-Subject-Out cross validation
    subj_nums, fold_pairs, EEGs, labels = load_data(file_path, subj_nums=38, channel=1)
    for subj_id in range(subj_nums):
        print('The subj_id', subj_id, '\t\t Training the ' + model_type + ' Model...')
        trainer = Trainer(EEGs, labels, fold_pairs[subj_id], subj_id, log_path)
        for epoch in range(trainer.curr_epoch, max_epochs):
            if trainer.train_end_flag is False:
                trainer.train(epoch)
                trainer.validation()
                print('-'*50)
            else:
                break
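# Hypothetical entry point (my actual runner script is not shown):
if __name__ == '__main__':
    train_all_subject(log_path='logs')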