我正在尝试训练与MLP层连接的pytorch LSTM模型。该模型的编码如下:
class RNNBlock(nn.Module):
def __init__(self, in_dim, hidden_dim, num_layer=1, dropout=0):
super(RNNBlock, self).__init__()
self.hidden_dim = hidden_dim
self.num_layer = num_layer
self.lstm = nn.LSTM(in_dim, hidden_dim, num_layer, dropout)
def forward(self, onehot, length):
batch_size = onehot.shape[0]
h_in = nn.Parameter(torch.randn(self.num_layer, batch_size, self.hidden_dim))
c_in = nn.Parameter(torch.randn(self.num_layer, batch_size, self.hidden_dim))
packed = nn.utils.rnn.pack_padded_sequence(onehot, length, batch_first=True)
output, (h_out, c_out) = self.lstm(packed, (h_in, c_in))
unpacked, unpacked_length = nn.utils.rnn.pad_packed_sequence(output, batch_first=True)
vectors = list()
for i, vector in enumerate(unpacked):
vectors.append(unpacked[i, unpacked_length[i]-1, :].view(1, -1))
out = torch.cat(vectors, 0)
return out
class Predictor(nn.Module):
def __init__(self, in_dim, out_dim, act=None):
super(Predictor, self).__init__()
self.linear = nn.Linear(in_dim, out_dim)
nn.init.xavier_normal_(self.linear.weight)
self.activation = act
def forward(self, x):
out = self.linear(x)
if self.activation != None:
out = self.activation(out)
return out
class RNNNet(nn.Module):
def __init__(self, args):
super(RNNNet, self).__init__()
self.rnnBlock = RNNBlock(args.in_dim, args.hidden_dim, args.num_layer, args.dropout)
self.pred1 = Predictor(args.hidden_dim, args.pred_dim1, act=nn.ReLU())
self.pred2 = Predictor(args.pred_dim1, args.pred_dim2, act=nn.ReLU())
self.pred3 = Predictor(args.pred_dim2, args.out_dim)
def forward(self, onehot, length):
out = self.rnnBlock(onehot, length)
out = self.pred1(out)
out = self.pred2(out)
out = self.pred3(out)
return out
这是我的训练和实验功能
def train(model, device, optimizer, criterion, data_train, bar, args):
epoch_train_loss = 0
epoch_train_mae = 0
for i, batch in enumerate(data_train):
list_onehot = torch.tensor(batch[0]).cuda().float()
list_length = torch.tensor(batch[1]).cuda()
list_logP = torch.tensor(batch[2]).cuda().float()
# Sort onehot tensor with respect to the sequence length.
list_length, list_index = torch.sort(list_length, descending=True)
list_length.cuda()
list_index.cuda()
list_onehot = torch.Tensor([list_onehot.tolist()[i] for i in list_index]).cuda().float()
model.train()
optimizer.zero_grad()
list_pred_logP = model(list_onehot, list_length).squeeze().cuda()
list_pred_logP.require_grad = False
train_loss = criterion(list_pred_logP, list_logP)
train_mae = mean_absolute_error(list_pred_logP.tolist(), list_logP.tolist())
epoch_train_loss += train_loss.item()
epoch_train_mae += train_mae
train_loss.backward()
optimizer.step()
bar.update(len(list_onehot))
epoch_train_loss /= len(data_train)
epoch_train_mae /= len(data_train)
return model, epoch_train_loss, epoch_train_mae
def experiment(dict_partition, device, bar, args):
time_start = time.time()
model = RNNNet(args)
model.cuda()
if args.optim == 'Adam':
optimizer = optim.Adam(model.parameters(),
lr=args.lr,
weight_decay=args.l2_coef)
elif args.optim == 'RMSprop':
optimizer = optim.RMSprop(model.parameters(),
lr=args.lr,
weight_decay=args.l2_coef)
elif args.optim == 'SGD':
optimizer = optim.SGD(model.parameters(),
lr=args.lr,
weight_decay=args.l2_coef)
else:
assert False, 'Undefined Optimizer Type'
criterion = nn.MSELoss()
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
list_train_loss = list()
list_val_loss = list()
list_train_mae = list()
list_val_mae = list()
data_train = DataLoader(dict_partition['train'], batch_size=args.batch_size, shuffle=args.shuffle)
data_val = DataLoader(dict_partition['val'], batch_size=args.batch_size, shuffle=args.shuffle)
for epoch in range(args.epoch):
scheduler.step()
model, train_loss, train_mae = train(model, device, optimizer, criterion, data_train, bar, args)
list_train_loss.append(train_loss)
list_train_mae.append(train_mae)
mode, val_loss, val_mae = validate(model, device, criterion, data_val, bar, args)
list_val_loss.append(val_loss)
list_val_mae.append(val_mae)
data_test = DataLoader(dict_partition['test'], batch_size=args.batch_size, shuffle=args.shuffle)
mae, std, logP_total, pred_logP_total = test(model, device, data_test, args)
time_end = time.time()
time_required = time_end - time_start
args.list_train_loss = list_train_loss
args.list_val_loss = list_val_loss
args.list_train_mae = list_train_mae
args.list_val_mae = list_val_mae
args.logP_total = logP_total
args.pred_logP_total = pred_logP_total
args.mae = mae
args.std = std
args.time_required = time_required
return args
list_onehot和list_length张量从DataLoader加载并上传到GPU。然后,要使用压缩序列作为输入,我对list_onehot和list_length都进行了排序,然后上传到GPU。该模型已上传到GPU,h_in,c_in张量和压缩序列对象也已上传到GPU。但是,当我尝试运行此代码时,它不使用GPU,而仅使用CPU。如何使用GPU训练此模型?