我在构建 RNN 模型的过程中遇到了下面的错误。以下是我的部分代码:
class RNN(nn.Module):
    """Single-layer bidirectional LSTM classifier over embedded token ids.

    The constructor reads the module-level names ``emb_num``, ``emb_size``
    and ``dropout_rate``. The final linear layer produces 5 scores per sample.
    """

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(emb_num, emb_size)
        self.dropout1 = nn.Dropout(dropout_rate)
        # NOTE(review): the LSTM input size is hard-coded to 50, so this only
        # works when emb_size == 50 -- confirm against the configuration.
        self.LSTM = nn.LSTM(50, 128, 1, bidirectional=True)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.full_connect = nn.Linear(256, 5)  # biLSTM state * 2

    def forward(self, x):
        """Return class scores for a batch of token-id sequences.

        Args:
            x: integer tensor of token ids; the permute below implies a
                batch-first 2-D layout (batch, seq_len) -- TODO confirm.

        Returns:
            Tensor of shape (batch, 5). The batch axis is always kept, even
            for a batch containing a single sample.
        """
        x = self.embedding(x)
        # nn.LSTM is built without batch_first, so move the sequence axis first.
        x = x.permute(1, 0, 2)
        x = self.dropout1(x)
        _, (hn, _) = self.LSTM(x)
        out = self.dropout2(hn)
        # hn holds one final hidden state per direction; join them feature-wise
        # to get a (batch, 256) tensor.
        out = torch.cat([out[0, :, :], out[1, :, :]], 1)
        # No squeeze() here: the tensor is already 2-D, and squeezing would drop
        # the batch axis for a one-sample batch, breaking argmax(dim=1) and the
        # loss computation downstream.
        out = self.full_connect(out)
        return out
def train():
    """Train the global ``model`` and checkpoint it along the way.

    Uses the module-level ``model``, ``trainloader``, ``validloader``,
    ``device``, ``epochs`` and ``learning_rate``. After every epoch the model
    is evaluated with ``valid(validloader)``. Checkpoints written:

    * ``'epoch50.pt'`` after the 50th epoch,
    * ``'max_acc model.pt'`` whenever validation accuracy beats the best so far,
    * ``'final model.pt'`` once training has finished.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.001)
    criterion = nn.CrossEntropyLoss()
    # The best accuracy must persist across epochs; resetting it inside the
    # loop would overwrite the "best" checkpoint on every epoch.
    max_acc = 0
    for epoch in range(epochs):
        model.train()
        print('epoch:{}'.format(epoch + 1))
        for X_train, y_train in trainloader:
            optimizer.zero_grad()
            X_train = X_train.long().to(device)
            y_train = y_train.long().to(device)
            output = model(X_train)
            loss = criterion(output, y_train)
            loss.backward()
            optimizer.step()
        # NOTE(review): the original indentation was lost; the loss of the last
        # batch is reported once per epoch here -- confirm this was intended
        # rather than a per-batch print.
        print('loss:{:3f}'.format(loss))
        model.eval()
        acc = valid(validloader)
        print('epoch:{} acc:{}'.format(epoch + 1, acc))
        if epoch + 1 == 50:
            torch.save(model.state_dict(), 'epoch50.pt')
        if acc > max_acc:
            max_acc = acc
            torch.save(model.state_dict(), 'max_acc model.pt')
    torch.save(model.state_dict(), 'final model.pt')
def valid(dataloader):
    """Return the accuracy of the global ``model`` on ``dataloader``.

    Args:
        dataloader: iterable yielding ``(inputs, labels)`` pairs of tensors.

    Returns:
        Fraction of samples whose arg-max prediction equals the label, or
        ``0.0`` when the dataloader yields no samples.

    Gradients are disabled during evaluation. This function does not switch
    the model to eval mode; the caller does that.
    """
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.long().to(device)
            labels = labels.long().to(device)
            output = model(inputs)
            correct += (torch.argmax(output, dim=1) == labels).sum().item()
            total += labels.shape[0]
    # Guard against an empty dataloader, which would divide by zero.
    if total == 0:
        return 0.0
    return correct / total
在上面的代码中,我创建了一个验证集(devset),用于在训练过程中测试模型。但是在训练 4 个或更多个 epoch 之后,发生了以下错误:
Traceback (most recent call last):
File "c:\Users\hhhh\Desktop\NLP-beginner\task2\task2.py", line 287, in <module>
train()
File "c:\Users\hhhh\Desktop\NLP-beginner\task2\task2.py", line 185, in train
acc = valid(validloader)
File "c:\Users\hhhh\Desktop\NLP-beginner\task2\task2.py", line 207, in valid
correct += (torch.argmax(output, dim = 1) == y_train).sum().item()
RuntimeError: CUDA error: unspecified launch failure
我尝试切换到 CPU 设备来训练模型,但训练速度变得非常慢,连 1 个 epoch 都很难跑完。是因为我的计算机配置不足以运行它吗?
答案 0 :(得分:1)
要检查您的系统是否具有CUDA:
from torch.cuda import is_available
def main():
    """Pick CUDA when it is available and not disabled, then build the model.

    Reads ``args.no_cuda`` (presumably a parsed command-line namespace defined
    elsewhere -- verify) and falls back to the CPU when CUDA is unavailable.
    """
    # Function-scope import: this snippet only imports is_available at file
    # level, and torch.device is the constructor for device objects.
    import torch

    use_cuda = not args.no_cuda and is_available()
    dev = torch.device("cuda" if use_cuda else "cpu")
    model = RNN().to(device=dev)
    # Call train and test methods below


if __name__ == '__main__':
    main()