我正在 MNIST 数据集上训练模型,使用 Google Colab 的 GPU,设备已设置为 cuda,但仍然报错。我尝试了其他解决方案,但在本地 PC 上代码无法正常工作。Colab 是否需要其他额外的设置?我之前在 AWS 上完成过训练,当时代码没有任何问题。
# Training loop with periodic evaluation on the test set.
#
# Relies on names defined outside this snippet: `torch`, `model`, `optimizer`,
# `criterion`, `train_loader`, `test_loader` and `device`.
# NOTE(review): `model` is presumably on `device` already (model.to(device));
# this snippet only moves the batches -- confirm against the setup code.

epoch = 22  # total number of passes over train_loader
steps = 0  # training batches processed so far, counted across all epochs
print_every_step = 5  # evaluate and report every this many training batches
total_train_loss, total_test_loss = [], []  # one entry appended per report

# Sum of training losses since the last report. It is initialised once here
# (not per epoch) because `steps` also runs across epoch boundaries; resetting
# it per epoch would make a window that spans two epochs average too few
# batches.
train_loss = 0
test_loss = 0
accuracy = 0

for e in range(epoch):
    for images, labels in train_loader:
        steps += 1
        images, labels = images.to(device), labels.to(device)

        # Clear the gradients of all optimized variables.
        optimizer.zero_grad()
        # Forward pass: compute predicted outputs by passing inputs to the model.
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        if steps % print_every_step == 0:
            # Start every evaluation from zero; otherwise the totals of
            # earlier evaluation passes leak into this report.
            test_loss = 0
            accuracy = 0

            model.eval()
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.to(device)
                    test_labels = test_labels.to(device)
                    test_log_ps = model(test_images)
                    test_loss += criterion(test_log_ps, test_labels).item()

                    # Calculate accuracy.
                    # Assumes the model outputs log-probabilities, hence
                    # the exp() -- TODO confirm against the model definition.
                    ps = torch.exp(test_log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == test_labels.view(*top_class.shape)
                    # .float() keeps the tensor on its current device,
                    # unlike .type(torch.FloatTensor), which forces a CPU copy.
                    accuracy += torch.mean(equals.float()).item()

            mean_train_loss = train_loss / print_every_step
            mean_test_loss = test_loss / len(test_loader)
            mean_accuracy = accuracy / len(test_loader)

            print(f"Epoch {e+1}/{epoch}.. "
                  f"Train loss: {mean_train_loss:.3f}.. "
                  f"Test loss: {mean_test_loss:.3f}.. "
                  f"Test accuracy: {mean_accuracy:.3f}")

            total_train_loss.append(mean_train_loss)
            total_test_loss.append(mean_test_loss)

            # Begin a fresh reporting window and go back to training mode.
            train_loss = 0
            model.train()