Question

我在MNIST数据集上训练我的模型，将Google Colab用于GPU 设备仍然是cuda仍在出错，我尝试了其他解决方案，但在本地pc中代码无法正常工作，colab是否需要其他功能？我之前已经完成过有关aws的培训，并且代码没有问题

epoch = 22
steps = 0
print_every_step = 5

total_train_loss, total_test_loss = [], []

for e in range(epoch):

  train_loss = 0
  test_loss = 0
  accuracy = 0 

  for images, labels in train_loader:

    # clear the gradients of all optimized variables

    optimizer.zero_grad()

    steps += 1

    images, labels = images.to(device), labels.to(device)

    # forward pass: compute predicted outputs by passing inputs to the model

    log_ps = model(images)

    loss = criterion(log_ps, labels)

    loss.backward()

    optimizer.step()

    train_loss += loss.item()

  if(steps % print_every_step == 0 ):

    model.eval()

    with torch.no_grad():

      for images, labels in test_loader:

        images, labels = images.to(device), labels.to(device)

        log_ps = model(images)

        loss = criterion(log_ps, labels)

        test_loss += loss.item()

      #calculate accuracy
        ps = torch.exp(log_ps)
        top_p, top_class = ps.topk(1, dim=1)
        equals = top_class == labels.view(*top_class.shape)
        accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

    print(f"Epoch {epoch+1}/{epochs}.. "
          f"Train loss: {train_loss/print_every_step:.3f}.. "
          f"Test loss: {test_loss/len(test_loader):.3f}.. "
          f"Test accuracy: {accuracy/len(testl_oader):.3f}")

    model.train()

    total_train_loss.append(train_loss/print_every_step)
    total_test_loss.append(test_loss/len(testloader))

Google colab：RuntimeError：后端CPU的预期对象，但参数＃4'mat1'的后端CUDA

0 个答案: