Question

我正在尝试在colab上进行cuda编程。我尝试了在cuda内核上进行基本的数值数组操作。对于在colab上运行cuda，我可以在网上看到很多示例。

!apt update -qq;
!wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb;
!dpkg -i cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb;
!apt-key add /var/cuda-repo-8-0-local-ga2/7fa2af80.pub;
!apt-get update -qq;
!apt-get install cuda gcc-5 g++-5 -y -qq;
!ln -s /usr/bin/gcc-5 /usr/local/cuda/bin/gcc;
!ln -s /usr/bin/g++-5 /usr/local/cuda/bin/g++;
!apt install cuda-8.0;
!/usr/local/cuda/bin/nvcc --version
!pip install git+git://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin

%%cu
#include <stdio.h>
#define N  64
inline cudaError_t checkCudaErr(cudaError_t err, const char* msg) {
  if (err != cudaSuccess) {
    fprintf(stderr, "CUDA Runtime error at %s: %s\n", msg, cudaGetErrorString(err));
  }
  return err;
}
__global__ void matrixMulGPU( int * a, int * b, int * c )
{
  /*
   * Build out this kernel.
   */
    int row = threadIdx.y + blockIdx.y * blockDim.y;
    int col = threadIdx.x + blockIdx.x * blockDim.x;

    int val = 0;
    if (row < N && col < N) {
      for (int i = 0; i < N; ++i) {
         val += a[row * N + i] * b[i * N + col];
       }

      c[row * N + col] = val;
    }
}
/*
 * This CPU function already works, and will run to create a solution matrix
 * against which to verify your work building out the matrixMulGPU kernel.
 */
void matrixMulCPU( int * a, int * b, int * c )
{
  int val = 0;for( int row = 0; row < N; ++row )
    for( int col = 0; col < N; ++col )
    {
      val = 0;
      for ( int k = 0; k < N; ++k )
        val += a[row * N + k] * b[k * N + col];
      c[row * N + col] = val;
    }
}
int main()
{
  int *a, *b, *c_cpu, *c_gpu; // Allocate a solution matrix for both the CPU and the GPU operations
 int size = N * N * sizeof (int); // Number of bytes of an N x N matrix// Allocate memory
  cudaMallocManaged (&a, size);
  cudaMallocManaged (&b, size);
  cudaMallocManaged (&c_cpu, size);
  cudaMallocManaged (&c_gpu, size);// Initialize memory; create 2D matrices
 from google.colab import drive
drive.mount('/content/gdrive')
PATH_OF_DATA= '/content/gdrive/"My Drive"/sherry-christian-8Myh76_3M2U-unsplash'

  for( int row = 0; row < N; ++row )
    for( int col = 0; col < N; ++col )
    {
      a[row*N + col] = row;
      b[row*N + col] = col+2;
      c_cpu[row*N + col] = 0;
      c_gpu[row*N + col] = 0;
    }/*
   * Assign `threads_per_block` and `number_of_blocks` 2D values
   * that can be used in matrixMulGPU above.
   */dim3 threads_per_block(32, 32, 1);
  dim3 number_of_blocks(N / threads_per_block.x + 1, N / threads_per_block.y + 1, 1);
  printf("%d",number_of_blocks);
 matrixMulGPU <<< number_of_blocks, threads_per_block >>> ( a, b, c_gpu );
 checkCudaErr(cudaDeviceSynchronize(), "Syncronization");
 checkCudaErr(cudaGetLastError(), "GPU");// Call the CPU version to check our work
  matrixMulCPU( a, b, c_cpu );// Compare the two answers to make sure they are equal
  bool error = false;
  for( int row = 0; row < N && !error; ++row )
    for( int col = 0; col < N && !error; ++col )
      if (c_cpu[row * N + col] != c_gpu[row * N + col])
      {
        printf("FOUND ERROR at c[%d][%d]\n", row, col);
        error = true;
        break;
      }
if (!error)
    printf("Success!\n");// Free all our allocated memory
  cudaFree(a); cudaFree(b);
  cudaFree( c_cpu ); cudaFree( c_gpu );
}

这是示例代码。

from google.colab import drive
drive.mount('/content/gdrive')
PATH_OF_DATA= '/content/gdrive/"My Drive"/sherry-christian-8Myh76_3M2U-unsplash'

我知道这些行是错误的。我在c ++代码中使用python代码段。我想改用c ++代码段。

我想将图像加载到cuda c ++程序中并尝试操作。因此，有什么方法可以将图像从驱动器加载到google colab上的cuda c ++程序。有人可以帮助我解决示例代码吗？

有什么方法可以将Google驱动器中的图片加载到colab上运行的cuda代码中吗？

0 个答案: