Question

可能重复：
Output of cuda program is not what was expected

我正在运行这个简单的CUDA程序：

#include <cuda_runtime.h>
#include <cuda.h>
#include <stdio.h>

__global__ void
  display(char *t[])
{

  int v = blockIdx.x;
  int p = blockIdx.y;
  int offset = v+ p*gridDim.x;
  t[offset] = "(";
  //
}

void 
main()
{
  int c = 5;
  cudaGetDeviceCount(&c);
  cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop,0);
  printf("The device name is : %s\n", prop.name);
  //bool value = prop.integrated;
  char *x[6];
  int i;
  for (i = 0; i<6; i++)
      cudaMalloc((void**)&x[i], 20*sizeof(char));

  // Checking the meaning of grid(3,2)
  dim3 grid(3,2); 
  display<<<grid,1>>>(x);
  char y[30];
  cudaMemcpy(y, x[0], 20*sizeof(char), cudaMemcpyDeviceToHost);
  printf("The values is :%s\n", y);
  cudaFree(x[0]);

  getchar();
}

我不明白为什么数组y在执行结束时仍为空。不应该是“（”？

Answer 1

我已回答了这个问题here。

但是我会把这个建议留给第一个提出这个问题的其他人：

在调试CUDA代码时，我强烈建议添加强制同步并检查错误，正如我在your other post中提到的那样，以确保您的硬件设置，API设置，当前天气不会弄乱：

/*  Force Thread Synchronization  */
cudaError err = cudaThreadSynchronize();

/*  Check for and display Error  */
if ( cudaSuccess != err )
{
    fprintf( stderr, "Cuda error in file '%s' in line %i : %s.\n",
             __FILE__, __LINE__, cudaGetErrorString( err) );
}

OP的代码的关键问题是x存在于CPU上，即使它的成员存在于GPU上。再次，请参阅我的回答here。

CUDA程序的输出为空

1 个答案: