我的简单 C++ CUDA 程序的源代码如下:
#include <iostream>
#include <cuda.h>
using namespace std;
/**
 * Kernel: adds two integers and stores the result.
 *
 * Reads the int pointed to by `a` and the int pointed to by `b`, then writes
 * their sum through `c`. There is no thread indexing: every launched thread
 * performs the same single store to *c.
 *
 * @param a pointer to the first operand (must be readable from device code)
 * @param b pointer to the second operand (must be readable from device code)
 * @param c pointer that receives the sum (must be writable from device code)
 */
__global__ void AddIntsCUDA(int *a, int *b, int *c)
{
    const int lhs = a[0];
    const int rhs = b[0];
    c[0] = lhs + rhs;
}
// Reports a failed CUDA runtime call on stderr; returns true only when `status` is cudaSuccess.
static bool cudaCallOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status)
         << " (code " << static_cast<int>(status) << ")" << endl;
    return false;
}

/**
 * Adds 10 and 35 on the GPU with AddIntsCUDA and prints the result.
 *
 * Every CUDA runtime call is checked. Without these checks a failure (for
 * example a driver that is too old for the runtime) goes unnoticed and the
 * program prints the initial value of `c` (0) as if it were the answer.
 *
 * @return 0 on success, 1 if any CUDA call failed.
 */
int main()
{
    int a = 10;
    int b = 35;
    int c = 0;
    // Start as null so the cudaFree calls below are harmless no-ops when an
    // allocation failed or was never attempted.
    int *d_a = 0;
    int *d_b = 0;
    int *d_c = 0;
    const size_t size = sizeof(int);

    // Short-circuit evaluation stops at the first failing step.
    bool ok = cudaCallOk(cudaMalloc((void **)&d_a, size), "cudaMalloc(d_a)")
           && cudaCallOk(cudaMalloc((void **)&d_b, size), "cudaMalloc(d_b)")
           && cudaCallOk(cudaMalloc((void **)&d_c, size), "cudaMalloc(d_c)")
           && cudaCallOk(cudaMemcpy(d_a, &a, size, cudaMemcpyHostToDevice), "cudaMemcpy(a -> d_a)")
           && cudaCallOk(cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice), "cudaMemcpy(b -> d_b)");

    if (ok)
    {
        AddIntsCUDA<<<1, 1>>>(d_a, d_b, d_c);
        // A kernel launch returns no status: launch errors are read with
        // cudaGetLastError(), execution errors surface at the next synchronization.
        ok = cudaCallOk(cudaGetLastError(), "AddIntsCUDA launch")
          && cudaCallOk(cudaDeviceSynchronize(), "AddIntsCUDA execution")
          && cudaCallOk(cudaMemcpy(&c, d_c, size, cudaMemcpyDeviceToHost), "cudaMemcpy(d_c -> c)");
    }

    // Only print a result that was actually computed and copied back.
    if (ok)
        cout << "The Answer is "<< c << endl;

    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    system("pause");
    return ok ? 0 : 1;
}
控制台输出显示 c = 0,但我期望输出的是 a 与 b 之和(应该是 45,因为 a = 10,b = 35)。请解释一下这段代码到底出了什么问题。
答案 0 :(得分:2)
尝试在内核启动之后、将结果复制回主机之前添加 `cudaError_t err = cudaDeviceSynchronize();`,并打印 `err` 的值。
可以使用 `const char* cudaGetErrorString(cudaError_t error)` 在运行时获取对应的错误字符串,或者在这里查看错误码的含义:
https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1gf599e5b8b829ce7db0f5216928f6ecb6
根据您在评论中提到的错误号 35(即 cudaErrorInsufficientDriver,表示已安装的 NVIDIA 驱动版本低于 CUDA 运行时库的要求),看来您需要更新显卡驱动程序。