我正在CUDA中进行这个简单的求和程序,当anwer应该为14时,它会一直给我回答“答案是5”。我认为这意味着它不是将数据从设备复制回主机,而是我不确定。这是我正在使用的程序,名为“newsum.cu”:
#include <iostream>
#include <cuda.h>
using namespace std;
__global__ void AddIntsCUDA(int* a, int* b)
{
a[0] += b[0];
}
int main()
{
int a = 5, b = 9;
int *d_a, *d_b;
//maybe put (void **) before &d_a in cudaMalloc?
cudaMalloc(&d_a, sizeof(int));
cudaMalloc(&d_b, sizeof(int));
cudaMemcpy(d_a, &a, sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(d_b, &b, sizeof(int), cudaMemcpyHostToDevice);
AddIntsCUDA<<<1, 1>>>(d_a, d_b);
cudaMemcpy(&a, d_a, sizeof(int), cudaMemcpyDeviceToHost);
cout<<"The answer is "<<a<<endl;
cudaFree(d_a);
cudaFree(d_b);
return 0;
}
这是我用来编译它的makefile:
# A simple CUDA makefile
#
# Author: Naga Kandasamy
# Date: 9/16/2015
#
# CUDA depends on two things:
# 1) The CUDA nvcc compiler, which needs to be on your path,
# or called directly, which we do here
# 2) The CUDA shared library being available at runtime,
# which we make available by setting the LD_LIBRARY_PATH
# variable for the duration of the makefile.
#
# Note that you can set your PATH and LD_LIBRARY_PATH variables as part of your
# .bash_profile so that you can compile and run without using this makefile.
NVCCFLAGS := -O3 -gencode arch=compute_30,code=sm_30
NVCC := /usr/local/cuda/bin/nvcc
LD_LIBRARY_PATH := /usr/local/cuda/lib64
all: newsum
newsum: newsum.cu
$(NVCC) -o newsum newsum.cu $(NVCCFLAGS)
clean:
rm newsum
为什么它给我错误的答案?