我是cuda和C ++的新手,似乎无法解决这个问题。
我想要做的是将2d数组A复制到设备,然后将其复制回相同的数组B.
我希望B数组的值与A相同,但有些东西我做错了。
CUDA - 4.2,编译为win32,64位机器,NVIDIA Quadro K5000
这是代码。
void main(){
cout<<"Host main" << endl;
// Host code
const int width = 3;
const int height = 3;
float* devPtr;
float a[width][height];
//load and display input array
cout << "a array: "<< endl;
for (int i = 0 ; i < width; i ++)
{
for (int j = 0 ; j < height; j ++)
{
a[i][j] = i + j;
cout << a[i][j] << " ";
}
cout << endl;
}
cout<< endl;
//Allocating Device memory for 2D array using pitch
size_t host_orig_pitch = width * sizeof(float); //host original array pitch in bytes
size_t pitch;// pitch for the device array
cudaMallocPitch(&devPtr, &pitch, width * sizeof(float), height);
cout << "host_orig_pitch: " << host_orig_pitch << endl;
cout << "sizeof(float): " << sizeof(float)<< endl;
cout << "width: " << width << endl;
cout << "height: " << height << endl;
cout << "pitch: " << pitch << endl;
cout << endl;
cudaMemcpy2D(devPtr, pitch, a, host_orig_pitch, width, height, cudaMemcpyHostToDevice);
float b[width][height];
//load b and display array
cout << "b array: "<< endl;
for (int i = 0 ; i < width; i ++)
{
for (int j = 0 ; j < height; j ++)
{
b[i][j] = 0;
cout << b[i][j] << " ";
}
cout << endl;
}
cout<< endl;
//MyKernel<<<100, 512>>>(devPtr, pitch, width, height);
//cudaThreadSynchronize();
//cudaMemcpy2d(dst, dPitch,src ,sPitch, width, height, typeOfCopy )
cudaMemcpy2D(b, host_orig_pitch, devPtr, pitch, width, height, cudaMemcpyDeviceToHost);
// should be filled in with the values of array a.
cout << "returned array" << endl;
for(int i = 0 ; i < width ; i++){
for (int j = 0 ; j < height ; j++){
cout<< b[i][j] << " " ;
}
cout<<endl;
}
cout<<endl;
system("pause");
}
这是输出。
主机主阵列0 1 2 1 2 3 2 3 4
host_orig_pitch:12 sizeof(float):4宽度:3高度:3间距:512
b数组:0 0 0 0 0 0 0 0 0
返回数组0 0 0 1.17549e-038 0 0 0 0 0
按任意键继续。 。
如果需要更多信息,请告诉我,我会发布。 任何帮助将不胜感激。
答案 0 :(得分:4)
正如评论中所指出的那样,原始海报正在为cudaMemcpy2D
电话提供不正确的参数。传输的width参数始终以字节为单位,因此在上面的代码中:
cudaMemcpy2D(b, host_orig_pitch, devPtr, pitch, width, height, cudaMemcpyDeviceToHost);
应该是
cudaMemcpy2D(b, host_orig_pitch, devPtr, pitch, width * sizeof(float), height, cudaMemcpyDeviceToHost);
请注意,此答案已添加为社区维基,以便将此问题从未答复的列表中删除