我正在尝试使用 CUDA 5.5 对 cudaMemcpy3D 进行一个简单的测试。我已经四处搜索,找到了不同的示例,也阅读了相应的 Runtime API 文档,但仍然弄不清楚自己哪里做错了。下面的代码可以正常编译,但运行时会在 cudaMemcpy3D 调用处发生段错误。我尝试用 cuda-gdb 运行这个程序,但没能从中得到任何有用的信息来判断问题所在(可能是因为我不熟悉 gdb / cuda-gdb 的用法)。如果有人能帮我找出错误所在,我将非常感激。
#include <cstdio>
#include <cuda_runtime.h>
// Question's reproduction case for a crash in cudaMemcpy3D (CUDA 5.5).
// It fills an NX*NY*NZ float volume on the host, allocates a 3D device buffer,
// zeroes it and attempts a host-to-device 3D copy, printing the runtime's last
// error after each step.
//
// NOTE: this listing intentionally keeps the defect being asked about; see the
// comments on the make_cudaPitchedPtr calls. Only the mis-encoded "&params"
// argument of cudaMemcpy3D has been repaired so the listing compiles as the
// question states.
int main() {
    static const size_t NX = 60;
    static const size_t NY = 60;
    static const size_t NZ = 60;

    // Host volume initialised with its own linear index.
    float* h_data = new float[NX * NY * NZ];
    for(unsigned int i = 0; i < NX * NY * NZ; ++i) {
        h_data[i] = static_cast<float>(i);
    }

    float* d_data = 0;
    // NOTE: wraps the address of the pointer variable d_data, not a device
    // buffer. The answer below calls this call redundant, because cudaMalloc3D
    // fills in dstPtr itself.
    cudaPitchedPtr dstPtr = make_cudaPitchedPtr((void**)&d_data, NX * sizeof(float), NX, NY);
    printf("cudaPitchedPtr: %s\n", cudaGetErrorString(cudaGetLastError()));

    // Extent width is given in bytes, height and depth in elements.
    cudaExtent extent = make_cudaExtent(NX * sizeof(float), NY, NZ);;
    cudaMalloc3D(&dstPtr, extent);
    printf("cudaMalloc3D: %s\n", cudaGetErrorString(cudaGetLastError()));
    cudaMemset3D(dstPtr, 0, extent);
    printf("cudaMemset3D: %s\n", cudaGetErrorString(cudaGetLastError()));

    // NOTE: this is the bug under discussion. &h_data is the address of the
    // host pointer variable, not the host buffer, so the copy source describes
    // the wrong memory. The corrected code in the answer passes h_data itself.
    cudaPitchedPtr srcPtr = make_cudaPitchedPtr((void**)&h_data, NX * sizeof(float), NX, NY);
    printf("cudaPitchedPtr: %s\n", cudaGetErrorString(cudaGetLastError()));

    cudaMemcpy3DParms params = {0};
    params.srcPtr = srcPtr;
    params.dstPtr = dstPtr;
    params.extent = extent;
    params.kind = cudaMemcpyHostToDevice;
    cudaMemcpy3D(&params);
    printf("cudaMemcpy3D: %s\n", cudaGetErrorString(cudaGetLastError()));

    delete[] h_data;
    return 0;
}
答案 0 :(得分:2)
这里有几个问题。没有特别的顺序:
1. 您把指向源内存的指针的地址(即 &h_data,而不是 h_data 本身)传给了 make_cudaPitchedPtr。这可能是此代码中的主要问题。
2. 针对目标内存的 make_cudaPitchedPtr 调用是多余的。cudaMalloc3D 调用会根据您提供的范围(extent)以及驱动程序和设备对分配施加的任何要求,自行生成带间距的指针(pitched pointer)。
3. make_cudaExtent 和 make_cudaPitchedPtr 等辅助例程不会修改 cudaGetLastError() 所使用的运行时 API 状态,因此这些错误检查调用也是多余的。

修复这些问题后,您可能会得到如下代码:
#include <cstdio>
#include <cuda_runtime.h>
// Minimal host-to-device cudaMemcpy3D example.
// Fills an NX*NY*NZ float volume on the host, allocates a pitched 3D device
// buffer with cudaMalloc3D, zeroes it, and copies the host volume into it.
// Prints the name and status of the last step attempted ("cudaMemcpy3D: ..."
// when everything before it succeeded).
// Returns 0 on success, 1 if any CUDA runtime call failed.
int main() {
    static const size_t NX = 60;
    static const size_t NY = 60;
    static const size_t NZ = 60;

    // Host volume initialised with its own linear index.
    float* h_data = new float[NX * NY * NZ];
    for (size_t i = 0; i < NX * NY * NZ; ++i) {
        h_data[i] = static_cast<float>(i);
    }

    // Describe the host buffer: pass the buffer pointer itself (not the
    // address of the pointer variable). Host rows are tightly packed, so the
    // pitch is NX * sizeof(float) bytes.
    cudaPitchedPtr srcPtr = make_cudaPitchedPtr(h_data, NX * sizeof(float), NX, NY);

    // cudaMalloc3D fills in dstPtr (pointer and pitch). ptr is cleared first
    // so the cleanup below can call cudaFree unconditionally.
    cudaPitchedPtr dstPtr;
    dstPtr.ptr = 0;

    // Extent width is given in bytes, height and depth in elements.
    cudaExtent extent = make_cudaExtent(NX * sizeof(float), NY, NZ);

    // Use each call's own return value instead of cudaGetLastError(), and stop
    // at the first failure so later calls never see an invalid device pointer.
    const char* step = "cudaMalloc3D";
    cudaError_t err = cudaMalloc3D(&dstPtr, extent);
    if (err == cudaSuccess) {
        step = "cudaMemset3D";
        err = cudaMemset3D(dstPtr, 0, extent);
    }
    if (err == cudaSuccess) {
        // Zero-initialise so unused fields (arrays, offsets) are well defined.
        cudaMemcpy3DParms params = {0};
        params.srcPtr = srcPtr;
        params.dstPtr = dstPtr;
        params.extent = extent;
        params.kind = cudaMemcpyHostToDevice;
        step = "cudaMemcpy3D";
        err = cudaMemcpy3D(&params);
    }
    printf("%s: %s\n", step, cudaGetErrorString(err));

    // Release the device allocation (a null pointer is accepted by cudaFree)
    // and the host buffer.
    cudaFree(dstPtr.ptr);
    delete[] h_data;
    return err == cudaSuccess ? 0 : 1;
}
你可能会发现它按预期工作。