cudaMemcpy可以用来复制由cudaMallocPitch分配的内存吗?如果不能,应该使用哪个函数?cudaMallocPitch返回的是线性内存,所以我认为应该可以使用cudaMemcpy。
答案 0 :(得分:10)
您当然可以使用cudaMemcpy
来复制带间距(pitched)的设备内存,但使用cudaMemcpy2D
会更常见。下面是一个从主机到设备的带间距拷贝示例:
#include <assert.h>
#include <stdio.h>

#include "cuda.h"
typedef float real;
int main(void)
{
cudaFree(0); // Establish context
// Host array dimensions
const size_t dx = 300, dy = 300;
// For the CUDA API width and pitch are specified in bytes
size_t width = dx * sizeof(real), height = dy;
// Host array allocation
real * host = new real[dx * dy];
size_t pitch1 = dx * sizeof(real);
// Device array allocation
// pitch is determined by the API call
real * device;
size_t pitch2;
assert( cudaMallocPitch((real **)&device, &pitch2, width, height) == cudaSuccess );
// Sample memory copy - note source and destination pitches can be different
assert( cudaMemcpy2D(device, pitch2, host, pitch1, width, height, cudaMemcpyHostToDevice) == cudaSuccess );
// Destroy context
assert( cudaDeviceReset() == cudaSuccess );
return 0;
}
(注意:以上代码未经测试,caveat emptor(风险自负),诸如此类……)