This question is an extension of this question and is related to this question.
[Q1] When calling cudaMalloc on a struct member, is a cast to (void **) required? Example (see the questions embedded in the code below):
The struct:
typedef struct {
    int a;
    int *b;
} Matrix;
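To make [Q1] concrete, here is a minimal sketch of the two call styles I am comparing; the variable m and the sizes are placeholders of mine, and whether the second, uncast form is valid is exactly what I am asking:

// Minimal sketch for [Q1]: allocating the pointer member 'b' of one Matrix
// on the device, with and without the explicit (void **) cast.
Matrix m;
int rows = 4, cols = 4;                                 // placeholder sizes
cudaMalloc((void **)&m.b, rows * cols * sizeof(int));   // cast version
// cudaMalloc(&m.b, rows * cols * sizeof(int));         // uncast version -- allowed?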
The main function that allocates and copies to the device:
int main(void)
{
    int rows, cols, numMat = 2;
    //[Q2] What would be the problem with not allocating (numMat * sizeof(Matrix)) here?
    //For example, allocating just sizeof(Matrix)?
    Matrix *data = (Matrix*)malloc(numMat * sizeof(Matrix));
    // ... Successfully read from file into "data" ...
    //[Q3] Do we really need this extra host copy of "data"?
    //[A3] Not necessary
    Matrix *h_data = (Matrix*)malloc(numMat * sizeof(Matrix));
    memcpy(h_data, data, numMat * sizeof(Matrix));
    // ... Copy the matrix data to the gpu ...
    //[Q4] Do we need to cast (void**)&(h_data->a)? 'a' is not a pointer.
    //[A4] An int cannot be copied in this fashion
    // cudaMalloc(&(h_data->a), rows*cols*sizeof(int));
    // cudaMemcpy(h_data->a, data->a, rows*cols*sizeof(int), cudaMemcpyHostToDevice);
    //[Q5] Do we need to cast (void**)&(h_data->b)? 'b' is a pointer.
    cudaMalloc(&(h_data->b), rows*cols*sizeof(int));
    cudaMemcpy(h_data->b, data->b, rows*cols*sizeof(int), cudaMemcpyHostToDevice);
    // ... Copy the "meta" data to the gpu ...
    //[Q6] Can we just copy h_data instead? Why create another pointer "d_data"?
    //[A6] Yes
    Matrix *d_data;
    //[Q7] Wouldn't we need to cast (void**)&d_data?
    cudaMalloc(&d_data, numMat*sizeof(Matrix));
    //[Q8] h_data is on both the host and the device. Can we just copy "data" to the device?
    cudaMemcpy(d_data, h_data, numMat*sizeof(Matrix), cudaMemcpyHostToDevice);
    // ... Do other things ...
}
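Since numMat is 2 but the code above only allocates and copies h_data->b (i.e. element 0), here is how I imagine the per-matrix version would look; the loop and the assumption that every matrix shares the same rows and cols are mine, not something I have verified:

// Sketch (my assumption): handle the 'b' member of every matrix before
// copying the whole array of structs to the device.
for (int i = 0; i < numMat; i++) {
    cudaMalloc(&(h_data[i].b), rows * cols * sizeof(int));
    cudaMemcpy(h_data[i].b, data[i].b, rows * cols * sizeof(int),
               cudaMemcpyHostToDevice);
}
cudaMalloc(&d_data, numMat * sizeof(Matrix));
cudaMemcpy(d_data, h_data, numMat * sizeof(Matrix), cudaMemcpyHostToDevice);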
In the end, we just want to pass the Matrix around as a pointer:
// Kernel call
doThings<<<dimGrid, dimBlock>>>(d_data);
The kernel definition:
__global__ void doThings(Matrix *matrices)
{
    matrices->a = ...;
    matrices->b = ...;
}
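For reference, this is roughly how I expect a thread to address the data once d_data has been passed in; the sketch kernel name, the extra rows/cols parameters, and the indexing scheme are hypothetical additions of mine, purely for illustration:

// Sketch (assumption): each block in y handles one matrix, and the threads
// in x cover that matrix's rows*cols elements of 'b'.
__global__ void doThingsSketch(Matrix *matrices, int rows, int cols)
{
    int mat = blockIdx.y;                               // which matrix
    int idx = blockIdx.x * blockDim.x + threadIdx.x;    // which element of 'b'
    if (idx < rows * cols)
        matrices[mat].b[idx] = matrices[mat].a + idx;   // arbitrary example write
}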
Thanks in advance for your time and effort in helping me clear up these doubts!