当我在 main 函数中使用 thrust::device_vector 时,我可以正确地将它(的原始指针)传递给内核函数,代码如下:
// Two ints on the device plus a host-side buffer of the same length.
thrust::device_vector<int> device_a(2);
thrust::host_vector<int> host_a(2);
// The kernel receives the raw device pointer and the element count,
// not the Thrust container itself.
int* raw_a = thrust::raw_pointer_cast(&device_a[0]);
MyTest<<<1, 2>>>(raw_a, device_a.size());
// Assigning a device_vector to a host_vector copies the data back to the host.
host_a = device_a;
// Print one element per line.
for (int k = 0; k < host_a.size(); ++k) {
    cout << host_a[k] << endl;
}
但我想在代码中使用二维的 device_vector(即元素本身也是 device_vector 的 device_vector),应该如何使用?如下面的代码所示:
// Question's attempted kernel: treats `a` as an array of `total` device_vectors
// and has thread `idx` write its own index into elements 0 and 1 of a[idx].
// NOTE(review): per the answer in this post, Thrust containers are host-only
// types and cannot be used inside __global__ / __device__ functions, so this
// kernel is the non-working example the question is about.
__global__ void MyTest(thrust::device_vector<int>* a,int total){
// One thread per outer element; only threadIdx.x is used (single-block launch).
int idx = threadIdx.x;
if (idx < total){
int temp = idx;
a[idx][0] = temp;
a[idx][1] = temp;
// NOTE(review): this barrier sits inside the `idx < total` branch, so threads
// with idx >= total never reach it; __syncthreads() must be reached by all
// threads of the block.
__syncthreads();
}
}
// Question's host code: builds 2x2 nested vectors on device and host, launches
// MyTest on the nested device_vector, then copies back and prints both columns
// of every row.
// NOTE(review): per the answer in this post, Thrust containers are host-only
// types, so passing a pointer to device_vector objects into a kernel is not a
// supported pattern; the answer flattens the data into one device_vector<int>.
void main(){
// Outer vector of 2 inner vectors, each of length 2 (device and host versions).
thrust::device_vector<thrust::device_vector<int>> device_a(2,thrust::device_vector<int>(2));
thrust::host_vector<thrust::host_vector<int>> host_a(2,thrust::host_vector<int>(2));
// One block of 2 threads; the kernel is handed a raw pointer to the outer
// vector's storage and the number of outer elements.
MyTest << <1, 2 >> >(thrust::raw_pointer_cast(device_a.data()),device_a.size());
host_a = device_a;
// Print element 0 and element 1 of each inner vector, one value per line.
for (int i = 0; i < host_a.size(); i++){
cout << host_a[i][0] << endl;
cout << host_a[i][1] << endl;
}
}
答案 0 :(得分:1)
通常,Thrust 容器是仅限主机端使用的类型,不能在 __device__ 和 __global__ 函数中使用。
使用二维数组的常用方法是将其放入一维线性存储空间(按行主序存放,元素 (i, j) 位于下标 i * ncols + j),如下面的代码所示。
// Fills an nrows x ncols matrix stored row-major in the flat buffer `a`:
// element (row, col), located at a[row * ncols + col], receives row + col.
//
// Thread mapping: threadIdx.y selects the row and threadIdx.x the column.
// Only threadIdx is used (no blockIdx), so every block of a launch addresses
// the same elements; the block must span at least ncols x nrows threads.
__global__ void MyTest(int* a, int nrows, int ncols) {
    const int col = threadIdx.x;
    const int row = threadIdx.y;
    // Threads that fall outside the matrix have nothing to write.
    if (row >= nrows || col >= ncols) {
        return;
    }
    a[row * ncols + col] = row + col;
}
// Host driver for the flattened 2D example: allocates an nrows x ncols matrix
// as a single device_vector<int>, launches MyTest over it with one thread per
// element, and reports failure through the process exit code.
//
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main(int argc, char** argv) {
    const int nrows = 2;
    const int ncols = 2;
    // Row-major storage: element (i, j) lives at index i * ncols + j.
    thrust::device_vector<int> device_a(nrows * ncols);

    // MyTest reads the column from threadIdx.x and the row from threadIdx.y,
    // so the block is ncols wide (x) and nrows tall (y).
    const dim3 block(ncols, nrows);
    // Fix: the original passed the undeclared identifier `rows` here.
    MyTest<<<1, block>>>(thrust::raw_pointer_cast(device_a.data()), nrows, ncols);

    // A kernel launch returns nothing; configuration errors are only visible
    // through cudaGetLastError().
    if (cudaGetLastError() != cudaSuccess) {
        return 1;
    }
    // Faults raised while the kernel runs surface at the next synchronizing call.
    if (cudaDeviceSynchronize() != cudaSuccess) {
        return 1;
    }
    return 0;
}