简单的CUDA内核有奇怪的结果?

时间:2014-06-18 04:15:10

标签: arrays matlab visual-studio-2012 cuda

我在 MATLAB 中使用 CUDA 内核对象,想用数值 55 填满一个二维数组。结果很奇怪:二维数组中只有某些特定位置被填充,如下图所示;第 1025 行之后,数组全部为零。有人知道问题可能出在哪里吗?

enter image description here

1 个答案:

答案 0 :(得分:2)

正如我在上面的评论中所提到的,你对矩阵行的偏移量计算有误:每个线程所写那一行的起始位置应为 globalIdx * iterations,而不是 globalIdx * r_max。下面的代码是一个完整的可运行示例,用来说明这一点。

#include <cstdio>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

/**
 * Fills a row-major matrix with the value 55, one row per thread.
 *
 * Launch layout: any 1D or 2D grid of 1D or 2D blocks. Each thread derives a
 * single linear id from its block and thread coordinates and writes the
 * `iterations` consecutive elements of the row with that index. No shared
 * memory is used.
 *
 * @param masterForces  Device-accessible buffer holding at least
 *                      r_max * iterations doubles.
 * @param r_max         Number of rows in the matrix. Threads whose linear id
 *                      is >= r_max do nothing, so the launch may contain more
 *                      threads than rows.
 * @param iterations    Number of elements per row; this is also the distance
 *                      (in elements) between the starts of consecutive rows.
 */
__global__ void myKern(double* masterForces, int r_max, int iterations) {

    // Flatten the block and thread coordinates into one linear thread id.
    int threadsPerBlock = blockDim.x * blockDim.y;
    int blockId         = blockIdx.x + (blockIdx.y * gridDim.x);
    int threadId        = threadIdx.x + (threadIdx.y * blockDim.x);
    int globalIdx       = (blockId * threadsPerBlock) + threadId;

    // Surplus threads (launch larger than the matrix) must not write at all,
    // otherwise they would store past the end of the buffer.
    if (globalIdx >= r_max) return;

    // A row is `iterations` elements long, so the row stride is `iterations`.
    // Using r_max as the stride (globalIdx * r_max + i) is the bug that left
    // most of the matrix unwritten. The offset is widened to size_t so that
    // large matrices do not overflow 32-bit arithmetic.
    double* row = masterForces + (size_t)globalIdx * (size_t)iterations;
    for (int i = 0; i < iterations; i++) row[i] = 55.0;

}

/**
 * Test driver: allocates a reps x iterations matrix of zeros on the device,
 * launches myKern with one thread per row, then checks on the host that every
 * element was set to 55 and reports the number of mismatches.
 *
 * Returns 0 after a completed run, 1 if the kernel launch or execution
 * reported a CUDA error.
 */
int main() {

    int ThreadBlockSize = 32;

    int reps            = 1024;
    int iterations      = 2000;

    // One thread per row; reps is an exact multiple of the block size, so the
    // launch contains exactly `reps` threads.
    int GridSize        = reps / ThreadBlockSize;

    thrust::device_vector<double> gpuF_M(reps*iterations, 0);

    myKern<<<GridSize,ThreadBlockSize>>>(thrust::raw_pointer_cast(gpuF_M.data()),reps,iterations);

    // A kernel launch returns nothing: configuration errors are reported by
    // cudaGetLastError(), faults during execution by the next synchronization.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return 1;
    }

    // Copy the whole result back in one transfer. Indexing the device_vector
    // from host code would issue a separate device-to-host copy per element.
    thrust::host_vector<double> hostF_M = gpuF_M;

    int numerrors = 0;
    for (int i=0; i<reps*iterations; i++) {
        double test = hostF_M[i];
        if (test != 55) { printf("Error %i %f\n",i,test); numerrors++; }
    }

    printf("Finished!\n");
    printf("The number of errors is = %i\n",numerrors);
    getchar();   // keep the console window open until a key is pressed

    return 0;

}