为什么对 (long long **) 调用 cudaMalloc 会返回“无效参数”(invalid argument)错误?

时间:2014-01-19 23:22:03

标签: arrays cuda

我正在尝试把一个指针数组传递给设备,其中每个指针都指向一个 long long 数组。代码可以编译通过,但是当我尝试为这个指针数组分配设备空间(device space)时,运行时得到了“无效参数”(invalid argument)错误。我该怎么做?我的代码片段如下:

        // Pointer-to-(long long pointer) variables. Only my_d_bit_LL_ptrs and
        // my_d_bit_LL_regs are referenced in this snippet.
        long long**  my_d_bit_LL_ptrs;
        long long**  my_d_cs_LL_ptrs;
        long long**  my_d_bit_LL_regs;
        long long**  my_d_cs_LL_regs;

                // Allocate a common original array, organized by Bits, that has both Bits & CS ptr info.
                // Size is (points+1)*16 elements of long long (the <<4 multiplies by 16).
        cuda_rtn_err = cudaMalloc(&d_LL_reg, ((points+1)<<4)*sizeof(long long));
        // On failure the error is only printed; execution continues with the next call.
        if ( cudaSuccess != cuda_rtn_err ) {
             printf("CUDA Error! %s, line=%d\n",cudaGetErrorString(cuda_rtn_err), __LINE__ );
        } 
        // Copy the same number of bytes from i_regs into the buffer allocated above (host -> device).
        cuda_rtn_err = cudaMemcpy(d_LL_reg, i_regs, ((points+1)<<4)*sizeof(long long), cudaMemcpyHostToDevice);
        if ( cudaSuccess != cuda_rtn_err ) {
             printf("CUDA Error! %s, line=%d\n",cudaGetErrorString(cuda_rtn_err), __LINE__ );
        } 
            // Allocate an array of ptrs, then allocate a by-Bits array for each one.
            // calloc gives a zero-initialized host array with NUM_BLOCKS pointer slots.
        my_d_bit_LL_ptrs = (long long**)calloc( NUM_BLOCKS, sizeof(long long *) ); // Bit LL_Reg, ordered by bit#, we allow 16 CS/bit entry
            // Then allocate a by-Bits array, one for each block, and set the ptrs to them.
            // Each slot of the host array receives the pointer written by cudaMalloc.
        for (i=0; i<NUM_BLOCKS; i++) { // points
            cuda_rtn_err = cudaMalloc(&my_d_bit_LL_ptrs[i], ((points+1)<<4)*sizeof(long long) ); // Bit LL_Reg, ordered by bit#, we allow 16 CS/bit entry
            if ( cudaSuccess != cuda_rtn_err ) {
                printf("CUDA Error! %s, line=%d\n",cudaGetErrorString(cuda_rtn_err), __LINE__ );
            } 
            // NOTE(review): the first conversion (%d) is handed a pointer argument;
            // only the %p output should be trusted as the pointer value.
            printf("my_d_bit_LL_ptrs[%d]= %d (x%p) \n", i, my_d_bit_LL_ptrs[i], my_d_bit_LL_ptrs[i] );
        }

---> 执行错误(下面的 cudaMalloc 调用在运行时返回 invalid argument)

        // NOTE(review): my_d_bit_LL_regs is passed by value here and has not been
        // assigned anywhere earlier in this snippet. cudaMalloc expects the address
        // of the pointer (&my_d_bit_LL_regs) so that it can store the new allocation
        // in it; passing the unset pointer itself is the call that fails at run time.
        cuda_rtn_err = cudaMalloc(my_d_bit_LL_regs, NUM_BLOCKS*sizeof(long long *) ); // Allocate a CUDA array of ptrs
        // On failure the error is only printed; the copy below is still attempted.
        if ( cudaSuccess != cuda_rtn_err ) {
             printf("CUDA Error! %s, line=%d\n",cudaGetErrorString(cuda_rtn_err), __LINE__ );
        }               // copy the CPU ptr array to the CUDA array
        // Copies NUM_BLOCKS pointer values from the host array my_d_bit_LL_ptrs
        // to the destination my_d_bit_LL_regs (host -> device).
        cuda_rtn_err = cudaMemcpy(my_d_bit_LL_regs, my_d_bit_LL_ptrs, NUM_BLOCKS*sizeof(long long *), cudaMemcpyHostToDevice);
        if ( cudaSuccess != cuda_rtn_err ) {
             printf("CUDA Error! %s, line=%d\n",cudaGetErrorString(cuda_rtn_err), __LINE__ );
        } 

我已经把 long long 换成了 void,但得到的结果相同。

提前感谢您的帮助, 艾伦

1 个答案:

答案 0 :(得分:1)

注意编译器给你的警告。

这一行:

cuda_rtn_err = cudaMalloc(my_d_bit_LL_regs, NUM_BLOCKS*sizeof(long long *) );

应该是这样的:

cuda_rtn_err = cudaMalloc(&my_d_bit_LL_regs, NUM_BLOCKS*sizeof(long long *) );

当我尝试编译代码时,编译器告诉我:

t306.cu(145): warning: variable "my_d_bit_LL_regs" is used before its value is set