在CUDA中使用双指针时访问非法内存

时间:2016-12-09 11:43:39

标签: pointers cuda nvidia

在下面给出的代码中,d_slot 是一个在 main 中初始化为 NULL 的二级指针(双指针),它的值在内核 test1 中被修改。我要实现的代码需要让 d_slot 的这个新值保留下来,而不是像现在这样在 test1 执行结束之后又恢复为 NULL。(这可能是因为该二级指针是按值传递而不是按引用传递的。)

#include <stdio.h>
#include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include <helper_cuda.h>
#include <unistd.h>
#include <stdlib.h>
/* Tree root record: a height value plus a pointer to a radix_tree_node. */
struct radix_tree_root {
    unsigned int        height;   /* main sets this to 0 */
    struct radix_tree_node  *rnode;   /* main sets this to NULL; test1/test3 take the address of this field */
};

/*
 * Tree node record: a counter and a fixed array of 64 untyped pointers.
 * NOTE(review): nothing in this file reads or writes these fields; count
 * presumably tracks how many slots are in use -- confirm.
 */
struct radix_tree_node {
    unsigned int    count;
    void        *slots[64];
};

/*
 * Debug kernel: redirects its own copy of d_slot to the rnode field of
 * *d_root, then prints that address and the pointer currently stored there.
 *
 * d_slot is received by value, so the assignment below is only visible
 * inside this kernel; the variable the caller passed in is not modified.
 * The rnode field is read through d_root, so d_root has to be an address
 * the device can access.
 */
__global__ void test1(struct radix_tree_node **d_slot,struct radix_tree_root *d_root)
{
    struct radix_tree_node **rnode_addr = &(d_root->rnode);

    d_slot = rnode_addr;
    printf("From test1: d_slot = %p\t*d_slot = %p\n", d_slot, *d_slot);
}

/*
 * Debug kernel: prints the pointer value it was handed in d_slot.
 * The pointer is only printed, never dereferenced.
 */
__global__ void test2(struct radix_tree_node **d_slot)
{
    struct radix_tree_node **const received = d_slot;

    printf("From test2: d_slot = %p\n", received);
}

/*
 * Kernel: stores the address of d_root->rnode into the location that
 * d_slot points to.
 *
 * Unlike test1, this writes through the pointer it receives, so d_slot
 * itself must refer to memory the device is able to write.
 */
__global__ void test3(struct radix_tree_node ***d_slot,struct radix_tree_root *d_root)
{
    struct radix_tree_node **rnode_addr = &(d_root->rnode);

    d_slot[0] = rnode_addr;
}


/*
 * Host entry point: builds an empty radix_tree_root on the host, copies it
 * into device memory, then launches test1 and test2 (one block of one thread
 * each) so that each kernel prints what it sees in d_slot.
 *
 * Returns 0 on success; exits with EXIT_FAILURE when cudaMalloc, cudaMemcpy
 * or cudaFree reports an error.
 */
int
main(void)
{
    struct radix_tree_root *root,*d_root;
    // Host variable that is handed to the kernels by value; it starts as NULL.
    struct radix_tree_node **d_slot=NULL;
    cudaError_t err = cudaSuccess;
    // NOTE(review): the malloc result is used below without a NULL check.
    root = (struct radix_tree_root *) malloc(sizeof(struct radix_tree_root));
    root->height = 0;
    root->rnode =NULL;


    // Allocate device memory for one radix_tree_root; d_root receives its address.
    err = cudaMalloc((void **)&d_root, sizeof(struct radix_tree_root));
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to allocate device d_root (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    // Copy the host-side root into the device allocation.
    err = cudaMemcpy(d_root, root, (sizeof(struct radix_tree_root)), cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy root from host to device (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }


    printf("\nFrom the main: d_root = %p\n",d_root);
    // test1 gets a copy of d_slot (NULL at this point); the assignment it makes
    // to its parameter does not change the d_slot variable declared above.
    test1<<<1,1>>>(d_slot,d_root);
    // NOTE(review): err is overwritten after every launch in this section but
    // never inspected, and the return value of cudaDeviceSynchronize() is
    // discarded, so a failing kernel is not reported at this point.
    err = cudaGetLastError(); // last error from a runtime call (not checked)
    cudaDeviceSynchronize();
    // d_slot is still the host's NULL value here, and that is what test2 prints.
    test2<<<1,1>>>(d_slot);
    err = cudaGetLastError(); // last error from a runtime call (not checked)
    cudaDeviceSynchronize();
    // Disabled launch. NOTE(review): &d_slot is the address of the host variable
    // declared in main, and test3 writes through that pointer on the device --
    // presumably the source of the illegal memory access seen when this line
    // is enabled; confirm.
    //test3<<<1,1>>>(&d_slot,d_root);
    err = cudaGetLastError(); // last error from a runtime call (not checked)
    cudaDeviceSynchronize();
    // Disabled launch: would print d_slot again after test3.
    //test2<<<1,1>>>(d_slot);
    err = cudaGetLastError(); // last error from a runtime call (not checked)
    cudaDeviceSynchronize();


    // First call after the launches whose result is actually checked.
    err = cudaFree(d_root);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to free device d_root (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    free(root);

    printf("successful execution of entire program\n");
    return 0;
}

此代码的输出为:

From the main: d_root = 0x900ca0000
From test1: d_slot = 0x900ca0008    *d_slot = (nil)
From test2: d_slot = (nil)
successful execution of entire program

到这里一切正常。但是,当我取消上面代码中 test3 和 test2 两个内核调用的注释后,我原本期望 d_slot 的值能够被保留并传递下去……结果却遇到了错误……

取消注释“ test3 ”和“ test2 ”的代码输出为:

From the main: d_root = 0x900ca0000
From test1: d_slot = 0x900ca0008    *d_slot = (nil)
From test2: d_slot = (nil)
Failed to free device d_root (error code an illegal memory access was encountered)!

所以我的问题是,

“如何在内核中成功地为 d_slot(二级指针)赋值,并且在内核执行完成之后不丢失这个值?”

1 个答案:

答案 0 :(得分:1)

显存(设备内存)中需要有一个位置,test1 可以向其中写入,而 test2 和 test3 可以从中读取。您可以再次使用 cudaMalloc 为一个 struct radix_tree_node * 分配空间,如下所示:

// d_slot is a struct radix_tree_node **, so it needs room for one struct radix_tree_node *.
cudaMalloc((void **)&d_slot, sizeof(struct radix_tree_node *));

然后 test1 可以将指针值写入 *d_slot,而 test2 和 test3 可以读取 test1 写入 *d_slot 的值。