在下面给出的代码中, d_slot 是在main中初始化为 NULL 的双指针。 它的值在内核“测试”中发生了变化。 我将要实现的代码需要携带 d_slot 的值,并且不会恢复为 NULL ,因为它发生在'之后' 测试'已完成。 (这可能是因为双指针是按值而不是通过引用传递的)
#include <stdio.h>
#include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include <helper_cuda.h>
#include <unistd.h>
#include <stdlib.h>
struct radix_tree_root {
unsigned int height;
struct radix_tree_node *rnode;
};
struct radix_tree_node {
unsigned int count;
void *slots[64];
};
__global__ void test1(struct radix_tree_node **d_slot,struct radix_tree_root *d_root)
{
(d_slot) = &d_root->rnode;
printf("From test1: d_slot = %p\t*d_slot = %p\n",d_slot,*d_slot);
}
__global__ void test2(struct radix_tree_node **d_slot)
{
printf("From test2: d_slot = %p\n",d_slot);
}
__global__ void test3(struct radix_tree_node ***d_slot,struct radix_tree_root *d_root)
{
(*d_slot) = &d_root->rnode;
}
int
main(void)
{
struct radix_tree_root *root,*d_root;
struct radix_tree_node **d_slot=NULL;
cudaError_t err = cudaSuccess;
root = (struct radix_tree_root *) malloc(sizeof(struct radix_tree_root));
root->height = 0;
root->rnode =NULL;
//allocate memory to d_root in the GPU//
err = cudaMalloc((void **)&d_root, sizeof(struct radix_tree_root));
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to allocate device d_root (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
//copy root to d_root
err = cudaMemcpy(d_root, root, (sizeof(struct radix_tree_root)), cudaMemcpyHostToDevice);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to copy root from host to device (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
printf("\nFrom the main: d_root = %p\n",d_root);
test1<<<1,1>>>(d_slot,d_root);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
test2<<<1,1>>>(d_slot);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
//test3<<<1,1>>>(&d_slot,d_root);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
//test2<<<1,1>>>(d_slot);
err = cudaGetLastError();//brief Returns the last error from a runtime call
cudaDeviceSynchronize();
err = cudaFree(d_root);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to free device d_root (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
free(root);
printf("successful execution of entire program\n");
return 0;
}
此代码的输出为:
From the main: d_root = 0x900ca0000
From test1: d_slot = 0x900ca0008 *d_slot = (nil)
From test2: d_slot = (nil)
successful execution of entire program
这一切都很好。但是当我取消注释上面代码中给出的' test3 '和' test2 '内核时, 我预计 d_slot 的价值会被推进...... 但是,遇到了错误......
取消注释“ test3 ”和“ test2 ”的代码输出为:
From the main: d_root = 0x900ca0000
From test1: d_slot = 0x900ca0008 *d_slot = (nil)
From test2: d_slot = (nil)
Failed to free device d_root (error code an illegal memory access was encountered)!
所以我的问题是,
“如何成功为d_slot赋值(双指针) 在内核执行完成后,在内核中没有丢失它的值?“
答案 0 :(得分:1)
图形内存中需要有test1
可以写入的位置,test2
和test3
可以读取。您可以再次使用cudaMalloc
为struct radix_tree_node *
分配空间,如下所示:
cudaMalloc((void **)&d_slot, sizeof(struct radix_tree_root *));
然后test1
可以将指针值写入*d_slot
,test2
和test3
可以读取test1
从*d_slot
写入的值。