我正在使用动态并行（dynamic parallelism），想创建一个模板内核：给定一个对象指针和一个成员函数指针，由该内核调用这个成员函数。下面是一个最小的（无法正常工作的）示例，使用 -arch=compute_35 -dlink 选项编译：
#include <cstdlib>
#include <iostream>
// Minimal payload type: a single int plus a device-only member function that
// resets it.
struct A
{
    int i;

    // Sets this object's 'i' to zero. Callable from device code only.
    __device__ void clear()
    {
        this->i = 0;
    }
};
// Kernel that invokes the member function 'f' on the object pointed to by 'o'.
// Every thread of the launch performs the call; 'f' is invoked with no
// arguments and any return value is discarded.
template<typename Object, typename memberFunction>
__global__ void generalKernel(Object* o, memberFunction f)
{
    ((*o).*f)();
}
// Device-side helper: launches generalKernel with a single block of a single
// thread to call 'f' on '*o', then calls cudaDeviceSynchronize() from device
// code before returning.
// NOTE(review): device-side cudaDeviceSynchronize() is deprecated in recent
// CUDA toolkits -- confirm against the toolkit/architecture being targeted.
template<typename Object, typename memberFunction>
__device__ void executeFunction(Object* o, memberFunction f)
{
    generalKernel<Object, memberFunction><<<1, 1>>>(o, f);
    cudaDeviceSynchronize();
}
// Top-level kernel: asks executeFunction to run A::clear on the object 'a'.
__global__ void mainKernel(A* a)
{
    void (A::*clearFn)() = &A::clear;
    executeFunction(a, clearFn);
}
// Host entry point: allocates one A in managed memory, sets i = 1, runs
// mainKernel on it and prints the resulting value of i.
//
// Every CUDA call and the kernel launch are checked; on failure a message is
// written to std::cerr and EXIT_FAILURE is returned instead of dereferencing
// an invalid pointer or printing a stale value. The managed allocation is
// released before returning.
int main(int argc, char * argv[])
{
    (void)argc;
    (void)argv;

    A* a = 0;
    cudaError_t err = cudaMallocManaged(&a, sizeof(A));
    if (err != cudaSuccess)
    {
        std::cerr << "cudaMallocManaged failed: " << cudaGetErrorString(err) << std::endl;
        return EXIT_FAILURE;
    }

    a->i = 1;
    mainKernel<<<1,1>>>(a);

    // A kernel launch returns no status itself: launch errors are reported by
    // cudaGetLastError(), execution errors by the following synchronization.
    err = cudaGetLastError();
    if (err == cudaSuccess)
    {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess)
    {
        std::cerr << "mainKernel failed: " << cudaGetErrorString(err) << std::endl;
        cudaFree(a);
        return EXIT_FAILURE;
    }

    std::cout << a->i << std::endl;

    err = cudaFree(a);
    if (err != cudaSuccess)
    {
        std::cerr << "cudaFree failed: " << cudaGetErrorString(err) << std::endl;
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
答案 0 :(得分:0)
下面是一段简单的 CUDA 代码，用于说明如何将成员函数指针传递给内核。所有细节都在代码注释中说明。
// Wraps a CUDA runtime call: forwards its status, together with the call
// site's file name and line number, to cudaErrorCheck with abort enabled.
#define gpuErrchk(val) cudaErrorCheck((val), __FILE__, __LINE__, true)
// Reports a failed CUDA runtime call and optionally terminates the program.
//   err   - status code to check
//   file  - source file name of the call site (gpuErrchk passes __FILE__)
//   line  - source line of the call site (gpuErrchk passes __LINE__)
//   abort - when true, exit(-1) is called after the message is printed
// Does nothing when err == cudaSuccess.
//
// 'file' is const-qualified because __FILE__ expands to a string literal;
// binding a literal to a non-const char* is deprecated in C++03 and ill-formed
// since C++11.
void cudaErrorCheck(cudaError_t err, const char* file, int line, bool abort)
{
    if (err != cudaSuccess)
    {
        printf("%s %s %d\n", cudaGetErrorString(err), file, line);
        if (abort) exit(-1);
    }
}
// Struct holding one 'int' data member and one '__device__' member function.
struct ST
{
    int id;

    // Prints this object's 'id' using device-side printf.
    __device__ void foo()
    {
        printf("value of id: %d\n", this->id);
    }
};
// Alias for a pointer to a member function of ST that takes no arguments and
// returns void. The 'ST::' qualifier marks the pointee as a member of ST.
typedef void (ST::*Fptr)();
// Templated kernel: calls the member function 'f' on the object pointed to by
// 'o'. Every thread of the launch performs the call.
template<typename Object, typename memberFunction>
__global__ void kernel(Object* o, memberFunction f)
{
    ((*o).*f)();
}
// Declares a '__device__' member-function pointer initialized with the address
// of 'ST::foo'. Being a '__device__' variable, it is also directly accessible
// from kernel code.
__device__ Fptr fp = &ST::foo;
// Host driver: copies an ST object to the device, fetches the member-function
// pointer stored in the '__device__' variable 'fp', and launches 'kernel' so
// that the device calls that member function on the device-side object.
int main(int argc, char** argv)
{
    // Host-side object that provides the data to upload.
    ST hostObj;
    hostObj.id = 10;

    // Device-side copy of the object.
    ST* devObj;
    gpuErrchk(cudaMalloc((void**)&devObj, sizeof(ST)));
    gpuErrchk(cudaMemcpy(devObj, &hostObj, sizeof(ST), cudaMemcpyHostToDevice));

    // Read the value held by the device symbol 'fp' into a host variable so
    // it can be passed to the kernel as an ordinary argument.
    Fptr memberFn;
    gpuErrchk(cudaMemcpyFromSymbol(&memberFn, fp, sizeof(Fptr)));

    // Single block, single thread: one call of the member function.
    kernel<<<1,1>>>(devObj, memberFn);

    // Check the launch status first, then wait for the kernel to finish.
    gpuErrchk(cudaPeekAtLastError());
    gpuErrchk(cudaDeviceSynchronize());

    // Release the device allocation.
    gpuErrchk(cudaFree(devObj));
    return 0;
}