I am currently developing a GPU version of a CPU function, e.g. Calc(int a, int b, double* c, double* d, CalcInvFunction GetInv), in which a host function is passed as a function pointer (in the example above, GetInv is a host function of type CalcInvFunction). My question is: if I have to put the Calc() function entirely on the GPU, I have to pass the GetInv function as a function pointer argument to a device function/kernel function. Is that possible?
答案 0 :(得分:1)
Yes, for a GPU implementation of Calc, you should pass GetInv as a __device__ function pointer.
It is possible, here are some worked examples:
Most of the above examples demonstrate bringing the device function pointer all the way back to the host code. This may not be necessary for your particular case, but it should be fairly obvious from the examples how to grab a __device__ function pointer (in device code) and use it in a kernel.
答案 1 :(得分:1)
最后,我已经能够将主机函数作为函数指针传递给cuda内核函数(__global__函数)。感谢Robert Crovella和njuffa的回答。我已经能够将类成员函数(cpu函数)作为函数指针传递给cuda内核。但是,主要问题是,我只能传递静态类成员函数。我无法传递未声明为static的函数。 例如:
/**/
__host__ __device__
static int
CellfunPtr(
void*ptr, int a
);
/**/
上面的函数有效,因为这个成员函数被声明为静态成员函数。如果我没有将此成员函数声明为静态成员,
/**/
__host__ __device__
int
CellfunPtr(
void*ptr, int a
);
/**/
然后它不起作用。
完整的代码有四个文件。
/*start of fundef.h file*/
/* Callback signature: takes an opaque pointer and an int, returns an int.
   Used as the type of the function pointer passed to the kernel. */
typedef int (*pFunc_t)(void* ptr, int N);
/*end of fundef.h file*/
/*start of solver.h file*/
/**
 * Holds the problem sizes and two host-side work arrays, and drives the
 * function-pointer kernel demo through CalcAdv().
 */
class CalcVars {
    /* Problem sizes supplied to the constructor (private). */
    int eqnCount;
    int numCell;
    int numTri;
    int numTet;

public:
    /* Host arrays allocated with calloc in the constructor and released
       with free in the destructor. */
    double* cellVel;
    double* cellPre;

    /** Constructor */
    CalcVars(const int eqnCount_,
             const int numCell_,
             const int numTri_,
             const int numTet_);

    /** Destructor */
    ~CalcVars(void);

    /** Launches the kernel that calls CellfunPtr through a function pointer. */
    void CalcAdv();

    /**
     * Callback matching pFunc_t. It is static so that its address is an
     * ordinary function pointer rather than a pointer-to-member.
     */
    __host__ __device__
    static int CellfunPtr(void* ptr, int a);
};
/*end of solver.h file*/
/*start of solver.cu file*/
#include "solver.h"

#include <cstdio>
#include <cstdlib>
/* Device-side variable initialised with the address of CalcVars::CellfunPtr. */
__device__ pFunc_t pF1_d = CalcVars::CellfunPtr;
/* Host-side copy of pF1_d; CalcVars::CalcAdv fills it with
   cudaMemcpyFromSymbol and then passes it to the kernel as an argument. */
pFunc_t pF1_h ;
/**
 * Replaces each element a[i] with func(thisPtr_, a[i]).
 *
 * The element index is threadIdx.x only (blockIdx is not used) and there is
 * no bounds check, so the caller must launch no more threads per block than
 * `a` has elements.
 *
 * @param a         array updated in place
 * @param func      function pointer called once per thread
 * @param thisPtr_  opaque pointer forwarded unchanged to func
 */
__global__ void kernel(int*a, pFunc_t func, void* thisPtr_){
    const int idx = threadIdx.x;
    const int current = a[idx];
    const int updated = func(thisPtr_, current);
    a[idx] = updated;
}
/**
 * Constructor.
 *
 * Stores the four size parameters and allocates cellVel and cellPre as
 * zero-filled arrays of eqnCount doubles each (released in the destructor).
 *
 * @param eqnCount_  number of equations; also the length of both arrays
 * @param numCell_   stored in numCell
 * @param numTri_    stored in numTri
 * @param numTet_    stored in numTet
 */
CalcVars::CalcVars(
const int eqnCount_,
const int numCell_,
const int numTri_,
const int numTet_
)
{
this->eqnCount = eqnCount_;
this->numCell = numCell_;
this->numTri = numTri_;
/* Previously numTet_ was ignored, leaving numTet uninitialised. */
this->numTet = numTet_;
/* calloc returns NULL on failure; the destructor's free(NULL) is then a no-op. */
this->cellVel = (double*) calloc((size_t) eqnCount, sizeof(double));
this->cellPre = (double*) calloc((size_t) eqnCount, sizeof(double));
}
/* Destructor: releases the two arrays the constructor obtained from calloc. */
CalcVars::~CalcVars(void)
{
    free(cellVel);
    free(cellPre);
}
/**
 * Demo driver: fills a 50-element host array with 0..49, copies it to the
 * device, fetches the device function pointer stored in pF1_d, launches
 * `kernel` with it, and copies the array back.
 *
 * Every CUDA call is checked. On the first failure the remaining steps are
 * skipped, the error is reported on stderr, and both buffers are still
 * released (the original leaked the host and device buffers).
 */
void
CalcVars::CalcAdv(
){
    const int Num = 50;
    const size_t numBytes = Num * sizeof(int);

    int *a1 = (int *)malloc(numBytes);
    if (a1 == NULL) {
        fprintf(stderr, "CalcAdv: host allocation of %d ints failed\n", Num);
        return;
    }
    for (int i = 0; i < Num; i++) {
        a1[i] = i;
    }

    int *a1_dev = NULL;
    cudaError_t err = cudaMalloc((void**)&a1_dev, numBytes);
    if (err == cudaSuccess) {
        err = cudaMemcpy(a1_dev, a1, numBytes, cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        /* Copy the address of the device function to the host so it can be
           passed to the kernel as an ordinary argument. */
        err = cudaMemcpyFromSymbol(&pF1_h, pF1_d, sizeof(pFunc_t));
    }
    if (err == cudaSuccess) {
        /* NOTE(review): only 42 threads are launched for Num = 50 elements,
           so a1_dev[42..49] are left untouched -- confirm this is intended.
           `this` is a host address; the callback must not dereference it
           in device code. */
        kernel<<<1,42>>>(a1_dev, pF1_h, this);
        err = cudaGetLastError();   /* launch-configuration errors */
    }
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();  /* errors raised while the kernel ran */
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(a1, a1_dev, numBytes, cudaMemcpyDeviceToHost);
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CalcAdv: CUDA error: %s\n", cudaGetErrorString(err));
    }

    cudaFree(a1_dev);   /* a no-op when a1_dev is still NULL */
    free(a1);
}
/**
 * Callback invoked through a pFunc_t pointer: prints a message and returns
 * its integer argument decremented by one.
 *
 * @param ptr  opaque pointer; currently unused (the code that cast it back
 *             to CalcVars* and read eqnCount is disabled)
 * @param a    input value
 * @return     a - 1
 */
int
CalcVars::CellfunPtr(
void* ptr, int a
){
    (void)ptr;
    printf("Printing from CPU function\n");
    const int decremented = a - 1;
    return decremented;
}
/*end of solver.cu file*/
/*start of main.cpp file*/
#include "solver.h"
/**
 * Entry point: builds a CalcVars with fixed sizes, runs CalcAdv() once and
 * waits for a key press.
 *
 * The object now has automatic storage so its destructor runs when main
 * returns; the original allocated it with `new` and never deleted it.
 */
int main(){
    const int n_Eqn = 100;
    const int n_cell = 200;
    const int n_tri = 300;
    const int n_tetra = 400;

    CalcVars calcvars(n_Eqn, n_cell, n_tri, n_tetra);
    calcvars.CalcAdv();

    system("pause");
    return 0;
}
/*end of main.cpp file*/