通用 CUDA 函数 - 模板与 CUDA - C++

时间:2015-06-23 09:02:30

标签: c++ templates cuda generic-programming

我的目标是编写一个通用的 CUDA 内核。第一步,我尝试在函数 cudaMain 中使用模板(尚未在内核中使用模板——那将是第二步)。cudaMain 由我的 C++ main() 文件调用,内核则由 cudaMain 启动。只要不使用模板,这一切都工作正常;但一旦给类和 cudaMain 加上模板,我就会在链接时收到如下错误: undefined reference to 'Cuda_class<int>::cudaMain(int, int, int*, int*, int*, int*, int*)'

以下是代码:

main.cpp:

#include "cuda_class.hpp"

// Use the class template with T = int and call its host-side entry point.
// This call is what requires a definition of Cuda_class<int>::cudaMain to be
// available at link time.
// NOTE(review): trees, treeArray_x/y/z, treeArray_ID and box are defined
// outside this excerpt — confirm their types match the cudaMain parameters.
Cuda_class<int> p;
p.cudaMain(trees.size(), trees[0].size(), treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box);

cuda_class.hpp:

/**
 * Wrapper exposing the host-side entry point cudaMain, parameterised on the
 * element type T of the x/y/z arrays and of box.
 *
 * cudaMain is only declared here. Because it is a member of a class template,
 * its definition must either be visible in every translation unit that calls
 * it, or be explicitly instantiated in the translation unit that defines it.
 */
template <typename T>
class Cuda_class {
public:
    /**
     * @param number_of_trees  number of trees passed in by the caller
     * @param tree_size        size of a single tree
     * @param treeArray_x      array of T
     * @param treeArray_y      array of T
     * @param treeArray_z      array of T
     * @param treeArray_ID     array of int
     * @param box              array of T
     */
    void cudaMain(int number_of_trees,
                  int tree_size,
                  T* treeArray_x,
                  T* treeArray_y,
                  T* treeArray_z,
                  int* treeArray_ID,
                  T* box);
};

cuda_class.cu:

#include "cuda_class.hpp"

/**
 * Kernel: every thread picks one tree by its threadIdx.x and hands the
 * corresponding index range to traverseTree.
 *
 * Only threadIdx.x is used to select the tree; blockIdx is ignored, so the
 * thread with index t in any block works on the range
 * [t * tree_size, t * tree_size + tree_size - 1].
 * There is no bounds check against the number of trees — the launch
 * configuration must not start more threads than there are trees.
 *
 * NOTE(review): the pointer arguments are presumably device pointers, and the
 * meaning of the literal 1 passed to traverseTree is not visible here —
 * confirm against traverseTree's definition.
 */
__global__
void insideBox(int *treeArray_x,
               int *treeArray_y,
               int *treeArray_z,
               int *treeArray_ID,
               int *box,
               int tree_size)
{
    // Each thread owns its own tree; compute the first and last index of it.
    int firstIndex = threadIdx.x * tree_size;
    int lastIndex = firstIndex + tree_size - 1;

    traverseTree(treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box,
                 1, firstIndex, lastIndex);
}
/**
 * Host-side driver: selects the device, prepares device data (elided in this
 * excerpt) and launches the insideBox kernel.
 *
 * @param number_of_trees  number of trees passed in by the caller
 * @param tree_size        size of a single tree; forwarded to the kernel
 * @param treeArray_x      host array of T
 * @param treeArray_y      host array of T
 * @param treeArray_z      host array of T
 * @param treeArray_ID     host array of int
 * @param box              host array of T
 *
 * NOTE(review): the d_* variables are presumably device buffers created in the
 * elided allocation step — confirm.
 * NOTE(review): insideBox is declared with int* parameters, so instantiations
 * other than T = int would need a templated kernel.
 * NOTE(review): the launch uses a fixed 32 threads and insideBox has no bounds
 * check; confirm number_of_trees matches, or derive the launch size from it.
 */
template <class T>
void Cuda_class<T>::cudaMain(int number_of_trees, int tree_size, T treeArray_x[], T treeArray_y[], T treeArray_z[], int treeArray_ID[], T box[]){

    cudaSetDevice(MYDEVICE);
    // do something allocate memory etc

    //launch kernel
    insideBox<<<1,32>>>(d_treeArray_x, d_treeArray_y, d_treeArray_z, d_treeArray_ID, d_box, tree_size);
    //do some other stuff
}

// Explicit instantiation. The definition above lives only in this translation
// unit, so without this line no code for Cuda_class<int>::cudaMain is emitted
// and other translation units that call it fail to link with
// "undefined reference to Cuda_class<int>::cudaMain(...)".
// Add one line per element type that callers need.
template class Cuda_class<int>;

0 个答案:

没有答案