我的目标是编写一个通用的 CUDA 内核。第一步,我尝试在函数 cudaMain 中使用模板(暂时还没有在内核中使用——那将是第二步)。cudaMain 由我的 C++ main() 文件调用,内核则由 cudaMain 调用。不使用模板时一切正常;但只要给类和 cudaMain 加上模板,链接时就会出现以下错误:
undefined reference to 'Cuda_class<int>::cudaMain(int, int, int*, int*, int*, int*, int*)'
以下是代码:
main.cpp:
#include "cuda_class.hpp"
// Create the int specialization of Cuda_class and call its cudaMain member.
// NOTE(review): trees, treeArray_x/y/z, treeArray_ID and box are defined in
// code not shown in this excerpt — presumably host-side arrays; confirm.
Cuda_class<int> p;
// This call needs the symbol Cuda_class<int>::cudaMain(int, int, int*, int*, int*, int*, int*)
// to be available at link time.
p.cudaMain(trees.size(), trees[0].size(), treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box);
cuda_class.hpp:
// Class template whose only member, cudaMain, is declared here and defined
// in a separate translation unit (cuda_class.cu).
//
// T is the element type of the x/y/z coordinate arrays and of the box array;
// the ID array is always int.
template <typename T>
class Cuda_class
{
public:
    // Declaration only: the body is not visible to code that includes just
    // this header, so every T used by callers must be instantiated in the
    // translation unit that holds the definition.
    void cudaMain(int number_of_trees, int tree_size, T treeArray_x[], T treeArray_y[], T treeArray_z[], int treeArray_ID[], T box[]);
};
cuda_class.cu:
#include "cuda_class.hpp"
// Kernel: each thread processes one tree by handing its slice of the flat
// arrays to traverseTree.
//
// The slice is located with threadIdx.x only (blockIdx is not used), so tree
// indices repeat across blocks if more than one block is launched.
// tree_size is the number of entries each tree occupies in the arrays.
__global__
void insideBox(int *treeArray_x, int *treeArray_y, int *treeArray_z, int *treeArray_ID, int *box, int tree_size){
    // Inclusive index range [firstIdx, lastIdx] of this thread's tree.
    int firstIdx = threadIdx.x * tree_size;
    int lastIdx = firstIdx + tree_size - 1;

    // NOTE(review): the literal 1 is passed through unchanged; its meaning is
    // defined by traverseTree, which is not shown here.
    traverseTree(treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box, 1, firstIdx, lastIdx);
}
// Host-side entry point: selects the device, prepares device buffers (elided)
// and launches the insideBox kernel.
//
// Parameters:
//   number_of_trees  number of trees in the flat arrays (not used by the
//                    visible code; the launch below always uses 32 threads)
//   tree_size        number of entries per tree, forwarded to the kernel
//   treeArray_x/y/z  coordinate arrays of element type T
//   treeArray_ID     int ID array
//   box              box array of element type T
template <class T>
void Cuda_class<T>::cudaMain(int number_of_trees, int tree_size, T treeArray_x[], T treeArray_y[], T treeArray_z[], int treeArray_ID[], T box[]){
    cudaSetDevice(MYDEVICE);
    // do something allocate memory etc
    // NOTE(review): the d_* device buffers used below are presumably created
    // in the elided code above — confirm.

    // launch kernel
    // NOTE(review): the launch is fixed at one block of 32 threads and ignores
    // number_of_trees; with fewer than 32 trees the extra threads index past
    // the tree data unless the buffers are padded — verify.
    insideBox<<<1,32>>>(d_treeArray_x, d_treeArray_y, d_treeArray_z, d_treeArray_ID, d_box, tree_size);
    // do some other stuff
}

// Explicit instantiation. The definition above lives in this .cu file and is
// invisible to main.cpp, so the compiler emits no code for
// Cuda_class<int>::cudaMain unless it is requested here; without this line the
// linker reports "undefined reference to Cuda_class<int>::cudaMain(...)".
// Only int is instantiated because insideBox takes int* arguments. Add one
// line per additional type once the kernel is templated as well.
template class Cuda_class<int>;