Question

我正在使用CUDA 7并尝试将函数作为模板参数传递给设备函数，如下所示：

typedef float(*Op)(float, float);

template<typename Op>
__device__ bool is_maxima(float ax, float ay, cudaTextureObject_t current)
{
    // I try to use the passed function as:
    float cv = tex2D<float>(current, ax, ay);    
    float pv = tex2D<float>(current, ax - 1.f, ay);
    if (Op(cv, pv) != cv) return false;
    return true;
}

现在来自我的global cuda内核，我将其称为：

__global__ void detect_keypoints(cudaTextureObject_t current
                                 float *result, int width, int height)
{
    const int x = __mul24(blockDim.x, blockIdx.x) + threadIdx.x;
    const int y = __mul24(blockDim.y, blockIdx.y) + threadIdx.y;

    float c = tex2D<float>(current, ax + 0.5f, ay + 0.5f);
    float ax = x + 1; float av = y + 1;

    if (is_maxima<fmaxf>(ax + 0.5f, ay + 0.5f, current))
        result[y * width + height] = 1.f;
}

然而，它给我一个编译器错误说：

error: no instance of function template "is_maxima" matches the argument 
list
        argument types are: (float, float, cudaTextureObject_t)

在CUDA设备功能中不允许将函数作为模板参数传递吗？我的印象是CUDA现在支持所有C ++功能。

为了完整性，fmaxf在CUDA SDK中定义为：

inline float fmaxf(float a, float b)
{
    return a > b ? a : b;
}

Answer 1

正如您已经指出的那样，这是不正确的：

template<typename Op>

我认为应该是：

template<Op op>

以下代码似乎对我有效（删除纹理，因为它与问题无关）：

$ cat t755.cu
#include <stdio.h>
#include <math.h>

typedef float(*Op)(float, float);


__device__ float mymax(float a, float b){

  return (a>b)?a:b;
}

template <Op op>
__device__ float test(float d1, float d2){
  return op(d1, d2);
}

__global__ void kernel(){

  float a1 = 1.0f;
  float a2 = 2.0f;
  printf("%f\n", test<mymax>(a1, a2));
}

int main(){

  kernel<<<1,1>>>();
  cudaDeviceSynchronize();
}
$ nvcc -arch=sm_35 -std=c++11 t755.cu -o t755
$ ./t755
2.000000
$

Answer 2

您可以像这样定义is_maxima以避免typedef：

template<typename Op>
__device__ bool is_maxima(float ax, float ay, cudaTextureObject_t current, Op fun)
{
    float cv = tex2D<float>(current, ax, ay);    
    float pv = tex2D<float>(current, ax - 1.f, ay);
    if (fun(cv, pv) != cv) return false;
    return true;
}

然后使用is_maxima(ax + 0.5f, ay + 0.5f, current, fmaxf)调用它。

有关详细信息，请参阅此答案：https://stackoverflow.com/a/1174193/678093

cuda设备功能和模板

2 个答案: