Warp无效的PC,设备功能指针

时间:2019-05-28 08:23:05

标签: c++ cuda function-pointers vtable

我有一个类MyClass,它有一个返回设备lambda的函数,在承包商中,我想调用一个使用该函数的内核。 这就是我试图做到的方式:

#include "nvfunctional"

template<typename Func>
__global__ void testKernal(size_t size, Func func) {
    for(int i = 0;i < size; i++) {
        printf("func(%d) = %d\n", i, func(i));
    }
}

class MyClass {
public:
    nvstd::function<size_t(const size_t)> getIncFunc() {
        return []__device__(const size_t val) {
            return val+1;
        };
    }

    MyClass() {
        auto func = getIncFunc();
        testKernal<<<1,1>>>(10, func);
        cudaError_t err = cudaDeviceSynchronize();
        if(cudaSuccess != err) {
            printf("error with test kernal: %d\n", err);
        }
    }
};

但是我得到这个错误: error

这是cuda-memcheck的输出:

$ cuda-memcheck ./prog.out 
========= CUDA-MEMCHECK
========= Invalid PC
=========     at 0x00000458 in /usr/local/cuda-10.0/bin/..//include/crt/nvfunctional:560:_ZNK5nvstd8functionIFffEEclEf
=========     by thread (0,0,0) in block (0,0,0)
=========     Device Frame:/usr/local/cuda-10.0/bin/..//include/crt/nvfunctional:560:_ZNK5nvstd8functionIFffEEclEf (_ZNK5nvstd8functionIFffEEclEf : 0x458)
=========     Device Frame:/home/nhrnhr0/Desktop/NeuralNetwork/ActivationLayer.h:8:void testKernal<nvstd::function<float () (float)>>(unsigned long, float () (float)) (void testKernal<nvstd::function<float () (float)>>(unsigned long, float () (float)) : 0x1010)
=========     Saved host backtrace up to driver entry point at kernel launch time
=========     Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 (cuLaunchKernel + 0x2cd) [0x24d9dd]
=========     Host Frame:./prog.out [0x2b962]
=========     Host Frame:./prog.out [0x2bb57]
=========     Host Frame:./prog.out [0x5ff15]
=========     Host Frame:./prog.out [0xea4b]
=========     Host Frame:./prog.out [0xe4cf]
=========     Host Frame:./prog.out [0xe519]
=========     Host Frame:./prog.out [0xeb9a]
=========     Host Frame:./prog.out [0xf828]
=========     Host Frame:./prog.out [0xee96]
=========     Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
=========     Host Frame:./prog.out [0x7faa]
=========
========= Program hit cudaErrorLaunchFailure (error 4) due to "unspecified launch failure" on CUDA API call to cudaDeviceSynchronize. 
done
=========     Saved host backtrace up to driver entry point at error
=========     Host Frame:/usr/lib/x86_64-linux-gnu/libcuda.so.1 [0x351c13]
=========     Host Frame:./prog.out [0x49856]
=========     Host Frame:./prog.out [0xf83d]
=========     Host Frame:./prog.out [0xee96]
=========     Host Frame:/lib/x86_64-linux-gnu/libc.so.6 (__libc_start_main + 0xe7) [0x21b97]
=========     Host Frame:./prog.out [0x7faa]
=========
========= ERROR SUMMARY: 2 errors

(MyClass在ActivationLayer.h中)
我在做什么错了?

0 个答案:

没有答案