Question

我正在尝试在CUDA中实现基本的面向对象程序。

我有以下类：

/*
 * File: Point.h
 */

/*
 * A point with three single-precision coordinates (x, y, z).
 *
 * The constructor and getX() are callable from both host and device code;
 * every other member function is device-only. Except for operator+, the
 * member functions are only declared here.
 */
class Point {

    private:
        float x;
        float y;
        float z;

    public:
        // Builds a point from its three coordinates.
        __host__ __device__ Point(float x, float y, float z);

        // Coordinate getters. Only getX() may also be called from host code.
        __host__ __device__ float getX();
        __device__ float getY();
        __device__ float getZ();

        // Coordinate setters (device code only).
        __device__ void setX(float x);
        __device__ void setY(float y);
        __device__ void setZ(float z);

        // Component-wise sum of this point and *p.
        //
        // The previous body ignored this point's own coordinates and simply
        // returned a copy of *p; the result now really is the sum.
        //
        // The result is allocated with `new` in device code, so the caller
        // owns it and must release it with `delete` in device code as well.
        // p must point to a valid Point; it is dereferenced without a check.
        __device__ Point* operator+(Point* p) {
            return new Point(
                x + p->getX(),
                y + p->getY(),
                z + p->getZ()
            );
        }
};

/*
 * File: Point.cu
 */

#include "Point.h"


// Builds a point by storing the three coordinates it is given.
// Usable from host and device code.
__host__ __device__ Point::Point(float x, float y, float z)
    : x(x), y(y), z(z) {
}

// Returns the x coordinate (host and device).
__host__ __device__ float Point::getX() {
    return this->x;
}

// Returns the y coordinate (device only).
__device__ float Point::getY() {
    return this->y;
}

// Returns the z coordinate (device only).
__device__ float Point::getZ() {
    return this->z;
}

// Overwrites the x coordinate (device only).
__device__ void Point::setX(float x) {
    this->x = x;
}

// Overwrites the y coordinate (device only).
__device__ void Point::setY(float y) {
    this->y = y;
}

// Overwrites the z coordinate (device only).
__device__ void Point::setZ(float z) {
    this->z = z;
}

在 main.cu 文件中，我编写了一个内核，把集合中每个 Point 的 x 分量加上 10。内核如下：

// Adds 10 to the x coordinate of one Point per thread.
//
// Each thread derives a flat index from its block and thread ids and updates
// the Point that d_arr_points[index] points to. There is no bounds check, so
// the launch must not create more threads than d_arr_points has entries, and
// every entry must be a pointer the device can dereference.
__global__ void SumPoints(Point** d_arr_points) {
    const int index = blockIdx.x * blockDim.x + threadIdx.x;
    Point* point = d_arr_points[index];
    point->setX(point->getX() + 10);
}

为此我使用了 CUDA 的 Thrust 库，但我仍然不清楚传输数据时应该如何使用原始指针。目前我在 main 函数中是这样尝试的：

// Creates NUM_THREADS points on the host, copies the array of pointers to the
// device, and launches SumPoints on it.
// Returns 0 when the launch and the kernel both succeed, 1 otherwise.
int main(void) {

    // Start with an empty vector. Constructing it with NUM_THREADS elements
    // and then calling push_back would leave NUM_THREADS leading null
    // pointers and double the element count.
    thrust::host_vector<Point *> h_points;
    h_points.reserve(NUM_THREADS);
    for (int i = 0; i < NUM_THREADS; i++) {
        h_points.push_back(new Point(1,0,0));
    }

    // NOTE(review): only the pointer values are copied to the device here.
    // The Point objects themselves still live in the host heap, so the kernel
    // dereferences host addresses. The objects must be placed in device
    // memory (e.g. stored by value in a thrust::device_vector<Point>) before
    // this can work.
    thrust::device_vector<Point *> d_points = h_points;
    Point ** d_arr_points = thrust::raw_pointer_cast(d_points.data());

    SumPoints<<<NUM_BLOCKS, NUM_THREADS_PER_BLOCK>>>(d_arr_points);

    // A launch returns no status. Pick up configuration errors first, then
    // wait for the kernel so execution errors are reported too.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }

    // Release the host-side objects created above.
    for (size_t i = 0; i < h_points.size(); i++) {
        delete h_points[i];
    }

    return status == cudaSuccess ? 0 : 1;
}

nvcc会抛出以下错误：

./main.cu(22): Error: External calls are not supported (found non-inlined call to _ZN5Point4setXEf) make: *** [main.o] Error 2

任何人都可以帮助我吗？谢谢！

Answer 1

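这些 Point 对象是在主机堆中分配的（设备无法访问），只有指针数组被复制到了设备上，因此内核解引用这些指针时访问的是设备不可见的主机内存。另外，nvcc 报出的 “External calls are not supported” 是编译期错误：Point::setX 定义在另一个编译单元（Point.cu）中，在未启用可重定位设备代码（-rdc=true）的情况下，nvcc 无法在 main.cu 的内核中内联或调用它。可以把这些成员函数的定义放进 Point.h，或者启用单独编译来解决。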

按照 presius litel snoflek 在评论中的建议，使用 thrust::host_vector<Point>（按值存储对象）。使用 thrust::host_vector 时，无需显式调用 operator new。同样，应避免 __device__ Point* operator+(Point* p) 这类返回 new 出来的指针的写法，那只会带来麻烦。此外，Point 类可以安全地同时在 host 和 device 上使用。

CUDA如何在设备上传输和修改对象

1 个答案: