
时间:2015-05-11 14:06:49

标签: c++ cuda

我在主机和设备上都定义了一个结构体。在主机端,我用一些值初始化了该结构体的数组。

// Host side: build the array, mirror it in device global memory, launch init.
MyStruct *h_s = (MyStruct *) malloc(objsize*sizeof(MyStruct));
h_s[0] = ...

// Device copy of h_s; the type spelling must match the MyStruct used above.
MyStruct *d_s;
cudaMalloc( &d_s, objsize * sizeof(MyStruct));
cudaMemcpy( d_s, h_s, objsize * sizeof(MyStruct), cudaMemcpyHostToDevice );
init<<< gridSize, blockSize >>> ( d_s );


// File-scope __shared__ declaration; note it is declared as a pointer, not an array.
__shared__ Mystruct *d_s;

// Kernel taking the struct array as theStructArray; the body below holds only
// the open questions of the post.
__global__ void init(Mystruct *theStructArray){
   // How to allocate memory for d_s?
   // How to copy theStructArray to d_s?


编辑:我正在尝试将 smallpt 的代码移植到 CUDA。

// Sphere primitive: geometry (radius, centre), material (emission, color,
// reflection type) and a device-side ray intersection routine.
struct Sphere {
    double rad;       // radius
    Vec p, e, c;      // position, emission, color
    Refl_t refl;      // reflection type (DIFFuse, SPECular, REFRactive)

    // Default sphere. Statements are not allowed directly at struct scope, so
    // the assignments live in a default constructor body.
    // NOTE(review): the constructor header was lost in the original text —
    // confirm this matches the author's intent.
    Sphere() {
        rad = 16.5;
        p = (Vec(27,16.5,47) + Vec(73,16.5,78))*0.5;
        e = Vec();
        c = Vec(0.75, 0.75, 0.75);
        refl = DIFF;
    }

    // Member-wise constructor.
    Sphere(double rad_, Vec p_, Vec e_, Vec c_, Refl_t refl_):
        rad(rad_), p(p_), e(e_), c(c_), refl(refl_) {}

    // Returns the distance along ray r to the nearest hit in front of the
    // origin (beyond eps), or 0 if the ray misses the sphere.
    // Assumes Vec provides dot() and Ray exposes origin o and direction d, as
    // in smallpt — TODO confirm against the project's Vec/Ray definitions.
    __device__ double intersect(const Ray &r) const { // returns distance, 0 if nohit
        Vec op = p-r.o; // Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0
        // b is the projection of op onto the ray direction; det is the
        // discriminant of the quadratic above (for a unit-length direction).
        double t, eps=1e-4, b=op.dot(r.d), det=b*b-op.dot(op)+rad*rad;
        if (det<0) return 0; else det=sqrt(det);
        // Prefer the nearer root; fall back to the farther one when the
        // nearer lies behind (or too close to) the ray origin.
        return (t=b-det)>eps ? t : ((t=b+det)>eps ? t : 0);
    }
};


1 个答案:

答案 0 :(得分:1)


  1. 如何为共享内存数组动态保留内存
  2. 如何在内核中使用动态共享内存
  3. 你的内核可以改写成这样:

    // Block-scoped shared array whose size is supplied at launch time through
    // the third <<< >>> argument. Dynamic shared memory must be declared as an
    // unsized extern array; a __shared__ pointer would reserve no storage.
    extern __shared__ MyStruct d_s[];

    // Stages one element of theStructArray per thread into shared memory.
    // Launch requirements:
    //   - dynamic shared memory of at least blockDim.x * sizeof(MyStruct) bytes
    //   - gridDim.x * blockDim.x must not exceed the number of elements in
    //     theStructArray, since the global read below is not bounds-checked
    __global__ void init(MyStruct *theStructArray){
        int tid = blockDim.x * blockIdx.x + threadIdx.x;
        // load to shared memory array
        // assumes MyStruct has correct copy assignment semantics
        d_s[threadIdx.x] = theStructArray[tid];
        // Barrier: make every thread's store visible to the whole block before
        // any thread reads an element written by another thread.
        __syncthreads();
        // Each thread has now loaded one value to the block
        // scoped shared array
    }



    // Host side: build the array, mirror it in device global memory, then
    // launch init with one MyStruct of dynamic shared memory per thread.
    MyStruct *h_s = (MyStruct *) malloc(objsize*sizeof(MyStruct));
    h_s[0] = ...
    // Device copy of h_s; the type spelling must match the MyStruct used above.
    MyStruct *d_s;
    cudaMalloc( &d_s, objsize * sizeof(MyStruct));
    cudaMemcpy( d_s, h_s, objsize * sizeof(MyStruct), cudaMemcpyHostToDevice );
    // Third launch argument: bytes of dynamic shared memory reserved per block.
    init<<< gridSize, blockSize, blockSize * sizeof(MyStruct) >>> ( d_s );

    注意内核启动语法 <<< >>> 中的第三个参数:它指定为每个线程块保留的共享内存字节数。硬件对可分配的共享内存大小有上限;而且即使没有超出上限,分配的大小也可能对性能产生额外影响(例如降低占用率)。

    共享内存是 CUDA 中文档完善的功能;我推荐 Mark Harris 的博客文章和这个 Stack Overflow 问题,作为了解 CUDA 共享内存机制的良好起点。