在cuda中使用共享内存会产生内存写入错误

时间:2014-08-26 08:08:33

标签: cuda shared-memory

我的内核工作正常

__global__ static void  CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
{
    const Integer TID = CudaGetTargetID();
    const Integer ID  = TID;
    if(ID >= ComputeParticleNumber)
    {
        return ;
    }
    CDistance NearestDistance;
    Integer NearestID = -1;
    NearestDistance.Magnitude = 1e8;
    NearestDistance.Direction = make_Scalar3(0,0,0);
    if(c_daOutputParticleID[ID] < -1)
    {
        c_daSTLDistance[ID] = NearestDistance;
        c_daSTLID[ID] = NearestID;
        return;
    }
    Scalar3 TargetPosition = c_daParticlePosition[ID];

    Integer TriangleID;     
    Integer CIDX, CIDY, CIDZ;
    Integer CID = GetCellID(&CONSTANT_BOUNDINGBOX,&TargetPosition,CIDX, CIDY, CIDZ);
    Integer Range = 1;
    if(CID >=0 && CID < c_CellNum)
    {
        for(Integer k = -Range; k <= Range; ++k)
        {
            for(Integer j = -Range; j <= Range; ++j)
            {
                for(Integer i = -Range; i <= Range; ++i)
                {
                    Integer MCID = GetCellID(&CONSTANT_BOUNDINGBOX,CIDX +i, CIDY + j,CIDZ + k);
                    if(MCID < 0 || MCID >= c_CellNum)
                    {
                        continue;
                    }
                    unsigned int TriangleNum = c_daCell[MCID].m_TriangleNum;
                    for(unsigned int l = 0; l < TriangleNum; ++l)
                    {
                        TriangleID = c_daCell[MCID].m_TriangleID[l];
                        if( TriangleID >= 0 && TriangleID < c_TriangleNum && TriangleID != NearestID)// No need to calculate again for the same triangle
                        {
                            CDistance Distance ;
                            Distance.Magnitude = CalcDistance(&c_daTriangles[TriangleID], &TargetPosition, &Distance.Direction);
                            if(Distance.Magnitude < NearestDistance.Magnitude)
                            {
                                NearestDistance = Distance;
                                NearestID = TriangleID;
                            }
                        }
                    }   
                }
            }
        }
    }
    c_daSTLDistance[ID] = NearestDistance;
    c_daSTLID[ID] = NearestID;
}

这里 c_daParticlePosition 是常量内存float3数据类型。所以在这里我想使用共享内存,所以我尝试创建float3类型的共享内存,并尝试将常量日期复制到共享内存,但它显示未知错误,并使用cuda-memcheck它说

此处线程编号为255,块大小为

shared_memory代码

__global__ static void CalcSTLDistance_Kernel(Integer ComputeParticleNumber)
{
    //const Integer TID = CudaGetTargetID();
    const Integer ID  =CudaGetTargetID(); 
    extern __shared__ float3 s[];
    /*if(ID >= ComputeParticleNumber)
    {
        return ;
    }*/
    s[ID] = c_daParticlePosition[ID];
    __syncthreads();

    CDistance NearestDistance;
    Integer NearestID = -1;
    NearestDistance.Magnitude = 1e8;
    NearestDistance.Direction.x = 0;
    NearestDistance.Direction.y = 0;
    NearestDistance.Direction.z = 0;//make_Scalar3(0,0,0);
    //if(c_daOutputParticleID[ID] < -1)
    //{
    //  c_daSTLDistance[ID] = NearestDistance;
    //  c_daSTLID[ID] = NearestID;
    //  return;
    //}

    //Scalar3 TargetPosition = c_daParticlePosition[ID];

    Integer TriangleID;     
    Integer CIDX, CIDY, CIDZ;
    Integer CID = GetCellID(&CONSTANT_BOUNDINGBOX,&s[ID],CIDX, CIDY, CIDZ);
    if(CID >=0 && CID < c_CellNum)
    {
        //Integer Range = 1;
        for(Integer k = -1; k <= 1; ++k)
        {
            for(Integer j = -1; j <= 1; ++j)
            {
                for(Integer i = -1; i <= 1; ++i)
                {
                    Integer MCID = GetCellID(&CONSTANT_BOUNDINGBOX,CIDX +i, CIDY + j,CIDZ + k);
                    if(MCID < 0 || MCID >= c_CellNum)
                    {
                        continue;
                    }
                    unsigned int TriangleNum = c_daCell[MCID].m_TriangleNum;
                    for(unsigned int l = 0; l < TriangleNum; ++l)
                    {
                        TriangleID = c_daCell[MCID].m_TriangleID[l];
                        /*if(c_daTrianglesParameters[c_daTriangles[TriangleID].ModelIDNumber].isDrag)
                        {
                            continue;
                        }*/

                        if( TriangleID >= 0 && TriangleID < c_TriangleNum && TriangleID != NearestID)// No need to calculate again for the same triangle
                        {
                        CDistance Distance ;
                            Distance.Magnitude = CalcDistance(&c_daTriangles[TriangleID], &s[ID], &Distance.Direction);
                            if(Distance.Magnitude < NearestDistance.Magnitude)
                            {
                                NearestDistance = Distance;
                                NearestID = TriangleID;
                            }
                        }
                    }   
                }
            }
        }
    }
    c_daSTLDistance[ID] = NearestDistance;
    c_daSTLID[ID] = NearestID;
}

错误

  Invalid __shared__ write of size 4
    =========     at 0x00000128 in CalcSTLDistance_Kernel(int)
    =========     by thread (159,0,0) in block (0,0,0)
    =========     Address 0x0000077c is out of bounds

1 个答案:

答案 0 :(得分:2)

您可以在this article中找到有关如何使用共享内存的有用信息。特别关注静态共享内存动态共享内存部分。

根据上面的文章,你应该发现你只是写出数组s的界限,正如错误信息所说的那样。要解决此问题,您可以:

  • 在编译时指定共享内存数组s的大小, 如果您事先知道,例如__shared__ float3 s[123456];
  • 或使用动态大小的s数组,这基本上就是你现在正在做的事情,但 ALSO 将第三个内核启动参数指定为CalcSTLDistance_Kernel<<<gridSize, blockSize, sharedMemorySizeInBytes>>>。如果您要使用123456 float3数组,请使用int sharedMemorySizeInBytes = 123456 * sizeof(float3)