CUDA代码不起作用,为什么?

时间:2012-02-21 12:27:15

标签: cuda atomic

我有这个代码,但有时它有效,有时不是(写printf(“ERR:%d \ n”,id))。 我在CUDA 4.1下工作并拥有GTS450,这是计算能力2.1。

代码没有更高的目的,Iam只是试图找到它为什么不起作用,因为我的头脑告诉我,那是对的:]

如果你想运行它,也许你需要执行几次,当出现“错误”或更改网格大小时!

PS:here you can download exe file for win64 - you need to have cuda4.1 driver

class MAN
{
public:
    int m_id;
    int m_use;

    __device__
    MAN()
    {
        m_id = -1;
        m_use = 0;
    }
};

__device__ int* d_ids = NULL;
__device__ int d_last_ids = 0;

__device__ MAN* d_mans = NULL;


__global__ void init()
{
    d_mans = new MAN[500];  //note: 500 is more than enough!
    d_ids = new int[500];

    for(int i=0; i < 500; i++)
        d_ids[i] = 0;
}


__device__ int getMAN() //every block get unique number, so at one moment all running blocks has different id
{
    while(true)
    {
        for(int i=0; i < 500; i++)
            if(atomicCAS(&(d_mans[i].m_use), 0, 1)==0)
                return i;
    }
}
__device__ void returnMAN(int id)
{
    int s = atomicExch(&(d_mans[id].m_use), 0);
}



__global__ void testIt()
{
    if(threadIdx.x==0)
    {
        int man = getMAN();

        int id = d_mans[man].m_id;
        if(id == -1)    //If It never works with this "id", its creating new
        {
            id = atomicAdd(&d_last_ids, 2);

            d_ids[id] = 10; //set to non-zero
            d_mans[man].m_id = id;  //save new id for next time

            printf("ADD:%d\n", id);
        }

        if(d_ids[id]==0)
            printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!

        returnMAN(man);
    }
}



int main()
{
    init<<<1, 1>>>();
    printf("init() err: %d\n", cudaDeviceSynchronize());

    testIt<<<20000, 512>>>();
    printf("testIt() err: %d\n", cudaDeviceSynchronize());

    getchar();
    return 0;
}

2 个答案:

答案 0 :(得分:1)

这似乎发生了,因为 这段代码

    int id = d_mans[man].m_id;
    if(id == -1)    //If It never works with this "id", its creating new
    {
        id = atomicAdd(&d_last_ids, 2);

        d_ids[id] = 10; //set to non-zero
        d_mans[man].m_id = id;  //save new id for next time

        printf("ADD:%d\n", id);
    }

    if(d_ids[id]==0)
        printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!

如果某个块写入d_mans [man] .m_id,但仍未写入d_ids [id],则包含竞争条件。可能编译器交换指令“设置为非零”和“下次保存新ID”或缓存只是不及时更新。

实际上,问题在于你的分配器 - 最好记住上次使用的'man'的索引而不是寻找它。

答案 1 :(得分:0)

我改变了这个:

__device__ int* d_ids = NULL;

到此:

__device__ volatile int* d_ids = NULL;

它运作正常!!!

甚至它也不需要__threadfence();