我有这个代码,但有时它有效,有时不是(写printf(“ERR:%d \ n”,id))。 我在CUDA 4.1下工作并拥有GTS450,这是计算能力2.1。
代码没有更高的目的,Iam只是试图找到它为什么不起作用,因为我的头脑告诉我,那是对的:]
如果你想运行它,也许你需要执行几次,当出现“错误”或更改网格大小时!
PS:here you can download exe file for win64 - you need to have cuda4.1 driver
class MAN
{
public:
int m_id;
int m_use;
__device__
MAN()
{
m_id = -1;
m_use = 0;
}
};
__device__ int* d_ids = NULL;
__device__ int d_last_ids = 0;
__device__ MAN* d_mans = NULL;
__global__ void init()
{
d_mans = new MAN[500]; //note: 500 is more than enough!
d_ids = new int[500];
for(int i=0; i < 500; i++)
d_ids[i] = 0;
}
__device__ int getMAN() //every block get unique number, so at one moment all running blocks has different id
{
while(true)
{
for(int i=0; i < 500; i++)
if(atomicCAS(&(d_mans[i].m_use), 0, 1)==0)
return i;
}
}
__device__ void returnMAN(int id)
{
int s = atomicExch(&(d_mans[id].m_use), 0);
}
__global__ void testIt()
{
if(threadIdx.x==0)
{
int man = getMAN();
int id = d_mans[man].m_id;
if(id == -1) //If It never works with this "id", its creating new
{
id = atomicAdd(&d_last_ids, 2);
d_ids[id] = 10; //set to non-zero
d_mans[man].m_id = id; //save new id for next time
printf("ADD:%d\n", id);
}
if(d_ids[id]==0)
printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!
returnMAN(man);
}
}
int main()
{
init<<<1, 1>>>();
printf("init() err: %d\n", cudaDeviceSynchronize());
testIt<<<20000, 512>>>();
printf("testIt() err: %d\n", cudaDeviceSynchronize());
getchar();
return 0;
}
答案 0 :(得分:1)
这似乎发生了,因为 这段代码
int id = d_mans[man].m_id;
if(id == -1) //If It never works with this "id", its creating new
{
id = atomicAdd(&d_last_ids, 2);
d_ids[id] = 10; //set to non-zero
d_mans[man].m_id = id; //save new id for next time
printf("ADD:%d\n", id);
}
if(d_ids[id]==0)
printf("ERR:%d\n", id); //THIS SHOULD NEVER HAPPEN, BUT BECOMES !!!
如果某个块写入d_mans [man] .m_id,但仍未写入d_ids [id],则包含竞争条件。可能编译器交换指令“设置为非零”和“下次保存新ID”或缓存只是不及时更新。
实际上,问题在于你的分配器 - 最好记住上次使用的'man'的索引而不是寻找它。
答案 1 :(得分:0)
我改变了这个:
__device__ int* d_ids = NULL;
到此:
__device__ volatile int* d_ids = NULL;
它运作正常!!!
甚至它也不需要__threadfence();