我试图手动编写asm代码的锁,因为代码实际上没有用,我使用__sync_bool_compare_and_swap来查看反汇编。
但是,内置功能似乎也不起作用。
我正在使用Ubuntu 15.10 x64
这是该计划的一般结构。
全局:
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
#include <stdbool.h>
#include <unistd.h>
unsigned int randrange(unsigned int min, unsigned int max); // custom rand function
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; // real mutex
__volatile__ unsigned int lock = 1; // my attempt
unsigned int i = 0, count = 0;
主题功能:
void *worker(void *arg)
{
unsigned int loop = randrange(1000, 10000), time = randrange(10, 100);
pthread_mutex_lock(&mutex);
count += loop; // safe keep trace of number of iterations
pthread_mutex_unlock(&mutex);
/* Acquire the custom lock */
for(int x = 0; x < loop; x++)
{
i++; // increment i by number of iterations with a step=1
usleep(time); // short nap
}
__sync_synchronize(); // built-in full barriers
lock = 1; // release the custom lock
pthread_exit((void*) true);
}
主要
int main()
{
pthread_t thread_id[1000];
pthread_attr_t attr;
unsigned int ret_thread = NULL;
void *status = NULL;
unsigned int args = NULL;
unsigned int nthreads = 1000;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
for(int i = 0; i < nthreads; i++)
{
ret_thread = pthread_create(&thread_id[i], &attr, worker,
(void *) &args);
if(ret_thread)
{
printf("Error occurred during thread creation. \
n: %i - return code: %i", i, ret_thread);
return -1;
}
}
pthread_attr_destroy(&attr);
for(int i = 0; i < nthreads; i++)
{
ret_thread = pthread_join(thread_id[i], &status);
if(ret_thread)
{
printf("Error occurred during thread joining. \
n: %i - return code: %i", i, ret_thread);
return -1;
}
}
puts("\nmain: All the threads exited normally.\n");
printf("count: %i - i: %i\n", count, i); // core: compare the safe trace or number of iterations with the target one
puts("\n\nmain: I've finished, bye.\n");
pthread_mutex_destroy(&mutex);
return 0;
}
由于cmpxchg source, destination
将累加器(寄存器A)与目标操作数进行比较,因此
如果accumulator == destination,则设置ZF并将源复制到目标和
如果累加器!=目的地,则重置ZF并将目标复制到累加器......
我的尝试:
void *worker(void *arg)
{
unsigned int loop = randrange(1000, 10000), time = randrange(10, 100);
pthread_mutex_lock(&mutex);
count += loop; // safe keep trace of number of iterations
pthread_mutex_unlock(&mutex);
/* My attempt */
prepare:
__asm__ __volatile__ ("xor %r8, %r8");
wait:
__asm__ __volatile__ ("nop \n\t"
"mov $0x1, %rax \n\t"
"lock; cmpxchgq %r8, (lock)");
__asm__ __volatile__ goto ("jne %l0" :::: wait);
/* My attempt */
for(int x = 0; x < loop; x++)
{
i++; // increment i by number of iterations with a step=1
usleep(time); // short nap
}
__sync_synchronize(); // built-in full barriers
lock = 1; // release the custom lock
pthread_exit((void*) true);
}
拆卸:
(gdb) disas worker
Dump of assembler code for function worker:
0x0000000000400c70 <+0>: push %r12
0x0000000000400c72 <+2>: push %rbp
0x0000000000400c73 <+3>: mov $0x2710,%esi
0x0000000000400c78 <+8>: push %rbx
0x0000000000400c79 <+9>: mov $0x3e8,%edi
0x0000000000400c7e <+14>: callq 0x400c20 <randrange>
0x0000000000400c83 <+19>: mov $0x32,%esi
0x0000000000400c88 <+24>: mov $0x1,%edi
0x0000000000400c8d <+29>: mov %eax,%ebp
0x0000000000400c8f <+31>: callq 0x400c20 <randrange>
0x0000000000400c94 <+36>: mov $0x602100,%edi
0x0000000000400c99 <+41>: mov %eax,%r12d
0x0000000000400c9c <+44>: callq 0x4009d0 <pthread_mutex_lock@plt>
0x0000000000400ca1 <+49>: mov $0x602100,%edi
0x0000000000400ca6 <+54>: add %ebp,0x201434(%rip) # 0x6020e0 <count>
0x0000000000400cac <+60>: callq 0x400980 <pthread_mutex_unlock@plt>
0x0000000000400cb1 <+65>: nopl 0x0(%rax)
0x0000000000400cb8 <+72>: nop
0x0000000000400cb9 <+73>: xor %r8,%r8
0x0000000000400cbc <+76>: mov $0x1,%rax
0x0000000000400cc3 <+83>: lock cmpxchg %r8,0x6020b8 # 0x6020b8 <lock>
0x0000000000400ccd <+93>: jne 0x400cb8 <worker+72>
0x0000000000400ccf <+95>: xor %ebx,%ebx
0x0000000000400cd1 <+97>: test %ebp,%ebp
0x0000000000400cd3 <+99>: je 0x400cee <worker+126>
0x0000000000400cd5 <+101>: nopl (%rax)
0x0000000000400cd8 <+104>: mov %r12d,%edi
0x0000000000400cdb <+107>: add $0x1,%ebx
0x0000000000400cde <+110>: addl $0x1,0x2013ff(%rip) # 0x6020e4 <i>
0x0000000000400ce5 <+117>: callq 0x4009f0 <usleep@plt>
0x0000000000400cea <+122>: cmp %ebp,%ebx
0x0000000000400cec <+124>: jne 0x400cd8 <worker+104>
0x0000000000400cee <+126>: mov $0x1,%edi
0x0000000000400cf3 <+131>: mfence
0x0000000000400cf6 <+134>: movl $0x1,0x2013b8(%rip) # 0x6020b8 <lock>
0x0000000000400d00 <+144>: callq 0x400990 <pthread_exit@plt>
End of assembler dump.
使用__sync_bool_compare_and_swap():
void *worker(void *arg)
{
unsigned int loop = randrange(1000, 10000), time = randrange(10, 100);
pthread_mutex_lock(&mutex);
count += loop; // safe keep trace of number of iterations
pthread_mutex_unlock(&mutex);
/* built-in function */
while(!__sync_bool_compare_and_swap(&lock, 1, 0))
{
while(lock) __asm__ __volatile__ ("nop");
}
/* built-in function */
for(int x = 0; x < loop; x++)
{
i++; // increment i by number of iterations with a step=1
usleep(time); // short nap
}
__sync_synchronize(); // built-in full barriers
lock = 1; // release the custom lock
pthread_exit((void*) true);
}
拆卸:
(gdb) disas worker
Dump of assembler code for function worker:
0x0000000000400c70 <+0>: push %r12
0x0000000000400c72 <+2>: push %rbp
0x0000000000400c73 <+3>: mov $0x2710,%esi
0x0000000000400c78 <+8>: push %rbx
0x0000000000400c79 <+9>: mov $0x3e8,%edi
0x0000000000400c7e <+14>: callq 0x400c20 <randrange>
0x0000000000400c83 <+19>: mov $0x32,%esi
0x0000000000400c88 <+24>: mov $0x1,%edi
0x0000000000400c8d <+29>: mov %eax,%ebx
0x0000000000400c8f <+31>: callq 0x400c20 <randrange>
0x0000000000400c94 <+36>: mov $0x602100,%edi
0x0000000000400c99 <+41>: mov %eax,%ebp
0x0000000000400c9b <+43>: callq 0x4009d0 <pthread_mutex_lock@plt>
0x0000000000400ca0 <+48>: mov $0x602100,%edi
0x0000000000400ca5 <+53>: add %ebx,0x201435(%rip) # 0x6020e0 <count>
0x0000000000400cab <+59>: callq 0x400980 <pthread_mutex_unlock@plt>
0x0000000000400cb0 <+64>: mov $0x1,%ecx
0x0000000000400cb5 <+69>: xor %edx,%edx
0x0000000000400cb7 <+71>: nopw 0x0(%rax,%rax,1)
0x0000000000400cc0 <+80>: mov %ecx,%eax
0x0000000000400cc2 <+82>: lock cmpxchg %edx,0x2013ee(%rip) # 0x6020b8 <lock>
0x0000000000400cca <+90>: je 0x400cdd <worker+109>
0x0000000000400ccc <+92>: nopl 0x0(%rax)
0x0000000000400cd0 <+96>: mov 0x2013e2(%rip),%eax # 0x6020b8 <lock>
0x0000000000400cd6 <+102>: test %eax,%eax
0x0000000000400cd8 <+104>: je 0x400cc0 <worker+80>
0x0000000000400cda <+106>: nop
0x0000000000400cdb <+107>: jmp 0x400cd0 <worker+96>
0x0000000000400cdd <+109>: xor %r12d,%r12d
0x0000000000400ce0 <+112>: test %ebx,%ebx
0x0000000000400ce2 <+114>: je 0x400cfb <worker+139>
0x0000000000400ce4 <+116>: mov %ebp,%edi
0x0000000000400ce6 <+118>: add $0x1,%r12d
0x0000000000400cea <+122>: addl $0x1,0x2013f3(%rip) # 0x6020e4 <i>
0x0000000000400cf1 <+129>: callq 0x4009f0 <usleep@plt>
0x0000000000400cf6 <+134>: cmp %ebx,%r12d
0x0000000000400cf9 <+137>: jne 0x400ce4 <worker+116>
0x0000000000400cfb <+139>: mov $0x1,%edi
0x0000000000400d00 <+144>: mfence
0x0000000000400d03 <+147>: movl $0x1,0x2013ab(%rip) # 0x6020b8 <lock>
0x0000000000400d0d <+157>: callq 0x400990 <pthread_exit@plt>
End of assembler dump.
内置函数产生更多代码:
0x0000000000400cb0 <+64>: mov $0x1,%ecx
0x0000000000400cb5 <+69>: xor %edx,%edx
0x0000000000400cb7 <+71>: nopw 0x0(%rax,%rax,1)
0x0000000000400cc0 <+80>: mov %ecx,%eax
0x0000000000400cc2 <+82>: lock cmpxchg %edx,0x2013ee(%rip) # 0x6020b8 <lock>
0x0000000000400cca <+90>: je 0x400cdd <worker+109>
0x0000000000400ccc <+92>: nopl 0x0(%rax)
0x0000000000400cd0 <+96>: mov 0x2013e2(%rip),%eax # 0x6020b8 <lock>
0x0000000000400cd6 <+102>: test %eax,%eax
0x0000000000400cd8 <+104>: je 0x400cc0 <worker+80>
0x0000000000400cda <+106>: nop
0x0000000000400cdb <+107>: jmp 0x400cd0 <worker+96>
96 和 107 之间的代码有什么作用?
我不认为内置函数会产生无用的代码,所以我想以错误的方式理解我的内容。
它有时工作的内置函数意味着它根本不起作用!
的更新:
我已经使用内置版重新测试了它的实现。它可以工作。
我该怎么办?