gcc内置同步函数和cmpxchg

时间:2016-03-16 13:41:40

标签: c gcc assembly locking mutex

我试图手动编写asm代码的锁,因为代码实际上没有用,我使用__sync_bool_compare_and_swap来查看反汇编。
但是,内置功能似乎也不起作用。

我正在使用Ubuntu 15.10 x64


这是该计划的一般结构。

全局:

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
#include <stdbool.h>
#include <unistd.h>

unsigned int randrange(unsigned int min, unsigned int max);   // custom rand function

             pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;   // real mutex
__volatile__ unsigned int    lock = 1;                            // my attempt
             unsigned int    i = 0, count = 0;

主题功能:

void *worker(void *arg)
{
    unsigned int loop = randrange(1000, 10000), time = randrange(10, 100);

    pthread_mutex_lock(&mutex);  
    count += loop;                  // safe keep trace of number of iterations
    pthread_mutex_unlock(&mutex);

    /* Acquire the custom lock */

    for(int x = 0; x < loop; x++)
    {
        i++;                // increment i by number of iterations with a step=1
        usleep(time);       // short nap
    }

    __sync_synchronize();   // built-in full barriers
    lock = 1;               // release the custom lock

    pthread_exit((void*) true);
}

主要

int main()
{
    pthread_t       thread_id[1000];
    pthread_attr_t  attr;
    unsigned int    ret_thread = NULL;
    void           *status = NULL;
    unsigned int    args = NULL;
    unsigned int    nthreads = 1000;

    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

    for(int i = 0; i < nthreads; i++)
    {
        ret_thread = pthread_create(&thread_id[i], &attr, worker,
                                    (void *) &args);
        if(ret_thread)
        {
            printf("Error occurred during thread creation. \
                    n: %i   - return code: %i", i, ret_thread);
            return -1;
        }
    }

    pthread_attr_destroy(&attr);

    for(int i = 0; i < nthreads; i++)
    {
        ret_thread = pthread_join(thread_id[i], &status);
        if(ret_thread)
        {
            printf("Error occurred during thread joining. \
                    n: %i   - return code: %i", i, ret_thread);
            return -1;
        }
    }

    puts("\nmain: All the threads exited normally.\n");

    printf("count: %i   -   i: %i\n", count, i);   // core: compare the safe trace or number of iterations with the target one

    puts("\n\nmain: I've finished, bye.\n");

    pthread_mutex_destroy(&mutex);

    return 0;
}



由于cmpxchg source, destination将累加器(寄存器A)与目标操作数进行比较,因此
如果accumulator == destination,则设置ZF并将源复制到目标和
如果累加器!=目的地,则重置ZF并将目标复制到累加器......

我的尝试:

void *worker(void *arg)
{
    unsigned int loop = randrange(1000, 10000), time = randrange(10, 100);

    pthread_mutex_lock(&mutex);  
    count += loop;                  // safe keep trace of number of iterations
    pthread_mutex_unlock(&mutex);

    /* My attempt */

    prepare:
        __asm__ __volatile__      ("xor %r8, %r8");
    wait:
        __asm__ __volatile__      ("nop                        \n\t"
                                   "mov $0x1, %rax             \n\t"
                                   "lock; cmpxchgq %r8, (lock)");
        __asm__ __volatile__ goto ("jne %l0" :::: wait);

    /* My attempt */

    for(int x = 0; x < loop; x++)
    {
        i++;                // increment i by number of iterations with a step=1
        usleep(time);       // short nap
    }

    __sync_synchronize();   // built-in full barriers
    lock = 1;               // release the custom lock

    pthread_exit((void*) true);
}

拆卸:

(gdb) disas worker
Dump of assembler code for function worker:
   0x0000000000400c70 <+0>:     push   %r12
   0x0000000000400c72 <+2>:     push   %rbp
   0x0000000000400c73 <+3>:     mov    $0x2710,%esi
   0x0000000000400c78 <+8>:     push   %rbx
   0x0000000000400c79 <+9>:     mov    $0x3e8,%edi
   0x0000000000400c7e <+14>:    callq  0x400c20 <randrange>
   0x0000000000400c83 <+19>:    mov    $0x32,%esi
   0x0000000000400c88 <+24>:    mov    $0x1,%edi
   0x0000000000400c8d <+29>:    mov    %eax,%ebp
   0x0000000000400c8f <+31>:    callq  0x400c20 <randrange>
   0x0000000000400c94 <+36>:    mov    $0x602100,%edi
   0x0000000000400c99 <+41>:    mov    %eax,%r12d
   0x0000000000400c9c <+44>:    callq  0x4009d0 <pthread_mutex_lock@plt>
   0x0000000000400ca1 <+49>:    mov    $0x602100,%edi
   0x0000000000400ca6 <+54>:    add    %ebp,0x201434(%rip)      # 0x6020e0 <count>
   0x0000000000400cac <+60>:    callq  0x400980 <pthread_mutex_unlock@plt>
   0x0000000000400cb1 <+65>:    nopl   0x0(%rax)
   0x0000000000400cb8 <+72>:    nop
   0x0000000000400cb9 <+73>:    xor    %r8,%r8
   0x0000000000400cbc <+76>:    mov    $0x1,%rax
   0x0000000000400cc3 <+83>:    lock cmpxchg %r8,0x6020b8         # 0x6020b8 <lock>
   0x0000000000400ccd <+93>:    jne    0x400cb8 <worker+72>
   0x0000000000400ccf <+95>:    xor    %ebx,%ebx
   0x0000000000400cd1 <+97>:    test   %ebp,%ebp
   0x0000000000400cd3 <+99>:    je     0x400cee <worker+126>
   0x0000000000400cd5 <+101>:   nopl   (%rax)
   0x0000000000400cd8 <+104>:   mov    %r12d,%edi
   0x0000000000400cdb <+107>:   add    $0x1,%ebx
   0x0000000000400cde <+110>:   addl   $0x1,0x2013ff(%rip)        # 0x6020e4 <i>
   0x0000000000400ce5 <+117>:   callq  0x4009f0 <usleep@plt>
   0x0000000000400cea <+122>:   cmp    %ebp,%ebx
   0x0000000000400cec <+124>:   jne    0x400cd8 <worker+104>
   0x0000000000400cee <+126>:   mov    $0x1,%edi
   0x0000000000400cf3 <+131>:   mfence 
   0x0000000000400cf6 <+134>:   movl   $0x1,0x2013b8(%rip)        # 0x6020b8 <lock>
   0x0000000000400d00 <+144>:   callq  0x400990 <pthread_exit@plt>
End of assembler dump.


使用__sync_bool_compare_and_swap():

void *worker(void *arg)
{
    unsigned int loop = randrange(1000, 10000), time = randrange(10, 100);

    pthread_mutex_lock(&mutex);  
    count += loop;                  // safe keep trace of number of iterations
    pthread_mutex_unlock(&mutex);

    /* built-in function */

    while(!__sync_bool_compare_and_swap(&lock, 1, 0))
    {
        while(lock) __asm__ __volatile__ ("nop");
    }

    /* built-in function */

    for(int x = 0; x < loop; x++)
    {
        i++;                // increment i by number of iterations with a step=1
        usleep(time);       // short nap
    }

    __sync_synchronize();   // built-in full barriers
    lock = 1;               // release the custom lock

    pthread_exit((void*) true);
}

拆卸:

(gdb) disas worker
Dump of assembler code for function worker:
   0x0000000000400c70 <+0>:     push   %r12
   0x0000000000400c72 <+2>:     push   %rbp
   0x0000000000400c73 <+3>:     mov    $0x2710,%esi
   0x0000000000400c78 <+8>:     push   %rbx
   0x0000000000400c79 <+9>:     mov    $0x3e8,%edi
   0x0000000000400c7e <+14>:    callq  0x400c20 <randrange>
   0x0000000000400c83 <+19>:    mov    $0x32,%esi
   0x0000000000400c88 <+24>:    mov    $0x1,%edi
   0x0000000000400c8d <+29>:    mov    %eax,%ebx
   0x0000000000400c8f <+31>:    callq  0x400c20 <randrange>
   0x0000000000400c94 <+36>:    mov    $0x602100,%edi
   0x0000000000400c99 <+41>:    mov    %eax,%ebp
   0x0000000000400c9b <+43>:    callq  0x4009d0 <pthread_mutex_lock@plt>
   0x0000000000400ca0 <+48>:    mov    $0x602100,%edi
   0x0000000000400ca5 <+53>:    add    %ebx,0x201435(%rip)        # 0x6020e0 <count>
   0x0000000000400cab <+59>:    callq  0x400980 <pthread_mutex_unlock@plt>
   0x0000000000400cb0 <+64>:    mov    $0x1,%ecx
   0x0000000000400cb5 <+69>:    xor    %edx,%edx
   0x0000000000400cb7 <+71>:    nopw   0x0(%rax,%rax,1)
   0x0000000000400cc0 <+80>:    mov    %ecx,%eax
   0x0000000000400cc2 <+82>:    lock cmpxchg %edx,0x2013ee(%rip)      # 0x6020b8 <lock>
   0x0000000000400cca <+90>:    je     0x400cdd <worker+109>
   0x0000000000400ccc <+92>:    nopl   0x0(%rax)
   0x0000000000400cd0 <+96>:    mov    0x2013e2(%rip),%eax        # 0x6020b8 <lock>
   0x0000000000400cd6 <+102>:   test   %eax,%eax
   0x0000000000400cd8 <+104>:   je     0x400cc0 <worker+80>
   0x0000000000400cda <+106>:   nop
   0x0000000000400cdb <+107>:   jmp    0x400cd0 <worker+96>
   0x0000000000400cdd <+109>:   xor    %r12d,%r12d
   0x0000000000400ce0 <+112>:   test   %ebx,%ebx
   0x0000000000400ce2 <+114>:   je     0x400cfb <worker+139>
   0x0000000000400ce4 <+116>:   mov    %ebp,%edi
   0x0000000000400ce6 <+118>:   add    $0x1,%r12d
   0x0000000000400cea <+122>:   addl   $0x1,0x2013f3(%rip)        # 0x6020e4 <i>
   0x0000000000400cf1 <+129>:   callq  0x4009f0 <usleep@plt>
   0x0000000000400cf6 <+134>:   cmp    %ebx,%r12d
   0x0000000000400cf9 <+137>:   jne    0x400ce4 <worker+116>
   0x0000000000400cfb <+139>:   mov    $0x1,%edi
   0x0000000000400d00 <+144>:   mfence 
   0x0000000000400d03 <+147>:   movl   $0x1,0x2013ab(%rip)        # 0x6020b8 <lock>
   0x0000000000400d0d <+157>:   callq  0x400990 <pthread_exit@plt>
End of assembler dump.


内置函数产生更多代码:

   0x0000000000400cb0 <+64>:    mov    $0x1,%ecx
   0x0000000000400cb5 <+69>:    xor    %edx,%edx
   0x0000000000400cb7 <+71>:    nopw   0x0(%rax,%rax,1)
   0x0000000000400cc0 <+80>:    mov    %ecx,%eax
   0x0000000000400cc2 <+82>:    lock cmpxchg %edx,0x2013ee(%rip)  # 0x6020b8 <lock>
   0x0000000000400cca <+90>:    je     0x400cdd <worker+109>
   0x0000000000400ccc <+92>:    nopl   0x0(%rax)
   0x0000000000400cd0 <+96>:    mov    0x2013e2(%rip),%eax        # 0x6020b8 <lock>
   0x0000000000400cd6 <+102>:   test   %eax,%eax
   0x0000000000400cd8 <+104>:   je     0x400cc0 <worker+80>
   0x0000000000400cda <+106>:   nop
   0x0000000000400cdb <+107>:   jmp    0x400cd0 <worker+96>


96 107 之间的代码有什么作用?
我不认为内置函数会产生无用的代码,所以我想以错误的方式理解我的内容。


它有时工作的内置函数意味着它根本不起作用!



更新:

我已经使用内置版重新测试了它的实现。它可以工作。


我该怎么办?

0 个答案:

没有答案