Question

Hardware:
Darwin Kernel Version 13.2.0: Thu Apr 17 23:03:13 PDT 2014; root:xnu-2422.100.13~1/RELEASE_X86_64 x86_64

  atomics.hpp

  1 #ifndef ATOMIC_UTILS_H
  2 #define ATOMIC_UTILS_H
  3
  4 #include
  5
  6 #define BARRIER() __asm__ volatile ( "": : :"memory" ) // compiler-only barrier: empty asm statement with a memory clobber
  7
  8 #define CPU_RELAX() __asm__ volatile( "pause\n\t": : :"memory" ) // x86 pause instruction for spin loops; also carries a memory clobber
  9
 10 #define STORE_FENCE() __asm__ volatile("mfence" ::: "memory"); // executes mfence; NOTE(review): the macro body already ends with a semicolon
 11
 12 class AtomicUtils // Static helpers that wrap x86-64 lock-prefixed instructions in inline asm.
 13 {
 14     public:
 15
 16     /**
 17      * Compare the value at addr with oldval and, if they are equal, replace it with newval.
 18      * Returns the value cmpxchgq leaves in rax: oldval when the swap happened, otherwise the current contents of addr.
 19      */
 20     inline static size_t compareAndExchange( volatile size_t* addr, size_t oldval    , size_t newval )
 21     {
 22       size_t ret;
 23       __asm__ volatile( "lock cmpxchgq %2, %1\n\t"
 24                     :"=a"(ret), "+m"(*addr) // ret is bound to rax; *addr is both read and written
 25                     : "r"(newval), "0"(oldval) // constraint 0 places oldval in the same register as ret
 26                     : "memory" ); // memory clobber: the compiler must not cache or move memory accesses across this statement
 27       return ret;
 28     }
 29
 30     /**
 31      * Atomically stores x into addr and returns the value previously
 32      * stored in addr.
 33      */
 34   inline static size_t loadAndStore( size_t x, volatile size_t* addr )
 36     {
 37       size_t ret;
 38       __asm__ volatile( "lock xchgq %1, %0\n\t"
 39                         : "+m"(*addr), "=r"(ret) // ret receives the old contents of *addr
 40                         : "1"(x) ); // NOTE(review): no memory clobber here, unlike compareAndExchange - confirm the compiler cannot move other loads or stores across this exchange
 41       return ret;
 42     }
 43
 44 };
 45
 46 #endif

  mcs.hpp

  1 #ifndef MCS_LOCK_H
  2 #define MCS_LOCK_H
  3
  4 #include "atomics.hpp"
  5 #include 
  6
  7 class MCSLock // Queue-based spin lock: waiters link their per-thread nodes behind a shared tail pointer.
  8 {
  9     struct mcs_lock_t
 10     {
 11         mcs_lock_t():next(0), locked(false){}
 12         struct mcs_lock_t* next; // node queued directly behind this one, 0 if none has linked itself yet
 13         bool locked; // set to true in lock(); cleared by unlock() of the predecessor
 14     };
 15
 16     public:
 17     typedef struct mcs_lock_t mcs_lock;
 18
 19     private:
 20     mcs_lock** tail; // address of the queue tail pointer supplied by the caller
 21     static boost::thread_specific_ptr tls_node; // per-thread queue node; static, so common to all MCSLock objects
 22
 23     public:
 24     MCSLock( mcs_lock** lock_tail ):tail( lock_tail )
 25     {
 26       if( tls_node.get() == 0 ) // first MCSLock constructed on this thread: allocate its node
 27           tls_node.reset( new mcs_lock() );
 28     }
 29
 30     void lock()
 31     {
 32       mcs_lock* thread_node = tls_node.get();
 33       thread_node->next = 0;
 34       thread_node->locked = true;
 35
 36       volatile mcs_lock* pred = reinterpret_cast( // atomically install this node as the tail; pred is the previous tail
 37                            AtomicUtils::loadAndStore(
 38                                reinterpret_cast( thread_node ),
 39                                reinterpret_cast( tail )
 40                            )
 41                        );
 42       if( pred != 0 ) // a node was already queued, so this thread has to wait behind it
 43       {
 44         pred->next = *tail; // NOTE(review): re-reads the shared tail instead of using thread_node; if another thread has swapped the tail since the exchange above, pred gets linked to that other node and this thread is never released - confirm
 45
 46         STORE_FENCE();
 47         //BARRIER(); // Required to prevent reordering between pred->next = tail and thread_node->locked. ( WR hazard )
 48
 49         // Spin on the node owned by this thread until the predecessor clears locked in unlock().
 50         while( thread_node->locked ) // NOTE(review): locked is a plain bool; the re-read on each iteration relies on the memory clobber in CPU_RELAX
 51             CPU_RELAX();
 52
 53       }
 54     }
 55
 56     void unlock()
 57     {
 58         mcs_lock* thread_node = tls_node.get();
 59         if( thread_node->next == 0 ) // no successor has linked itself behind this node yet
 60         {
 61             // Try to swing the tail from this node back to 0. If the exchange does not happen, a new thread has requested the lock and is released below.
 62             if(
 63                     AtomicUtils::compareAndExchange(
 64                         reinterpret_cast( tail ),
 65                         reinterpret_cast( thread_node ),
 66                         0
 67                     )  == reinterpret_cast( thread_node ) 68             )
 69             {
 70                 return; // the tail still pointed at this node: nobody is waiting
 71             }
 72
 73             while( thread_node->next == 0 ) // a successor has swapped the tail but not yet written next; wait for the link
 74                 CPU_RELAX();
 75         }
 76
 77         thread_node->next->locked = false; // release the successor spinning in lock()
 78     }
 79 };
 80
 81 boost::thread_specific_ptr MCSLock::tls_node;
 82 #endif

mcs_test.cpp

  1 #include "mcs.hpp"
  2 #include <iostream>
  3 #include <pthread.h>
  4 #include <vector>
  5 #define NUM_THREADS 16
  6 #define NUM_ITERATIONS 100
  7
  8 std::vector<int> elements;
  9 MCSLock::mcs_lock *tail = 0;
 10
 11 void* thread_run( void* data ) // pthread start routine; data is not used
 12 {
 13   MCSLock lock( &tail ); // every worker passes the address of the same global tail pointer
 14   for( int i = 0; i < NUM_ITERATIONS; ++i )
 15   {
 16       lock.lock();
 17       elements.push_back( i ); // the shared vector is only modified between lock() and unlock()
 18       lock.unlock();
 19   }
 20
 21   return 0;
 22 }
 23
 24 int main() // starts NUM_THREADS workers running thread_run, then joins all of them
 25 {
 26     pthread_t threads[ NUM_THREADS ];
 27     elements.reserve( NUM_THREADS * NUM_ITERATIONS ); // capacity for every push_back the workers perform
 28
 29     {
 30         for( int i = 0; i < NUM_THREADS; ++i )
 31             pthread_create( &threads[i], NULL, thread_run, NULL ); // return value is not checked
 32
 33         for( int i = 0; i < NUM_THREADS; ++i )
 34             pthread_join( threads[i], NULL ); // blocks until worker i has returned
 35
 36         std::cout <<"\nExiting main thread: " << std::endl;
 37     }
 38 }

上面的代码是使用 clang 编译的。

问题：

我看到有 1 到 2 个线程卡在 lock() 的第 50 行（即 mcs.hpp 中的自旋循环）。此时除了主线程和这些卡在 lock() 里的线程之外，已经没有其他线程存活。这说明其他线程在调用 unlock() 时，没有把等待线程节点的 locked 设置为 false 就退出了。

有什么调试方面的建议吗？

我已经在这个问题上卡了好几个小时，仍然没有任何头绪。

Answer 1

clang 难道没有可以替代这些内联汇编块的内置函数（类似 gcc 的 __sync_val_compare_and_swap）吗？为什么要重新发明轮子？

其次，我强烈建议给 loadAndStore 加上 "memory" clobber。你需要确保编译器暂存在寄存器中的所有写入，在执行 xchgq 之前都已经写回内存。同样，它还能阻止 gcc 把内存读取提前到 xchgq 之前执行。这两种情况中的任何一种都会出问题。

第三，我会检查这两个 while 循环（thread_node->locked 和 thread_node->next）生成的汇编输出。由于这些变量没有声明为 volatile，gcc 可能会把它们优化成只读取一次。

这些未必能解决你的问题，但我会从这里开始排查。

MCS锁实现中的死锁

1 个答案: