Hardware: Darwin Kernel Version 13.2.0: Thu Apr 17 23:03:13 PDT 2014; root:xnu-2422.100.13~1/RELEASE_X86_64 x86_64
atomics.hpp 1 #ifndef ATOMIC_UTILS_H 2 #define ATOMIC_UTILS_H 3 4 #include 5 6 #define BARRIER() __asm__ volatile ( "": : :"memory" ) 7 8 #define CPU_RELAX() __asm__ volatile( "pause\n\t": : :"memory" ) 9 10 #define STORE_FENCE() __asm__ volatile("mfence" ::: "memory"); 11 12 class AtomicUtils 13 { 14 public: 15 16 /** 17 * check if the value at addr is equal to oldval, if so replace it with newva l 18 * and return the oldval 19 */ 20 inline static size_t compareAndExchange( volatile size_t* addr, size_t oldval , size_t newval ) 21 { 22 size_t ret; 23 __asm__ volatile( "lock cmpxchgq %2, %1\n\t" 24 :"=a"(ret), "+m"(*addr) 25 : "r"(newval), "0"(oldval) 26 : "memory" ); 27 return ret; 28 } 29 30 /** 31 * Atomically stores x into addr and returns the previous 32 * stored in addr 33 */ 34 inline static size_t loadAndStore( size_t x, volatile size_t* addr ) 36 { 37 size_t ret; 38 __asm__ volatile( "lock xchgq %1, %0\n\t" 39 : "+m"(*addr), "=r"(ret) 40 : "1"(x) ); 41 return ret; 42 } 43 44 }; 45 46 #endif
mcs.hpp 1 #ifndef MCS_LOCK_H 2 #define MCS_LOCK_H 3 4 #include "atomics.hpp" 5 #include 6 7 class MCSLock 8 { 9 struct mcs_lock_t 10 { 11 mcs_lock_t():next(0), locked(false){} 12 struct mcs_lock_t* next; 13 bool locked; 14 }; 15 16 public: 17 typedef struct mcs_lock_t mcs_lock; 18 19 private: 20 mcs_lock** tail; 21 static boost::thread_specific_ptr tls_node; 22 23 public: 24 MCSLock( mcs_lock** lock_tail ):tail( lock_tail ) 25 { 26 if( tls_node.get() == 0 ) 27 tls_node.reset( new mcs_lock() ); 28 } 29 30 void lock() 31 { 32 mcs_lock* thread_node = tls_node.get(); 33 thread_node->next = 0; 34 thread_node->locked = true; 35 36 volatile mcs_lock* pred = reinterpret_cast( 37 AtomicUtils::loadAndStore( 38 reinterpret_cast( thread_node ), 39 reinterpret_cast( tail ) 40 ) 41 ); 42 if( pred != 0 ) 43 { 44 pred->next = *tail; 45 46 STORE_FENCE(); 47 //BARRIER(); // Required to prevent re ordering between prev->next = tail and thread_node->locked. ( WR harzard ) 48 49 // Spin on a local variable. Someone unlock me plz !! 50 while( thread_node->locked ) 51 CPU_RELAX(); 52 53 } 54 } 55 56 void unlock() 57 { 58 mcs_lock* thread_node = tls_node.get(); 59 if( thread_node->next == 0 ) 60 { 61 // If false, then we a new thread has request for lock. Now release t he lock for the new thread 62 if( 63 AtomicUtils::compareAndExchange( 64 reinterpret_cast( tail ), 65 reinterpret_cast( thread_node ), 66 0 67 ) == reinterpret_cast( thread_node ) 68 ) 69 { 70 return; 71 } 72 73 while( thread_node->next == 0 ) 74 CPU_RELAX(); 75 } 76 77 thread_node->next->locked = false; 78 } 79 }; 80 81 boost::thread_specific_ptr MCSLock::tls_node; 82 #endif
mcs_test.cpp
1 #include "mcs.hpp"
2 #include <iostream>
3 #include <pthread.h>
4 #include <vector>
5 #define NUM_THREADS 16
6 #define NUM_ITERATIONS 100
7
8 std::vector<int> elements;
9 MCSLock::mcs_lock *tail = 0;
10
11 void* thread_run( void* data )
12 {
13 MCSLock lock( &tail );
14 for( int i = 0; i < NUM_ITERATIONS; ++i )
15 {
16 lock.lock();
17 elements.push_back( i );
18 lock.unlock();
19 }
20
21 return 0;
22 }
23
24 int main()
25 {
26 pthread_t threads[ NUM_THREADS ];
27 elements.reserve( NUM_THREADS * NUM_ITERATIONS );
28
29 {
30 for( int i = 0; i < NUM_THREADS; ++i )
31 pthread_create( &threads[i], NULL, thread_run, NULL );
32
33 for( int i = 0; i < NUM_THREADS; ++i )
34 pthread_join( threads[i], NULL );
35
36 std::cout <<"\nExiting main thread: " << std::endl;
37 }
38 }
上面的代码是使用clang
编译的问题:
我看到1或2个线程被卡在第50行的lock()中。除了主线程之外,被锁定在lock()中的线程没有其他线程存活。这意味着当其他线程调用unlock()时,它们不会为其他变量设置locked = false并退出。
有关调试的任何指示吗?
坚持了好几个小时,没有任何线索。
答案 0 :(得分:0)
clang是否有内置这些内联asm块(如gcc的__sync_val_compare_and_swap)?为什么重新发明轮子?
其次,我真的考虑将内存clobber添加到loadAndStore。您需要确保编译器在寄存器中保存的任何写入在执行xchgq之前都会刷新到内存中。同样,它会阻止gcc在xchgq之前优化内存读取。要么是坏事。
第三,我将检查while循环的asm输出(thread_node-&gt; locked和thread_node-&gt; next)。由于这些变量不是易失性的,因此gcc可以将其优化为仅执行一次读取。
这些可能无法解决您的问题,但这就是我开始的地方。