C++ lock-free stack is corrupted

Date: 2016-06-10 11:01:59

Tags: c++ stack atomic lock-free corrupt

I am trying to implement a lock-free stack that uses externally managed memory from a bounded, plain C array. I know the reference implementations (e.g. from Anthony Williams: Concurrency in Action) as well as other books and blog/web articles.

The implementation follows those references and avoids the ABA problem, because the external memory locations are addressed by unique indices rather than by recycled pointers. It therefore does not have to deal with memory management at all and is simple.

I wrote some tests that execute pop and push operations on that stack under high load and contention (stress test) as well as single-threaded. The former fail with strange problems that I do not understand and that look obscure to me.

Maybe somebody has an idea?

  1. Problem: Pushing an already popped node back onto the stack fails, because the precondition is violated that the node has no successor (next).

    BOOST_ASSERT(!m_aData.m_aNodes[nNode-1].next);
    

    Reproduction setup: at least 3 threads and a capacity of ~16. It takes about 500 passes; then the push operation fails.

  2. Problem: The number of elements popped by all threads plus the number of elements left on the stack after joining does not match the capacity (nodes get lost in transition).

    BOOST_ASSERT(aNodes.size()+nPopped == nCapacity);
    

    Reproduction setup: 2 threads and a capacity of 2. It takes a lot of passes, for me at least 700. Afterwards the stack head is 0, but only one node is in the popped containers; the node {2,0} is dangling.

  3. I compiled with vs2005, vs2013 and vs2015. All show the same problems (vs2005 is also the reason why the code looks like C++03).

    Here is the basic code for node + stack:

    template <typename sizeT> struct node
    {
      sizeT         cur;  //!< construction invariant
      atomic<sizeT> next;
      atomic<sizeT> data;
    
      explicit node() // invalid node
        : cur(0), next(0), data(0)
      {}
    
      explicit node(sizeT const& nCur, sizeT const& nNext, sizeT const& nData)
        : cur(nCur), next(nNext), data(nData)
      {}
    
      node& operator=(node const& rhs)
      {
        cur  = rhs.cur;
        next.store(rhs.next.load(memory_order_relaxed));
        data.store(rhs.data.load(memory_order_relaxed));
        return *this;
      }
    };
    
    template <typename sizeT> struct stack
    {
    private:
      static memory_order const relaxed = memory_order_relaxed;
      atomic<sizeT> m_aHead;
    
    public:
      explicit stack(sizeT const& nHead) : m_aHead(nHead) {}
    
      template <typename tagT, typename T, std::size_t N>
      typename enable_if<is_same<tagT,Synchronized>,sizeT>::type
      pop(T (&aNodes)[N])
      {
        sizeT nOldHead = m_aHead.load();
    
        for(;;)
        {
          if(!nOldHead) return 0;
    
          BOOST_ASSERT(nOldHead <= N);
          T& aOldHead = aNodes[nOldHead-1];
          sizeT const nNewHead = aOldHead.next.load(/*relaxed*/);
          BOOST_ASSERT(nNewHead <= N);
          sizeT const nExpected = nOldHead;
    
          if(m_aHead.compare_exchange_weak(nOldHead,nNewHead
            /*,std::memory_order_acquire,std::memory_order_relaxed*/))
          {
            BOOST_ASSERT(nExpected == nOldHead);
    
            // <--- from here on aOldHead is thread local ---> //
            aOldHead.next.store(0 /*,relaxed*/);
    
            return nOldHead;
          }
    
          // TODO: add back-off strategy under contention (use loop var)
        }
      }
    
      template <typename tagT, typename T, std::size_t N>
      typename enable_if<is_same<tagT,Synchronized>,void>::type
      push(T (&aNodes)[N], sizeT const& nNewHead)
      {
    #ifndef NDEBUG
        {
          BOOST_ASSERT(0 < nNewHead && nNewHead <= N);
          sizeT const nNext = aNodes[nNewHead-1].next;
          BOOST_ASSERT(!nNext);
        }
    #endif
    
        sizeT nOldHead = m_aHead.load(/*relaxed*/);
    
        for(;;)
        {
          aNodes[nNewHead-1].next.store(nOldHead /*,relaxed*/);
          sizeT const nExpected = nOldHead;
          BOOST_ASSERT(nOldHead <= N);
    
          if(m_aHead.compare_exchange_weak(nOldHead,nNewHead
            /*,std::memory_order_release,std::memory_order_relaxed*/))
          {
            BOOST_ASSERT(nExpected == nOldHead);
            return;
          }
    
          // TODO: add back-off strategy under contention (use loop var)
        }
      }
    };
    

    And here is the rather noisy test class:

    class StackTest
    {
    private:
    
      typedef boost::mpl::size_t<64> Capacity;
      //typedef boost::uint_t<static_log2_ceil<Capacity::value>::value>::least    size_type;
      typedef std::size_t size_type;
    
      static size_type const nCapacity = Capacity::value;
      static size_type const nNodes = Capacity::value;
    
      typedef node<size_type>  Node;
      typedef stack<size_type> Stack;
    
      typedef mt19937                                        Twister;
      typedef random::uniform_int_distribution<std::size_t>  Distribution;
      typedef variate_generator<Twister,Distribution>        Die;
    
      struct Data //!< shared along threads
      {
        Node  m_aNodes[nNodes];
        Stack m_aStack;
    
        explicit Data() : m_aStack(nNodes)
        {
          m_aNodes[0] = Node(1,0,0); // tail of stack
    
          for(size_type i=1; i<nNodes; ++i)
          {
            m_aNodes[i] = Node(static_cast<size_type>(i+1),i,0);
          }
        }
    
        template <typename syncT>
        void Run(
          uuids::random_generator& aUUIDGen,
          std::size_t const&       nPasses,
          std::size_t const&       nThreads)
        {
          std::vector<ThreadLocalData>  aThreadLocalDatas(nThreads,ThreadLocalData(*this));
    
          {
            static std::size_t const N = 100000;
            Die aRepetition(Twister(hash_value(aUUIDGen())),Distribution(0,N));
            Die aAction(Twister(hash_value(aUUIDGen())),Distribution(0,1));
    
            for(std::size_t i=0; i<nThreads; ++i)
            {
              std::vector<bool>& aActions = aThreadLocalDatas[i].m_aActions;
              std::size_t const nRepetition = aRepetition();
              aActions.reserve(nRepetition);
    
              for(std::size_t k=0; k<nRepetition; ++k)
              {
                aActions.push_back(static_cast<bool>(aAction()));
              }
            }
          }
    
          std::size_t nPopped = 0;
    
          if(nThreads == 1)
          {
            std::size_t const i = 0;
            aThreadLocalDatas[i].Run<syncT>(i);
            nPopped += aThreadLocalDatas[i].m_aPopped.size();
          }
          else
          {
            std::vector<boost::shared_ptr<thread> > aThreads;
            aThreads.reserve(nThreads);
    
            for(std::size_t i=0; i<nThreads; ++i)
            {
              aThreads.push_back(boost::make_shared<thread>(boost::bind(&ThreadLocalData::Run<syncT>,&aThreadLocalDatas[i],i)));
            }
    
            for(std::size_t i=0; i<nThreads; ++i)
            {
              aThreads[i]->join();
              nPopped += aThreadLocalDatas[i].m_aPopped.size();
            }
          }
    
          std::vector<size_type> aNodes;
          aNodes.reserve(nCapacity);
    
          while(size_type const nNode = m_aStack.pop<syncT>(m_aNodes))
          {
            aNodes.push_back(nNode);
          }
    
          std::clog << dump(m_aNodes,4) << std::endl;
    
          BOOST_ASSERT(aNodes.size()+nPopped == nCapacity);
        }
      };
    
    
      struct ThreadLocalData //!< local to each thread
      {
        Data&                  m_aData;    //!< shared along threads
        std::vector<bool>      m_aActions; //!< either pop or push
        std::vector<size_type> m_aPopped;   //!< popp'ed nodes
    
        explicit ThreadLocalData(Data& aData)
          : m_aData(aData), m_aActions(), m_aPopped()
        {
          m_aPopped.reserve(nNodes);
        }
    
        template <typename syncT>
        void Run(std::size_t const& k)
        {
          BOOST_FOREACH(bool const& aAction, m_aActions)
          {
            if(aAction)
            {
              if(size_type const nNode = m_aData.m_aStack.pop<syncT>(m_aData.m_aNodes))
              {
                BOOST_ASSERT(!m_aData.m_aNodes[nNode-1].next);
                m_aPopped.push_back(nNode);
              }
            }
            else
            {
              if(!m_aPopped.empty())
              {
                size_type const nNode = m_aPopped.back();
                size_type const nNext = m_aData.m_aNodes[nNode-1].next;
                ASSERT_IF(!nNext,"nNext=" << nNext << " for " << m_aData.m_aNodes[nNode-1] << "\n\n" << dump(m_aData.m_aNodes));
                m_aData.m_aStack.push<syncT>(m_aData.m_aNodes,nNode);
                m_aPopped.pop_back();
              }
            }
          }
        }
      };
    
    
      template <typename syncT>
      static void PushPop(
        uuids::random_generator& aUUIDGen,
        std::size_t const&       nPasses,
        std::size_t const&       nThreads)
      {
        BOOST_ASSERT(nThreads > 0);
        BOOST_ASSERT(nThreads == 1 || (is_same<syncT,Synchronized>::value));
    
        std::clog << BOOST_CURRENT_FUNCTION << " with threads=" << nThreads << std::endl;
    
        for(std::size_t nPass=0; nPass<nPasses; ++nPass)
        {
          std::ostringstream s;
          s << "  " << nPass << "/" << nPasses << ": ...";
          std::clog << s.str() << std::endl;
    
          Data().Run<syncT>(aUUIDGen,nPass,nThreads);
        }
      }
    
    public:
    
      static void Run()
      {
        typedef StackTest self_t;
    
        uuids::random_generator aUUIDGen;
    
        static std::size_t const nMaxPasses = 1000;
        Die aPasses(Twister(hash_value(aUUIDGen())),Distribution(0,nMaxPasses));
    
        {
         //std::size_t const nThreads = 2; // thread::hardware_concurrency()+1;
          std::size_t const nThreads = thread::hardware_concurrency()+1;
          self_t().PushPop<Synchronized>(aUUIDGen,aPasses(),nThreads);
        }
      }
    };
    

    Here is the link to download all required files.

1 Answer:

Answer 0 (score: 0):

Both problems are just another aspect of the ABA problem.

stack: {2,1}, {1,0}

  1. Thread A
    1. pops
       new_head = 1
       ... time slice exceeded
  2. Thread B
    1. pops
       stack: {1,0}, popped: {2,0}
    2. pops
       stack: {}, popped: {2,0}, {1,0}
    3. pushes {2,0}
       stack: {2,0}
  3. Thread A
    1. pop continues
       cmp_exch succeeds, because head is 2. stack: {}, head = 1 --- WRONG, 0 would be correct
  4. Any problem may now occur, because access to the nodes is no longer thread-local. This includes unexpected modification of the next field of already popped nodes (problem 1) or losing nodes (problem 2).

    head and next would need to be modified within a single cmp_exch to avoid this problem.
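
For what it is worth, one common way to make that single compare-exchange detect such an interleaving is to pack the head index together with a modification counter ("tag") into one atomic word. Below is a minimal C++11 sketch of that idea, not the poster's code: it assumes a lock-free 8-byte CAS on the target and a node type whose next member is an atomic index (the question targets VS2005/C++03, so this is illustrative only).

    #include <atomic>
    #include <cstdint>
    #include <cstddef>

    struct tagged_head
    {
      std::uint32_t index; // node index into the array, 0 == empty stack
      std::uint32_t tag;   // bumped on every successful pop to defeat ABA
    };

    // assumed to be lock-free (8-byte CAS) on the target platform
    std::atomic<tagged_head> g_head(tagged_head{0, 0});

    template <typename Node, std::size_t N>
    std::uint32_t pop(Node (&aNodes)[N])
    {
      tagged_head old_head = g_head.load();

      for(;;)
      {
        if(!old_head.index) return 0; // stack empty

        std::uint32_t const nNext = aNodes[old_head.index-1].next.load();
        tagged_head const new_head = { nNext, old_head.tag + 1 };

        // Even if another thread pops and re-pushes old_head.index in the
        // meantime (the interleaving above), its pops bump the tag, so this
        // compare_exchange fails and the loop re-reads a consistent next.
        if(g_head.compare_exchange_weak(old_head, new_head))
        {
          aNodes[old_head.index-1].next.store(0);
          return old_head.index;
        }
      }
    }

Because the tag is incremented on every successful pop, thread A's stale compare_exchange in the sequence above would fail after thread B's intervening operations, and thread A would retry and read the correct new head (0).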