我正在尝试实现一个无锁堆栈,可以从有界普通c数组中使用外部托管内存。我知道参考实现(如来自Anthony Williams:Concurrency in Action)以及其他书籍和博客/网络文章。
实现遵循这些引用并避免ABA问题,因为外部存储器位置使用唯一索引而不是循环指针来寻址。因此,它根本不需要处理mem管理而且很简单。
我编写了一些测试,在高负载和争用(压力测试)和单线程下对该堆栈执行pop和push操作。前者因奇怪的问题而失败,我不明白,而且看起来模糊不清。
也许有人有想法?
问题:将已经弹出的节点推回堆栈失败,因为违反了前提条件,该节点没有后继节点(下一个)。
BOOST_ASSERT(!m_aData.m_aNodes[nNode-1].next);
再现设置:至少3个线程,容量为~16。大约500次传球。然后推操作失败。
问题:所有线程弹出的元素数和连接后堆栈中剩余的元素数与容量不匹配(转换中节点丢失)。
BOOST_ASSERT(aNodes.size()+nPopped == nCapacity);
再现设置:2个线程和容量2.需要大量的传递,对我来说至少700.在堆栈头为0之后,但弹出的容器中只有一个节点。节点{2,0}悬空。
我用vs2005,vs2013和vs2015编译。所有都有相同的问题(vs2005也是代码看起来像C ++ 03的原因)。
这是node + stack
的基本代码template <typename sizeT> struct node
{
sizeT cur; //!< construction invariant
atomic<sizeT> next;
atomic<sizeT> data;
explicit node() // invalid node
: cur(0), next(0), data(0)
{}
explicit node(sizeT const& nCur, sizeT const& nNext, sizeT const& nData)
: cur(nCur), next(nNext), data(nData)
{}
node& operator=(node const& rhs)
{
cur = rhs.cur;
next.store(rhs.next.load(memory_order_relaxed));
data.store(rhs.data.load(memory_order_relaxed));
return *this;
}
};
template <typename sizeT> struct stack
{
private:
static memory_order const relaxed = memory_order_relaxed;
atomic<sizeT> m_aHead;
public:
explicit stack(sizeT const& nHead) : m_aHead(nHead) {}
template <typename tagT, typename T, std::size_t N>
typename enable_if<is_same<tagT,Synchronized>,sizeT>::type
pop(T (&aNodes)[N])
{
sizeT nOldHead = m_aHead.load();
for(;;)
{
if(!nOldHead) return 0;
BOOST_ASSERT(nOldHead <= N);
T& aOldHead = aNodes[nOldHead-1];
sizeT const nNewHead = aOldHead.next.load(/*relaxed*/);
BOOST_ASSERT(nNewHead <= N);
sizeT const nExpected = nOldHead;
if(m_aHead.compare_exchange_weak(nOldHead,nNewHead
/*,std::memory_order_acquire,std::memory_order_relaxed*/))
{
BOOST_ASSERT(nExpected == nOldHead);
// <--- from here on aOldHead is thread local ---> //
aOldHead.next.store(0 /*,relaxed*/);
return nOldHead;
}
// TODO: add back-off strategy under contention (use loop var)
}
}
template <typename tagT, typename T, std::size_t N>
typename enable_if<is_same<tagT,Synchronized>,void>::type
push(T (&aNodes)[N], sizeT const& nNewHead)
{
#ifndef NDEBUG
{
BOOST_ASSERT(0 < nNewHead && nNewHead <= N);
sizeT const nNext = aNodes[nNewHead-1].next;
BOOST_ASSERT(!nNext);
}
#endif
sizeT nOldHead = m_aHead.load(/*relaxed*/);
for(;;)
{
aNodes[nNewHead-1].next.store(nOldHead /*,relaxed*/);
sizeT const nExpected = nOldHead;
BOOST_ASSERT(nOldHead <= N);
if(m_aHead.compare_exchange_weak(nOldHead,nNewHead
/*,std::memory_order_release,std::memory_order_relaxed*/))
{
BOOST_ASSERT(nExpected == nOldHead);
return;
}
// TODO: add back-off strategy under contention (use loop var)
}
}
};
和相当嘈杂的测试类
class StackTest
{
private:
typedef boost::mpl::size_t<64> Capacity;
//typedef boost::uint_t<static_log2_ceil<Capacity::value>::value>::least size_type;
typedef std::size_t size_type;
static size_type const nCapacity = Capacity::value;
static size_type const nNodes = Capacity::value;
typedef node<size_type> Node;
typedef stack<size_type> Stack;
typedef mt19937 Twister;
typedef random::uniform_int_distribution<std::size_t> Distribution;
typedef variate_generator<Twister,Distribution> Die;
struct Data //!< shared along threads
{
Node m_aNodes[nNodes];
Stack m_aStack;
explicit Data() : m_aStack(nNodes)
{
m_aNodes[0] = Node(1,0,0); // tail of stack
for(size_type i=1; i<nNodes; ++i)
{
m_aNodes[i] = Node(static_cast<size_type>(i+1),i,0);
}
}
template <typename syncT>
void Run(
uuids::random_generator& aUUIDGen,
std::size_t const& nPasses,
std::size_t const& nThreads)
{
std::vector<ThreadLocalData> aThreadLocalDatas(nThreads,ThreadLocalData(*this));
{
static std::size_t const N = 100000;
Die aRepetition(Twister(hash_value(aUUIDGen())),Distribution(0,N));
Die aAction(Twister(hash_value(aUUIDGen())),Distribution(0,1));
for(std::size_t i=0; i<nThreads; ++i)
{
std::vector<bool>& aActions = aThreadLocalDatas[i].m_aActions;
std::size_t const nRepetition = aRepetition();
aActions.reserve(nRepetition);
for(std::size_t k=0; k<nRepetition; ++k)
{
aActions.push_back(static_cast<bool>(aAction()));
}
}
}
std::size_t nPopped = 0;
if(nThreads == 1)
{
std::size_t const i = 0;
aThreadLocalDatas[i].Run<syncT>(i);
nPopped += aThreadLocalDatas[i].m_aPopped.size();
}
else
{
std::vector<boost::shared_ptr<thread> > aThreads;
aThreads.reserve(nThreads);
for(std::size_t i=0; i<nThreads; ++i)
{
aThreads.push_back(boost::make_shared<thread>(boost::bind(&ThreadLocalData::Run<syncT>,&aThreadLocalDatas[i],i)));
}
for(std::size_t i=0; i<nThreads; ++i)
{
aThreads[i]->join();
nPopped += aThreadLocalDatas[i].m_aPopped.size();
}
}
std::vector<size_type> aNodes;
aNodes.reserve(nCapacity);
while(size_type const nNode = m_aStack.pop<syncT>(m_aNodes))
{
aNodes.push_back(nNode);
}
std::clog << dump(m_aNodes,4) << std::endl;
BOOST_ASSERT(aNodes.size()+nPopped == nCapacity);
}
};
struct ThreadLocalData //!< local to each thread
{
Data& m_aData; //!< shared along threads
std::vector<bool> m_aActions; //!< either pop or push
std::vector<size_type> m_aPopped; //!< popp'ed nodes
explicit ThreadLocalData(Data& aData)
: m_aData(aData), m_aActions(), m_aPopped()
{
m_aPopped.reserve(nNodes);
}
template <typename syncT>
void Run(std::size_t const& k)
{
BOOST_FOREACH(bool const& aAction, m_aActions)
{
if(aAction)
{
if(size_type const nNode = m_aData.m_aStack.pop<syncT>(m_aData.m_aNodes))
{
BOOST_ASSERT(!m_aData.m_aNodes[nNode-1].next);
m_aPopped.push_back(nNode);
}
}
else
{
if(!m_aPopped.empty())
{
size_type const nNode = m_aPopped.back();
size_type const nNext = m_aData.m_aNodes[nNode-1].next;
ASSERT_IF(!nNext,"nNext=" << nNext << " for " << m_aData.m_aNodes[nNode-1] << "\n\n" << dump(m_aData.m_aNodes));
m_aData.m_aStack.push<syncT>(m_aData.m_aNodes,nNode);
m_aPopped.pop_back();
}
}
}
}
};
template <typename syncT>
static void PushPop(
uuids::random_generator& aUUIDGen,
std::size_t const& nPasses,
std::size_t const& nThreads)
{
BOOST_ASSERT(nThreads > 0);
BOOST_ASSERT(nThreads == 1 || (is_same<syncT,Synchronized>::value));
std::clog << BOOST_CURRENT_FUNCTION << " with threads=" << nThreads << std::endl;
for(std::size_t nPass=0; nPass<nPasses; ++nPass)
{
std::ostringstream s;
s << " " << nPass << "/" << nPasses << ": ...";
std::clog << s.str() << std::endl;
Data().Run<syncT>(aUUIDGen,nPass,nThreads);
}
}
public:
static void Run()
{
typedef StackTest self_t;
uuids::random_generator aUUIDGen;
static std::size_t const nMaxPasses = 1000;
Die aPasses(Twister(hash_value(aUUIDGen())),Distribution(0,nMaxPasses));
{
//std::size_t const nThreads = 2; // thread::hardware_concurrency()+1;
std::size_t const nThreads = thread::hardware_concurrency()+1;
self_t().PushPop<Synchronized>(aUUIDGen,aPasses(),nThreads);
}
}
};
以下是link下载所有必需文件。
答案 0 :(得分:0)
这两个问题只是ABA问题的另一个方面。
堆叠:{2,1},{1,0}
可能会出现任何问题,因为对节点的访问不再是线程本地的。这包括弹出节点(问题1)或丢失节点(问题2)的下一个意外修改。
head + next需要在一个cmp_exch中修改以避免这个问题。