作为一项学习性质的练习,我正在使用条件变量实现线程池。控制器线程创建一个线程池,池中的线程等待信号(即某个原子变量被设置为大于零的值)。收到信号后,这些线程被唤醒并执行各自的工作;最后一个线程完成时,由它通知主线程唤醒。控制器线程会一直阻塞,直到最后一个线程完成为止。之后,该线程池可以再次使用。
我时不时会在控制器线程上遇到超时——它在等待工作线程发出完成信号(很可能是递减活动工作计数器时存在竞争条件)。因此,为了让线程池更可靠,我把条件变量 wait 方法的 “wait(lck)” 形式换成了带谓词的 “wait(lck, predicate)” 形式。这样改了之后,线程池的行为似乎允许活动工作计数器被递减到 0 以下(计数器为 0 正是重新唤醒控制器线程的条件)——也就是说我遇到了竞争条件。我在 Stack Overflow 和其他各种网站上读了无数关于原子变量、同步、内存排序以及虚假唤醒和丢失唤醒的文章,并尽我所能把学到的东西都用上了,但仍然怎么也想不通为什么我写的带谓词的等待就是不起作用。计数器最高只应等于池中的线程数(例如 8),最低只应为零。我已经开始对自己失去信心了——做一件本质上如此简单的事情本不该这么难。显然这里还有一些我需要学习的东西 :)
当然,考虑到可能存在竞争条件,我确保了驱动线程池唤醒和终止的两个变量都是原子的,并且都只在 unique_lock 的保护下才会被修改。具体来说,我确保在向线程池发起请求时,先获得锁,把活动线程计数器从 0 改为 8,解锁互斥锁,然后调用 “notify_all”。此后,只有当最后一个工作线程把计数器递减到零并调用 “notify_one” 时,控制器线程才会在活动线程计数为零的情况下被唤醒。
在工作线程中,条件变量会一直等待,仅当活动线程计数大于零时才唤醒;随后线程解锁互斥锁,并行执行创建线程池时预先分配给它的处理器的工作,重新获取互斥锁,并以原子方式递减活动线程计数。然后,在(按理说)仍受锁保护的情况下,它检查自己是否是最后一个仍处于活动状态的线程;如果是,则再次解锁互斥锁并调用 “notify_one” 来唤醒控制器。
问题是——即使只执行 1 到 2 次迭代,活动线程计数器也会一再降到零以下。如果在新工作负载开始时检查活动线程计数,我会发现它已经降到 -6 左右——就好像线程池在工作完成之前就被允许重新唤醒控制器线程一样。
鉴于线程计数器和终止标志都是原子变量,并且只在同一个互斥锁的保护下才会被修改,而且我对所有更新都使用了顺序一致的内存排序,我实在看不出这是怎么发生的,完全没有头绪了。
#include <stdafx.h>
#include <Windows.h>
#include <iostream>
#include <thread>
using std::thread;
#include <mutex>
using std::mutex;
using std::unique_lock;
#include <condition_variable>
using std::condition_variable;
#include <atomic>
using std::atomic;
#include <chrono>
#include <vector>
using std::vector;
// Interface for the unit of work executed by a pool thread.
class IWorkerThreadProcessor
{
public:
    // Virtual destructor so that implementations can be destroyed safely
    // through a pointer to this interface.
    virtual ~IWorkerThreadProcessor() = default;

    // Performs one unit of work. The argument is the index of the worker
    // thread that is making the call.
    virtual void Process(int) = 0;
};
class MyProcessor : public IWorkerThreadProcessor
{
int index_ = 0;
public:
MyProcessor(int index)
{
index_ = index;
}
void Process(int threadindex)
{
for (int i = 0; i < 5000000; i++);
std::cout << '(' << index_ << ':' << threadindex << ") ";
}
};
// Shows a Win32 message box containing the wide-string text x, with an empty
// caption and a single OK button. The do { } while(false) wrapper lets the
// macro be used like a single statement (e.g. inside an unbraced if/else).
#define MsgBox(x) do{ MessageBox(NULL, x, L"", MB_OK ); }while(false)
class ThreadPool
{
private:
atomic<unsigned int> invokations_ = 0;
//This goes negative when using the wait_for with predicate
atomic<int> threadsActive_ = 0;
atomic<bool> terminateFlag_ = false;
vector<std::thread> threads_;
atomic<unsigned int> poolSize_ = 0;
mutex mtxWorker_;
condition_variable cvSignalWork_;
condition_variable cvSignalComplete_;
public:
~ThreadPool()
{
TerminateThreads();
}
void Init(std::vector<IWorkerThreadProcessor*>& processors)
{
unique_lock<mutex> lck2(mtxWorker_);
threadsActive_ = 0;
terminateFlag_ = false;
poolSize_ = processors.size();
for (int i = 0; i < poolSize_; ++i)
threads_.push_back(thread(&ThreadPool::launchMethod, this, processors[i], i));
}
void ProcessWorkload(std::chrono::milliseconds timeout)
{
//Only used to see how many invocations I was getting through before experiencing the issue - sadly it's only one or two
invocations_++;
try
{
unique_lock<mutex> lck(mtxWorker_);
//!!!!!! If I use the predicated wait this break will fire !!!!!!
if (threadsActive_.load() != 0)
__debugbreak();
threadsActive_.store(poolSize_);
lck.unlock();
cvSignalWork_.notify_all();
lck.lock();
if (!cvSignalComplete_.wait_for(
lck,
timeout,
[this] { return threadsActive_.load() == 0; })
)
{
//As you can tell this has taken me through a journey trying to characterise the issue...
if (threadsActive_ > 0)
MsgBox(L"Thread pool timed out with still active threads");
else if (threadsActive_ == 0)
MsgBox(L"Thread pool timed out with zero active threads");
else
MsgBox(L"Thread pool timed out with negative active threads");
}
}
catch (std::exception e)
{
__debugbreak();
}
}
void launchMethod(IWorkerThreadProcessor* processor, int threadIndex)
{
do
{
unique_lock<mutex> lck(mtxWorker_);
//!!!!!! If I use this predicated wait I see the failure !!!!!!
cvSignalWork_.wait(
lck,
[this] {
return
threadsActive_.load() > 0 ||
terminateFlag_.load();
});
//!!!!!!!! Does not cause the failure but obviously will not handle
//spurious wake-ups !!!!!!!!!!
//cvSignalWork_.wait(lck);
if (terminateFlag_.load())
return;
//Unlock to parallelise the work load
lck.unlock();
processor->Process(threadIndex);
//Re-lock to decrement the work count
lck.lock();
//This returns the value before the subtraction so theoretically if the previous value was 1 then we're the last thread going and we can now signal the controller thread to wake. This is the only place that the decrement happens so I don't know how it could possibly go negative
if (threadsActive_.fetch_sub(1, std::memory_order_seq_cst) == 1)
{
lck.unlock();
cvSignalComplete_.notify_one();
}
else
lck.unlock();
} while (true);
}
void TerminateThreads()
{
try
{
unique_lock<mutex> lck(mtxWorker_);
if (!terminateFlag_)
{
terminateFlag_ = true;
lck.unlock();
cvSignalWork_.notify_all();
for (int i = 0; i < threads_.size(); i++)
threads_[i].join();
}
}
catch (std::exception e)
{
__debugbreak();
}
}
};
int main()
{
std::vector<IWorkerThreadProcessor*> processors;
for (int i = 0; i < 8; i++)
processors.push_back(new MyProcessor(i));
std::cout << "Instantiating thread pool\n";
auto pool = new ThreadPool;
std::cout << "Initialisting thread pool\n";
pool->Init(processors);
std::cout << "Thread pool initialised\n";
for (int i = 0; i < 200; i++)
{
std::cout << "Workload " << i << "\n";
pool->ProcessWorkload(std::chrono::milliseconds(500));
std::cout << "Workload " << i << " complete." << "\n";
}
for (auto a : processors)
delete a;
delete pool;
return 0;
}
答案 0 :(得分:1)
class ThreadPool
{
private:
atomic<unsigned int> invokations_ = 0;
std::atomic<unsigned int> awakenings_ = 0;
std::atomic<unsigned int> startedWorkloads_ = 0;
std::atomic<unsigned int> completedWorkloads_ = 0;
atomic<bool> terminate_ = false;
atomic<bool> stillFiring_ = false;
vector<std::thread> threads_;
atomic<unsigned int> poolSize_ = 0;
mutex mtx_;
condition_variable cvSignalWork_;
condition_variable cvSignalComplete_;
public:
~ThreadPool()
{
TerminateThreads();
}
void Init(std::vector<IWorkerThreadProcessor*>& processors)
{
unique_lock<mutex> lck2(mtx_);
//threadsActive_ = 0;
terminate_ = false;
poolSize_ = processors.size();
for (int i = 0; i < poolSize_; ++i)
threads_.push_back(thread(&ThreadPool::launchMethod, this, processors[i], i));
awakenings_ = 0;
completedWorkloads_ = 0;
startedWorkloads_ = 0;
invokations_ = 0;
}
void ProcessWorkload(std::chrono::milliseconds timeout)
{
try
{
unique_lock<mutex> lck(mtx_);
invokations_++;
if (startedWorkloads_ != 0)
__debugbreak();
if (completedWorkloads_ != 0)
__debugbreak();
if (awakenings_ != 0)
__debugbreak();
if (stillFiring_)
__debugbreak();
stillFiring_ = true;
lck.unlock();
cvSignalWork_.notify_all();
lck.lock();
if (!cvSignalComplete_.wait_for(
lck,
timeout,
//[this] { return this->threadsActive_.load() == 0; })
[this] { return completedWorkloads_ == poolSize_ && !stillFiring_; })
)
{
if (completedWorkloads_ < poolSize_)
{
if (startedWorkloads_ < poolSize_)
MsgBox(L"Thread pool timed out with some threads unstarted");
else if (startedWorkloads_ == poolSize_)
MsgBox(L"Thread pool timed out with all threads started but not all completed");
}
else
__debugbreak();
}
if (completedWorkloads_ != poolSize_)
__debugbreak();
if (awakenings_ != poolSize_)
__debugbreak();
awakenings_ = 0;
completedWorkloads_ = 0;
startedWorkloads_ = 0;
}
catch (std::exception e)
{
__debugbreak();
}
}
void launchMethod(IWorkerThreadProcessor* processor, int threadIndex)
{
do
{
unique_lock<mutex> lck(mtx_);
cvSignalWork_.wait(
lck,
[this] {
return
(stillFiring_ && (startedWorkloads_ < poolSize_)) ||
terminate_;
});
awakenings_++;
if (startedWorkloads_ == 0 && terminate_)
return;
if (stillFiring_ && startedWorkloads_ < poolSize_) //guard against spurious wakeup
{
startedWorkloads_++;
if (startedWorkloads_ == poolSize_)
stillFiring_ = false;
lck.unlock();
processor->Process(threadIndex);
lck.lock();
completedWorkloads_++;
if (completedWorkloads_ == poolSize_)
{
lck.unlock();
cvSignalComplete_.notify_one();
}
else
lck.unlock();
}
else
lck.unlock();
} while (true);
}
void TerminateThreads()
{
try
{
unique_lock<mutex> lck(mtx_);
if (!terminate_) //Don't attempt to double-terminate
{
terminate_ = true;
lck.unlock();
cvSignalWork_.notify_all();
for (int i = 0; i < threads_.size(); i++)
threads_[i].join();
}
}
catch (std::exception e)
{
__debugbreak();
}
}
};
答案 1 :(得分:0)
我不确定以下内容是否可以解决问题,但我认为错误如下所示:
此
if (!cvSignalComplete_.wait_for(
lck,
timeout,
[this] { return threadsActive_.load() == 0; })
)
应替换为
if (!cvSignalComplete_.wait_for(
lck,
timeout,
[&] { return threadsActive_.load() == 0; })
)
看起来 lambda 好像没有访问到类实例的成员。下面是支持我这一看法的参考资料:请查看此页面(page)的 Lambda Capture 部分。
编辑: 您正在使用的另一个地方等待lambda。
cvSignalWork_.wait(
lck,
[this] {
return
threadsActive_.load() > 0 ||
terminateFlag_.load();
});
也许修改所有的lambda,然后查看是否有效?
我正在查看lambda的原因是因为它似乎类似于虚假唤醒的情况。希望对您有所帮助。