为什么线程池工作缓慢?

时间:2020-08-21 08:03:25

标签: c++ multithreading

我有一个程序可以使用N个线程对给定目录中所有.log文件中的所有单词进行计数。
我写了这样的东西。
ThreadPool.h

#ifndef THREAD_POOL_H
#define THREAD_POOL_H

#include <boost/thread/condition_variable.hpp>
#include <boost/thread.hpp>

#include <future> // I don't how to work with boost future
#include <queue>
#include <vector>
#include <functional>


/// Fixed-size pool of worker threads that drain one shared FIFO task queue.
///
/// Workers are started by the constructor and joined by the destructor.
/// Tasks that are still queued when the destructor runs are executed before
/// the workers exit (a worker only leaves its loop once the queue is empty).
class ThreadPool
{
public:
    using Task = std::function<void()>; // Type-erased unit of work stored in the queue

    /// Starts `num_threads` workers. If `num_threads` is not positive, no
    /// worker is started and enqueued tasks will never be executed.
    explicit ThreadPool(int num_threads)
    {
        start(num_threads);
    }

    /// Signals shutdown and joins every worker.
    ~ThreadPool()
    {
        stop();
    }

    // The worker lambdas hold a pointer to this object, so it must stay put.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;

    /// Queues `task` for execution on one of the workers.
    ///
    /// @param task  Callable taking no arguments.
    /// @return      Future that yields the callable's result (or rethrows the
    ///              exception it exited with) once a worker has run it.
    template<class T>
    auto enqueue(T task)->std::future<decltype(task())>
    {
        // std::function needs a copyable target, while packaged_task is
        // move-only, so the packaged_task is shared through a shared_ptr.
        auto wrapper = std::make_shared<std::packaged_task<decltype(task()) ()>>(std::move(task));

        // Take the future BEFORE the task becomes visible to the workers:
        // afterwards a worker may already be invoking the packaged_task, and
        // get_future() must not run concurrently with operator().
        auto result = wrapper->get_future();

        {
            boost::unique_lock<boost::mutex> lock{ mutex_p };
            tasks_p.emplace([wrapper] {
                (*wrapper)();
            });
        }

        // Notify after releasing the lock so the woken worker can take it at once.
        event_p.notify_one();

        return result;
    }

    /*void enqueue(Task task)
    {
        {
            boost::unique_lock<boost::mutex> lock { mutex_p };
            tasks_p.emplace(std::move(task));
            event_p.notify_one();
        }
    }*/

private:
    std::vector<boost::thread> threads_p; // Worker threads
    std::queue<Task>           tasks_p;   // Pending tasks, guarded by mutex_p
    boost::condition_variable  event_p;   // Signalled on new task and on shutdown
    boost::mutex               mutex_p;   // Guards tasks_p and isStop

    bool                       isStop = false; // Set once by stop(), guarded by mutex_p

    /// Creates `num_threads` workers, each running the fetch-and-execute loop.
    void start(int num_threads)
    {
        for (int i = 0; i < num_threads; ++i)
        {
            threads_p.emplace_back([this] {
                while (true)
                {
                    Task task;

                    {
                        boost::unique_lock<boost::mutex> lock(mutex_p);

                        // Sleep until there is work to do or shutdown was requested.
                        event_p.wait(lock, [this] { return isStop || !tasks_p.empty(); });

                        // Leave only when shutting down AND nothing is left to run.
                        if (isStop && tasks_p.empty())
                            break;

                        task = std::move(tasks_p.front());
                        tasks_p.pop();
                    }

                    // Run the task outside the lock so other workers can fetch theirs.
                    task();
                }
            });
        }
    }

    /// Requests shutdown, wakes all workers and waits for them to finish.
    void stop() noexcept
    {
        {
            boost::unique_lock<boost::mutex> lock(mutex_p);
            isStop = true;
        }

        event_p.notify_all();

        for (auto& thread : threads_p)
        {
            // Joining a thread that is not joinable violates join()'s precondition.
            if (thread.joinable())
                thread.join();
        }
    }
};

#endif

main.cpp

#include "ThreadPool.h"

#include <locale.h>
#include <Windows.h>

#include <chrono>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>

#include <boost/filesystem.hpp>
#include <boost/thread.hpp>


namespace bfs = boost::filesystem;

//int count_words(boost::filesystem::ifstream& file)
//{
//  int counter = 0;
//  std::string buffer;
//  while (file >> buffer)
//  {
//      ++counter;
//  }
//  
//  return counter;
//}
//
int count_words(boost::filesystem::path filename)
{
    boost::filesystem::ifstream ifs(filename);
    return std::distance(std::istream_iterator<std::string>(ifs), std::istream_iterator<std::string>());
}

int main(int argc, const char* argv[])
{
    std::cin.tie(0);
    std::ios_base::sync_with_stdio(false);

    bfs::path path = argv[1];
    // If this path is exist and if this is dir
    if (bfs::exists(path) && bfs::is_directory(path))
    {
        // Number of threads. Default = 4
        int n = (argc == 3 ? atoi(argv[2]) : 4);
        ThreadPool pool(n);

        // Container to store all filenames and number of words inside them
        //std::map<bfs::path, std::future<int>> all_files_and_sums;
        std::vector<std::future<int>> futures;
        
        auto start = std::chrono::high_resolution_clock::now();

        // Iterate all files in dir
        for (auto& p : bfs::directory_iterator(path)) {
            // Takes only .txt files
            if (p.path().extension() == ".log") {
                // Future for taking value from here
                auto fut = pool.enqueue([p]() {
                    // In this lambda function I count all words in file and return this value
                    int result = count_words(p.path());
                    static int count = 0;
                    ++count;
                    std::ostringstream oss;
                    oss << count << ". TID, " << GetCurrentThreadId() << "\n";
                    std::cout << oss.str();
                    return result;
                });
                // "filename = words in this .txt file"
                futures.emplace_back(std::move(fut));
            }
        }

        int result = 0;

        for (auto& f : futures)
        {
            result += f.get();
        }

        auto stop = std::chrono::high_resolution_clock::now();

        auto duration = std::chrono::duration_cast<std::chrono::seconds>(stop - start);

        std::cout << "Result: " << result << "\n";
 
        std::cout << duration.count() << '\n';
    }
    else
        std::perror("Dir is not exist");
}

变量N为4(线程数)。我的目录中有320个.log文件,我需要统计这些文件中的单词数。一切正常,但是当变量“count”达到180时,程序会停顿一会儿,然后继续执行,但速度慢得多。
可能是什么原因? CPU-Xeon e5430(我已经在另一个CPU上测试了该程序-结果相同)。

1 个答案:

答案 0 :(得分:0)

这取决于您如何衡量“慢”,但基本上,您使用的是最差的模型之一:

  1. 在所有线程之间共享的一个任务队列

这种方法的问题在于,每个线程都会阻塞在这个共享队列上。

更好的模型是类似的

  1. 任务窃取:您可以尝试为每个线程创建一个任务队列,然后使用 try_lock(非阻塞),并让每个线程在自己无事可做时从其他线程的任务队列中“窃取”工作。

Sean Parent 关于并发的演讲(Talk about Concurrency)中对此有很好的解释。