为什么即使使用锁定,多线程版本的数组元素总和也会更快?

时间:2017-08-27 06:17:38

标签: c++ multithreading

我正在比较两个版本的数组元素的计算总和:序列化和并行化。对于并行化版本,我在函数的开头获取一个计算部分和的锁。由于它将使顺序计算部分和,我希望并行版本比串行版本慢。但是,当数组大小与线程池大小的比率较高时,并行化版本更快。你能解释一下原因吗?

#include "stdafx.h"
#include <thread>
#include <string>
#include <iostream>
#include <functional>
#include <vector>
#include <cmath>
#include <chrono>
#include <mutex>
#include <random>
#include <atomic>

int total_entries;
float sum;
size_t occupancy;
std::mutex m;

#define SIZE 500000
#define THREAD_POOL 50

void fill_detector(std::vector<float> &det, size_t n = SIZE) {
    std::default_random_engine generator;
    std::uniform_real_distribution<float> distribution(-0.2, 1.0);

    if (!det.empty())
        det.erase(det.begin(), det.end());
    det.reserve(n);

    for (size_t i = 0; i<n; ++i)
        det.push_back(distribution(generator));
}

size_t serial_occupancy(std::vector<float> &det) {
    size_t sum = 0;
    for (size_t i = 0; i < det.size(); ++i) {
        if (det[i] > 0.0f) {
            ++sum;
        }
    }
    return sum;
}

void partial_occupancy(std::vector<float> &det, size_t begin, size_t end) {
    m.lock();
    for (size_t i = begin; i<end; ++i) {
        if (det[i] > 0.0f) {
            ++occupancy;
        }
        //std::cout << i << " ";
    }
    m.unlock();
}

int _tmain(int argc, _TCHAR* argv[])
{
    std::vector<float> det;

    fill_detector(det, SIZE);

    // Serial calculation

    auto start = std::chrono::high_resolution_clock::now();
    std::cout << "Occupancy is " << serial_occupancy(det) << std::endl;

    auto end = std::chrono::high_resolution_clock::now();
    auto duration = end - start;
    std::cout << std::chrono::duration<float, std::milli>(duration).count() << std::endl;

    // Multithreaded calculation
    start = std::chrono::high_resolution_clock::now();
    occupancy = 0;
    std::thread pool[THREAD_POOL];
    size_t chunk = SIZE / THREAD_POOL;


    for (size_t t = 0; t<THREAD_POOL; ++t) {
        pool[t] = std::thread(partial_occupancy, std::ref(det), chunk*t, chunk*(t + 1));
    }


    for (int t = 0; t<THREAD_POOL; ++t)
        pool[t].join();

    std::cout << "Multi-thread occupancy is " << occupancy << std::endl;
    end = std::chrono::high_resolution_clock::now();
    duration = end - start;
    std::cout << std::chrono::duration<float, std::milli>(duration).count() << std::endl;

    return 0;
}

0 个答案:

没有答案