Question

运行一个多线程程序，我注意到程序运行速度比使用1个线程更快，而4个线程，尽管CPU有4个核心。

经过调查，我发现这个问题只在洗牌时出现。

在我为重现问题而创建的最小程序之下：

SELECT * 
FROM   unnest(
    ARRAY[1, 2, 3]
   ,ARRAY[2, 3, 4]
) as U(array_1_item , array_2_item );

打印：

#include <math.h>
#include <future>
#include <ctime>
#include <vector>
#include <iostream>
#include <algorithm>

#define NB_JOBS 5000.0
#define MAX_CORES 8

static bool _no_shuffle(int nb_jobs){
  bool b=false;
  for(int i=0;i<nb_jobs;i++){
    std::vector<float> v;
    for(float i=0;i<100.0;i+=0.01) v.push_back(i);
    float sum = 0;
    // no meaning, just using CPU
    for(int i=0;i<v.size();i++) sum+=pow(sin(v[i]),1.1);
    if(sum==100) b=true;
  }
  return b;

}

static bool _shuffle(int nb_jobs){
  bool b=false;
  for(int i=0;i<nb_jobs;i++){
    std::vector<float> v;
    for(float i=0;i<100.0;i+=0.01) v.push_back(i);
    std::random_shuffle(v.begin(), v.end()); // !!! 
    if (v[0]==0.0) b=true;
  }
  return b;
}

static double _duration(int nb_cores){

  auto start = std::chrono::system_clock::now();

  int nb_jobs_per_core = rint ( NB_JOBS / (float)nb_cores );

  std::vector < std::future<bool> > futures;
  for(int i=0;i<nb_cores;i++){
    futures.push_back( std::async(std::launch::async,_shuffle,nb_jobs_per_core));
  }
  for (auto &e: futures) {
    bool foo = e.get();
  }

  auto end = std::chrono::system_clock::now();
  std::chrono::duration<double> elapsed = end - start;

  return elapsed.count();

}


int main(){

  for(int nb_cores=1 ; nb_cores<=MAX_CORES ; nb_cores++){

    double duration = _duration(nb_cores);
    std::cout << nb_cores << " threads: " << duration << " seconds\n";

  }

  return 0;

}

使用线程会减慢程序的速度！

另一方面，更换时：

1 threads: 1.18503 seconds
2 threads: 9.6502 seconds
3 threads: 3.64973 seconds
4 threads: 9.8834 seconds
5 threads: 10.7937 seconds
6 threads: 11.6447 seconds
7 threads: 11.9236 seconds
8 threads: 12.1254 seconds

使用：

std::async(std::launch::async,_shuffle,nb_jobs_per_core));

然后：

std::async(std::launch::async,_no_shuffle,nb_jobs_per_core));

这看起来像预期的那样，表明洗牌确实是个问题。

洗牌不是线程友好的，如果是，那么如何在多线程程序中对一个向量进行洗牌？

c ++ async：如何在多线程上下文中混淆一个向量？

0 个答案: