Question

我试图使用蒙特卡罗（Monte Carlo）方法计算 π 的值，并用 C 语言编写并行代码。我先写了串行（serial）版本，运行正常；但并行版本给出的 π 值是错误的，有时为 0，有时甚至是负值。

我的代码

 #include <pthread.h>
 #include <stdio.h>
#include <stdlib.h>
#include <math.h>
 #define NUM_THREADS 4 // number of worker threads
#define TOT_COUNT 10000055 // total number of random points across all threads

// Thread entry point for the Monte Carlo estimate of pi.
//
// threadid carries the thread's index packed into the pointer value. Each
// thread samples TOT_COUNT / NUM_THREADS points uniformly in the square
// [-1, 1) x [-1, 1); the thread with index 0 additionally handles the
// TOT_COUNT % NUM_THREADS leftover points.
//
// Returns a malloc'ed float holding the number of points that fell inside the
// unit circle, or NULL if the allocation failed. The caller receives the
// pointer through pthread_join and is responsible for freeing it.
void *doCalcs(void *threadid)
{
    const long tid = (long)threadid;
    long iterations = TOT_COUNT / NUM_THREADS;
    long hits = 0;
    long i;
    unsigned int rand_state;

    // The result lives on the heap so that it outlives this thread's stack.
    float *in_count = (float *)malloc(sizeof(float));
    if (in_count == NULL) {
        return NULL;
    }

    // Thread 0 picks up the points that do not divide evenly among threads.
    if (tid == 0) {
        iterations += TOT_COUNT % NUM_THREADS;
    }

    // Seed the per-thread generator from the thread index. rand() keeps hidden
    // global state and must not be called concurrently, so it is avoided here;
    // the multiplier only spreads neighbouring indices across the seed space.
    rand_state = ((unsigned int)tid + 1u) * 2654435761u;

    for (i = 0; i < iterations; i++) {
        const double x = rand_r(&rand_state) / ((double)RAND_MAX + 1) * 2.0 - 1.0;
        const double y = rand_r(&rand_state) / ((double)RAND_MAX + 1) * 2.0 - 1.0;
        // Comparing the squared distance avoids a needless sqrt().
        if (x * x + y * y < 1.0) {
            hits++;
        }
    }

    // A float represents every integer up to 2^24 exactly, which covers the
    // per-thread counts produced by the constants above.
    *in_count = (float)hits;

    // Hand the result back to pthread_join; falling off the end of the
    // function without returning left the caller with a garbage pointer.
    return in_count;
}


 // Spawns NUM_THREADS workers running doCalcs, sums the per-thread counts of
 // points inside the unit circle and prints 4 * inside / TOT_COUNT as the
 // estimate of pi. Each worker is expected to return a malloc'ed float, which
 // is released here after it has been read.
 int main(int argc, char *argv[])
 {
     pthread_t threads[NUM_THREADS];
     double tot_in = 0.0;
     long t;
     int rc;

     (void)argc;
     (void)argv;

     for (t = 0; t < NUM_THREADS; t++) {
         // The thread index travels inside the pointer argument itself.
         rc = pthread_create(&threads[t], NULL, doCalcs, (void *)t);
         if (rc) {
             printf("ERROR; return code from pthread_create() is %d\n", rc);
             exit(-1);
         }
     }

     // Join the threads and accumulate their results.
     for (t = 0; t < NUM_THREADS; t++) {
         void *status = NULL;
         rc = pthread_join(threads[t], &status);
         if (rc) {
             printf("ERROR; return code from pthread_join() is %d\n", rc);
             exit(-1);
         }
         if (status == NULL) {
             printf("ERROR; thread %ld returned no result\n", t);
             exit(-1);
         }
         tot_in += *(float *)status;
         free(status);
     }

     // Exactly one argument for the single %f conversion: a stray extra
     // argument here made printf read an int as a double.
     printf("Value for PI is %f \n", 4.0 * (tot_in / TOT_COUNT));

     // All workers are joined, so a plain return is sufficient.
     return 0;
 }

Answer 1

这是@vladon 建议的使用 async 和 future 的解决方案。

#include <iostream>
#include <vector>
#include <random>
#include <future>

using namespace std;

// Draws n_samples points uniformly from the unit square and returns how many
// of them satisfy x^2 + y^2 <= 1, i.e. lie inside the quarter unit circle.
// Every call builds its own Mersenne Twister engine seeded from
// std::random_device, so no generator state is shared between callers.
long random_circle_sampling(long n_samples){
    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_real_distribution<> unit_interval(0.0, 1.0);

    long hits = 0;
    for(long left = n_samples; left > 0; --left){
        const double x = unit_interval(engine);
        const double y = unit_interval(engine);
        const bool inside = (x*x + y*y <= 1.0);
        hits += inside ? 1 : 0;
    }
    return hits;
}

// Estimates pi by running random_circle_sampling on n_threads asynchronous
// tasks and combining their counts as 4 * inside / tot_samples.
//
// The samples are split so that the tasks together draw exactly tot_samples
// points: when tot_samples is not a multiple of the task count, the first
// (tot_samples % tasks) tasks each take one extra sample. A non-positive
// n_threads is treated as a single task to avoid dividing by zero.
// The result is only meaningful for tot_samples > 0.
double approximate_pi(long tot_samples, int n_threads){
    const int n_tasks = n_threads > 0 ? n_threads : 1;
    const long base_share = tot_samples / n_tasks;
    const long leftover = tot_samples % n_tasks;

    // Holds the pending result of each task.
    std::vector<std::future<long>> futures;
    futures.reserve(n_tasks);
    for(int t = 0; t < n_tasks; ++t){
        const long share = base_share + (t < leftover ? 1 : 0);
        // launch::async forces a real asynchronous task rather than deferred
        // evaluation at get().
        futures.emplace_back(std::async(std::launch::async, random_circle_sampling, share));
    }

    long tot_points_inside = 0;
    for(std::future<long>& f : futures){
        // get() blocks until the corresponding task has finished.
        tot_points_inside += f.get();
    }

    return 4.0 * static_cast<double>(tot_points_inside) / static_cast<double>(tot_samples);
}


// Prints a reference value of pi, the Monte Carlo approximation obtained from
// approximate_pi with one million samples on eight tasks, and the absolute
// difference between the two.
int main() {
    std::cout.precision(32);

    const long tot_samples = 1000000;
    const int n_threads = 8;
    const double pi = 3.14159265358979323846;
    const double approx_pi = approximate_pi(tot_samples, n_threads);
    // Computed by hand: an unqualified abs() without <cmath> may resolve to
    // the int overload on some toolchains and truncate the difference.
    const double abs_diff = approx_pi < pi ? pi - approx_pi : approx_pi - pi;

    std::cout << "pi\t\t" << pi << '\n';
    std::cout << "approx_pi\t" << approx_pi << '\n';
    std::cout << "abs_diff\t" << abs_diff << '\n';

    return 0;
}

你可以简单地运行它：

$ g++ -std=c++11 -O3 pi.cpp -o pi && time ./pi
pi              3.1415926535897931159979634685442
approx_pi       3.1427999999999998159694314381341
abs_diff        0.0012073464102066999714679695898667
./pi  0.04s user 0.00s system 27% cpu 0.163 total

Answer 2

你的代码不是 C++，而是糟糕的、非常陈旧的 C 风格代码。

这才是 C++：

#include <cmath>
#include <iostream>
#include <numeric>
#include <random>
#include <thread>
#include <vector>

// Number of worker threads to launch.
constexpr int num_threads{ 4 };
// Total number of random points sampled across all threads.
constexpr int total_count{ 10000055 };

// Samples total_iterations points uniformly from the unit square and stores
// in in_count_result how many of them lie strictly inside the unit circle.
// The generator is local to the call and seeded from std::random_device.
// The output parameter is written exactly once, after the loop has finished.
void doCalcs(int total_iterations, int & in_count_result)
{
    const auto seed = std::random_device{}();
    std::mt19937 gen(seed);
    std::uniform_real_distribution<double> dist(0.0, 1.0);

    int in_count = 0;

    for (int counter = 0; counter < total_iterations; ++counter) {
        const double x = dist(gen);
        const double y = dist(gen);
        // Compare the squared distance directly: sqrt(d) < 1 is equivalent to
        // d < 1 here, and plain multiplication is far cheaper than std::pow.
        if (x * x + y * y < 1.0) {
            ++in_count;
        }
    }

    in_count_result = in_count;
}

// Runs doCalcs on num_threads threads, giving the first thread the
// total_count % num_threads leftover iterations, then prints
// 4 * inside / total_count as the estimate of pi.
int main()
{
    // Reserve instead of constructing num_threads empty threads up front;
    // otherwise emplace_back would append the real workers after them.
    std::vector<std::thread> threads;
    threads.reserve(num_threads);

    // One result slot per thread. The vector is never resized after the
    // workers start, so the references handed to them stay valid.
    std::vector<int> in_count(num_threads);

    for (auto i = 0; i < num_threads; ++i) {
        auto total_iterations = total_count / num_threads;
        if (i == 0) {
            total_iterations += total_count % num_threads; // remainder goes to thread 0
        }

        threads.emplace_back(doCalcs, total_iterations, std::ref(in_count[i]));
    }

    for (auto & thread : threads) {
        thread.join();
    }

    const auto inside = std::accumulate(in_count.begin(), in_count.end(), 0LL);
    const double pi_value = 4.0 * static_cast<double>(inside) / static_cast<double>(total_count);
    std::cout << "Value of PI is: " << pi_value << '\n';

    return 0;
}

P.S. 这段代码也算不上很好，建议阅读 std::future、std::promise 和 std::async 的相关资料。

使用蒙特卡罗方法多线程计算pi值

2 个答案: