我写了一个函数,它使用 parallel for 用静态调度做一些计算,然后它返回到我的主程序。之后,我再次调用该函数,但这次它永远运行,所以我不得不中止程序。

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <omp.h>
#include <time.h>

int thread_count;

/*
 * Loops i from 1 to x and tests whether i divides x evenly.
 * NOTE(review): the statement inside the `if` and all closing braces are
 * missing from this copy of the snippet, so what is done with `divisor`
 * cannot be seen here.
 */
void work(int x) {
    int divisor = 0;
    for (int i=1; i<=x; i++) {
        if ((x%i) == 0) {

/*
 * Sets the file-level global thread_count to 4 and fills codes[0..n-1] with
 * pseudo-random values in the range [0, 9999].
 *
 * codes: destination array with room for at least n ints.
 * n:     number of elements to fill; nothing is written when n <= 0.
 */
void initialize(int *codes, int n) {
    thread_count = 4;

    for (int i = 0; i < n; i++) {
        codes[i] = rand() % 10000;
    }
}

/*
 * Returns the spread (maximum minus minimum) of the first n entries of times.
 *
 * times: array holding at least n values.
 * n:     number of entries to examine.
 *
 * Returns 0.0 when times is NULL or n <= 0, so that times[0] is never read
 * from an empty array.
 */
double get_difference(double *times, int n) {
    if (times == NULL || n <= 0) {
        return 0.0;
    }

    double min, max;
    min = max = times[0];
    for (int i = 1; i < n; i++) {
        if (times[i] > max) {
            max = times[i];
        }
        if (times[i] < min) {
            min = times[i];
        }
    }
    return (max - min);
}

/*
 * Takes an omp_get_wtime() reading, prints it together with the calling
 * thread's number, and stores it in times[omp_get_thread_num()].
 * NOTE(review): a `parallel for` directive is placed directly under a
 * `parallel` directive here, i.e. two parallel constructs are nested.
 * NOTE(review): the loop body and all closing braces are missing from this
 * copy of the snippet, so the exact block structure cannot be confirmed.
 */
void my_function(int *a, double *times, int n, int thread_count) {
    long i;
    #pragma omp parallel
      #pragma omp parallel for num_threads(thread_count) \
        shared(a, n) private(i) schedule(static, 1)
        for (i=0; i<n; i++) {
        /* a single wall-clock reading; no start time is subtracted from it */
        double wtime = omp_get_wtime();
        printf( "Time taken by thread %d is %f\n", omp_get_thread_num(), wtime);
        /* the slot is selected by the calling thread's number */
        times[omp_get_thread_num()] = wtime;

/*
 * Odd-even transposition sort of a[0..n-1] into descending order, in place.
 *
 * Runs n phases. Even phases compare-and-swap the pairs (a[i-1], a[i]) for
 * odd i; odd phases compare-and-swap the pairs (a[i], a[i+1]) for odd i.
 * A single parallel region, sized by the file-level global thread_count,
 * encloses the whole phase loop; the inner loop of each phase is divided
 * among the threads with `omp for`.
 *
 * a: array to sort.
 * n: number of elements in a; nothing happens when n <= 1.
 */
void odd_even(int *a, int n) {
   int phase, i, tmp;

   # pragma omp parallel num_threads(thread_count) \
      default(none) shared(a, n) private(i, tmp, phase)
   for (phase = 0; phase < n; phase++) {
      if (phase % 2 == 0) {
         # pragma omp for
         for (i = 1; i < n; i += 2) {
            if (a[i-1] < a[i]) {
               tmp = a[i-1];
               a[i-1] = a[i];
               a[i] = tmp;
            }
         }
      } else {
         # pragma omp for
         for (i = 1; i < n-1; i += 2) {
            if (a[i] < a[i+1]) {
               tmp = a[i+1];
               a[i+1] = a[i];
               a[i] = tmp;
            }
         }
      }
   }
}


/*
 * Reads the element count from argv[1], fills and sorts an array of that
 * size, calls my_function once and prints the spread of the recorded times.
 * NOTE(review): argv[1] is read without checking argc.
 * NOTE(review): the closing brace is missing from this copy of the snippet.
 */
int main(int argc, char *argv[]) {
    int n = atoi(argv[1]);
    int arr[n];
    /* NOTE(review): thread_count is assigned inside initialize(), which is
       only called on the next line, after this array has been sized. */
    double times[thread_count];
    initialize(arr, n);
    odd_even(arr, n);
    my_function(arr, times, n, thread_count);
    double difference = get_difference(times, thread_count);
    printf("Difference is %f\n", difference);

    // Second call to my_function, commented out:
    // my_function(arr, times, n, thread_count);
    // difference = get_difference(times, thread_count);
    // printf("Difference is %f\n", difference);



我尝试了块大小为 n/thread_count 的块分配和块大小为 1 的块循环分配,但无论哪种方式我都遇到相同的问题。



您的代码存在一些问题。在函数 my_function 中,循环的迭代没有按照您的意愿分配给线程。因为您在 #pragma omp for 上又添加了一次 parallel 子句,并且假设嵌套并行处于禁用状态(默认即是如此),在外部并行区域中创建的每个线程都将“顺序”执行该区域内的代码。因此,对于 n = 6 且线程数为 4 的情况,以下代码块:

for (i=0; i<n; i++) {

被执行 6 x 4 = 24 次(循环迭代总数乘以线程总数)。如需更深入的解释,请查看有关类似问题的 SO Thread。不过,下图提供了基本要素的可视化:

因此将 my_function 修正为:

/*
 * Variant with a single `parallel` directive followed by an `omp for` with
 * schedule(static, 1) on the loop; afterwards an omp_get_wtime() reading is
 * printed and stored in times[omp_get_thread_num()].
 * NOTE(review): the loop body and all closing braces are missing from this
 * copy of the snippet, so the exact block structure cannot be confirmed.
 */
void my_function(int *a, double *times, int n, int thread_count) {
    # pragma omp parallel num_threads(thread_count) shared(a)
       #pragma omp for schedule(static, 1)
       for (long i=0; i<n; i++) {
    double wtime = omp_get_wtime();
    printf( "Time taken by thread %d is %f\n", omp_get_thread_num(), wtime);
    times[omp_get_thread_num()] = wtime;


其次,在 main 中,您在调用 initialize 之前就声明了 times:

double times[thread_count];
initialize(arr, n);

此时全局变量 thread_count 尚未被 initialize 赋值(其值仍为 0),因此应改为:

initialize(arr, n);
double times[thread_count];

最后一个问题导致了 undefined behavior,这可能会导致无法预料的问题。

另一个您可能已经注意到、也可能没有注意到的问题是:讽刺的是,函数 work 实际上并没有做任何有意义的事情。

单独调用 double wtime = omp_get_wtime(); 不会告诉您线程工作了多长时间。根据 OpenMP 文档:


omp_get_wtime 例程以秒为单位返回经过的挂钟时间。

因此,要测量一段代码所花费的时间,您需要在这段代码前后各取一次时间:

double begin = omp_get_wtime();
// block of code
double end = omp_get_wtime();

并使用表达式 end-begin 获取以秒为单位的时间花费。在你的情况下:

 /*
  * Timing variant: takes an omp_get_wtime() reading before and after the
  * `omp for` loop, prints end - begin with the calling thread's number, and
  * stores that difference in times[omp_get_thread_num()].
  * NOTE(review): the loop body and all closing braces are missing from this
  * copy of the snippet, so the exact block structure cannot be confirmed.
  */
 void my_function(int *a, double *times, int n, int thread_count) {
        # pragma omp parallel num_threads(thread_count) shared(a)
           double begin = omp_get_wtime();
           #pragma omp for schedule(static, 1)
           for (long i=0; i<n; i++) {
           double end = omp_get_wtime();
           double time = end - begin;
           printf( "Time taken by thread %d is %f\n", omp_get_thread_num(), time);
           times[omp_get_thread_num()] = time;