Question

我看到的所有 OpenMP 教程示例都是为 for 循环创建线程。但我需要为普通的语句组创建线程，这些语句可以封装到函数中。例如：

#include <stdio.h>
#include <omp.h>
// Leaf function A: prints the OpenMP thread number of the thread running it.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int A() {
    printf("in A:%d\n", omp_get_thread_num());
    return 0;
}
// Leaf function B: prints the OpenMP thread number of the thread running it.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int B() {
    printf("in B:%d\n", omp_get_thread_num());
    return 0;
}
// Leaf function D: prints the OpenMP thread number of the thread running it.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int D() {
    printf("in D:%d\n", omp_get_thread_num());
    return 0;
}
// Leaf function E: prints the OpenMP thread number of the thread running it.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int E() {
    printf("in E:%d\n", omp_get_thread_num());
    return 0;
}
// Leaf function F: prints the OpenMP thread number of the thread running it.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int F() {
    printf("in F:%d\n", omp_get_thread_num());
    return 0;
}
// Leaf function G: prints the OpenMP thread number of the thread running it.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int G() {
    printf("in G:%d\n", omp_get_thread_num());
    return 0;
}
// Leaf function H: prints the OpenMP thread number of the thread running it.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int H() {
    printf("in H:%d\n", omp_get_thread_num());
    return 0;
}
// Prints the calling thread's OpenMP thread number, runs D() and E() inside a
// nested parallel region that requests two threads, then calls F() on the
// calling thread.
//
// NOTE: in a plain `parallel` region every thread of the team executes the
// whole structured block, so D() and E() are each called by every thread of
// that team; the work is NOT split one function per thread.
//
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int C() {
    printf("in C:%d\n", omp_get_thread_num());
    #pragma omp parallel num_threads(2)
    {
        D(); // want to execute D,E in separate threads
        E();
    }
    // Runs after the parallel region has ended, i.e. on the thread that called C().
    F();
    return 0;
}
// Entry point of the first attempt: calls G(), then A(), B() and C() inside a
// parallel region that requests three threads, then H().
//
// NOTE: in a plain `parallel` region every thread of the team executes the
// whole structured block, so A(), B() and C() are each called by every thread
// of the team instead of once each on separate threads.
//
// Declared with an explicit `int` return type: ISO C++ does not allow a
// declaration of main without a type.
int main() {
    // Enable nested parallelism so the parallel region inside C() may create
    // its own team of threads.
    omp_set_nested(1);
    printf("in main:%d\n", omp_get_thread_num());
    G();
    #pragma omp parallel num_threads(3)
    {
        A(); // want to execute A,B,C in separate threads
        B();
        C();
    }
    // Runs once, after the parallel region has ended.
    H();
}

在上面的代码中，我希望每个函数只执行一次，但各自在不同的线程中执行。（我在上面代码中对指令的用法可能有误，请根据需要更正。）

如何用openmp编写这种嵌套的函数并行性？这些函数是否会共享所有可用的全局变量，或者有没有办法指定哪些变量将由哪些函数共享？

编辑：在阅读了下面 Jorge Bellon 的回答之后，我编写了以下代码，其输出显示在代码之后。看起来线程 0 被用来执行了许多函数，这不是我想要的——我希望这些函数并行执行。另外，我只希望 G 执行一次，所以看起来我必须删除 "num_threads(3)" 这一行。请告诉我该如何解决这个问题。

// compile this with: g++ -fopenmp
// Leaf function A: prints the OpenMP thread number of the thread running it,
// then sleeps for one second (presumably so that overlapping execution would
// be observable).
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int A() {
    // The label names this function ("in A"), matching the listed output.
    printf("in A:%d\n", omp_get_thread_num());
    sleep(1);
    return 0;
}
// similarly for B, D, E, F, G, H
// Prints the calling thread's OpenMP thread number, creates one task for D()
// and one for E(), waits for both, then calls F().
//
// The tasks can only be picked up by other threads when C() itself runs
// inside an active parallel region; otherwise the encountering thread ends up
// executing them itself.
//
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int C() {
    printf("in C:%d\n", omp_get_thread_num()); sleep(1);
    #pragma omp task
    D();
    #pragma omp task
    E();
    // Wait for the two child tasks created above before running F().
    #pragma omp taskwait
    F(); sleep(1);
    return 0;
}
// Entry point of the edited attempt.
//
// NOTE on the behaviour of this code as written:
//  * `#pragma omp parallel` without braces applies only to the next
//    statement, so the parallel region consists of `G();` alone and every
//    thread of the three-thread team calls G().
//  * The task constructs that follow are therefore outside the parallel
//    region; with no team of threads available, the initial thread executes
//    A(), B() and C() itself.
//
// Declared with an explicit `int` return type: ISO C++ does not allow a
// declaration of main without a type.
int main() {
    omp_set_nested(1);
    printf("in main:%d\n", omp_get_thread_num());
    // Parallel region covering only the single statement below.
    #pragma omp parallel num_threads(3)
    G();
    // From here on execution is outside the parallel region.
    #pragma omp task
    A();
    #pragma omp task
    B();
    #pragma omp task
    C();
    // Wait for the three child tasks created above before running H().
    #pragma omp taskwait
    H();
}
// outputs:
in main:0
in G:1
in G:0
in G:2
in A:0
in B:0
in C:0
in D:0
in E:0
in F:0
in H:0

Answer 1

并行化此类代码的最佳方法是使用OpenMP task构造。您的并行区域将创建一个线程池，一个主线程将创建外部任务，其余线程将在它们可用后立即处理这些任务。

// [...]

// Creates one task that runs D() and then E(), and calls F() without waiting
// for that task.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int C() {
  // Tasks can be created from within other tasks.
  // D() and E() share a single task here, so they run one after the other
  // inside that task while F() may proceed concurrently. Give each call its
  // own `task` construct if D() and E() should also be able to run in
  // parallel with each other.
  // In this example it may be preferable to create the tasks here and omit
  // the task construct around the call to C() in the caller.
  #pragma omp task
  {
    D();
    E();
  }
  // There is no taskwait here, so F() does not wait for D() and E().
  // If F() needs D and E to finish, add `#pragma omp taskwait` before it.
  F();
  return 0;
}

// Entry point: runs G() once, then creates tasks for A(), B() and C() from a
// single thread inside a three-thread parallel region, and finally runs H()
// once after the region has ended.
//
// Declared with an explicit `int` return type: ISO C++ does not allow a
// declaration of main without a type.
int main() {
  // omp_set_nested is not needed: there is only one parallel region.
  printf("in main:%d\n", omp_get_thread_num());
  G();
  #pragma omp parallel num_threads(3)
  #pragma omp single
  {
    // A single thread creates the tasks;
    // the other threads in the team are able to execute them.
    // Goal: execute A, B and C in separate threads.
    #pragma omp task
    A();
    #pragma omp task
    B();
    #pragma omp task
    C();
    // Wait until the three child tasks created above have finished.
    // taskwait only covers direct children; any task created inside C() is
    // instead guaranteed to be complete by the barrier that ends the region.
    #pragma omp taskwait
  }
  H();
}

每个函数是否在不同的线程中执行完全取决于运行时程序的状态。这意味着如果所有其他线程都忙，某些任务可能在同一个线程中执行，这不是特别的问题。

您可以使用任务依赖（task dependences，自 OpenMP 4 起提供）来控制任务在创建后是否允许立即执行。

Answer 2

以下解决方案使用 C++11 线程实现。完整的 OpenMP 版本仍有待给出。

// compile this with: g++ -pthread -std=gnu++0x
#include <stdio.h>
#include <unistd.h> // for sleep
#include <thread>
#include <iostream>
#include <sstream>
using namespace std;
// Leaf function A: prints the id of the thread running it, then sleeps for
// one second.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int A() {
    // std::thread::id has no printf conversion, so format it through a stream.
    std::stringstream ss;
    ss << std::this_thread::get_id();
    printf("in A:%s\n", ss.str().c_str());
    sleep(1);
    return 0;
}
// similarly for B, D, E, F, G, H
// Prints the id of the thread running it, runs D() and E() on two new
// threads, joins both, then calls F() on the calling thread.
// Returns 0; the explicit return avoids the undefined behaviour of flowing
// off the end of a non-void function in C++.
int C() {
    // std::thread::id has no printf conversion, so format it through a stream.
    std::stringstream ss;
    ss << std::this_thread::get_id();
    printf("in C:%s\n", ss.str().c_str());
    sleep(1);
    std::thread thread_1(D);
    std::thread thread_2(E);
    // Both threads must be joined before F() runs and before the
    // std::thread objects are destroyed.
    thread_1.join();
    thread_2.join();
    F();
    sleep(1);
    return 0;
}
// Entry point: runs G() on the main thread, runs A(), B() and C() on three
// separate threads, joins them, then runs H() on the main thread.
//
// Declared with an explicit `int` return type: ISO C++ does not allow a
// declaration of main without a type.
int main() {
    printf("in main\n");
    G();
    std::thread thread_1(A);
    std::thread thread_2(B);
    std::thread thread_3(C);
    // Wait for all three workers before continuing with H().
    thread_1.join();
    thread_2.join();
    thread_3.join();
    H();
}
// outputs:
in main
in G:0x600000060
in A:0x60005aa10
in B:0x60005ab10
in C:0x60005ae40
in D:0x60005af40
in E:0x60005b040
in F:0x60005ae40
in H:0x600000060

为函数创建openmp线程

2 个答案: