Question

是否可以通过模板参数或运行时变量来选择性地启用 OpenMP 指令？

例如这种写法（所有线程共同执行同一个 for 循环）：
// Variant A: the loop is a worksharing loop, so all threads of the parallel
// region work on the same for loop.
#pragma omp parallel
{
    #pragma omp for
    for (int i = 0; i < 10; ++i) { /*...*/ }
}
对比这种写法（每个线程各自执行自己的 for 循环）：
// Variant B: no worksharing directive on the loop, so each thread of the
// parallel region works on its own for loop.
#pragma omp parallel
{
    for (int i = 0; i < 10; ++i) { /*...*/ }
}

更新（测试if子句）

test.cpp：

#include <iostream>
#include <omp.h>

int main() {
    bool var = true;  // runtime flag intended to switch the worksharing loop on or off
    #pragma omp parallel 
    {
        #pragma omp for if (var)
        // NOTE: the directive above is what g++ rejects: 'if' is not valid
        // for '#pragma omp for'. This program is the failing experiment.
        for (int i = 0; i < 4; ++i) {
            std::cout << omp_get_thread_num() << "\n";
        }
    }
}

错误消息（g++ 6，使用 g++ test.cpp -fopenmp 编译）：

test.cpp: In function ‘int main()’:
test.cpp:8:25: error: ‘if’ is not valid for ‘#pragma omp for’
         #pragma omp for if (var)
                         ^~

Answer 1

一种变通方法。不确定能否去掉获取线程 ID 时的那个条件判断。

#include <iostream>
#include <omp.h>
#include <sstream>
#include <vector>
// Demonstrates switching, through the compile-time flag `var`, between
//   var == true : every thread executes the whole loop, and
//   var == false: the loop iterations are divided among the threads,
// by nesting two parallel regions of which exactly one is active.
// Each thread appends the iterations it ran to its own stringstream, and the
// buffers are printed once the parallel work is finished.
int main() {
    constexpr bool var = true;
    // Size the per-thread buffers by the largest team OpenMP may create rather
    // than by the processor count: the thread count can be configured to
    // exceed the number of processors, and the buffers are indexed by thread
    // id.
    const int n_threads = omp_get_max_threads();
    std::cout << "n_threads: " << n_threads << "\n";
    std::vector<std::stringstream> s(n_threads);

    // Active only when var is true; otherwise it runs on a single thread.
    #pragma omp parallel if (var)
    {
        const int thread_id0 = omp_get_thread_num();

        // Active only when var is false. Serializing it explicitly when var is
        // true keeps the result independent of whether nested parallelism is
        // enabled; num_threads keeps every thread id inside the buffer vector.
        #pragma omp parallel if (!var) num_threads(n_threads)
        {
            // In a serialized inner region omp_get_thread_num() is always 0,
            // so the id of the enclosing outer thread has to be used instead.
            const int thread_id1 = var ? thread_id0 : omp_get_thread_num();

            // Binds to the inner region: a team of one runs every iteration,
            // a full team shares them.
            #pragma omp for
            for (int i = 0; i < 8; ++i) {
                s[thread_id1] << i << ", ";
            }
        }
    }

    for (int i = 0; i < n_threads; ++i) {
        std::cout << "thread " << i << ": "
                  << s[i].str() << "\n";
    }
}

输出（var == true时）：

n_threads: 8
thread 0: 0, 1, 2, 3, 4, 5, 6, 7, 
thread 1: 0, 1, 2, 3, 4, 5, 6, 7, 
thread 2: 0, 1, 2, 3, 4, 5, 6, 7, 
thread 3: 0, 1, 2, 3, 4, 5, 6, 7, 
thread 4: 0, 1, 2, 3, 4, 5, 6, 7, 
thread 5: 0, 1, 2, 3, 4, 5, 6, 7, 
thread 6: 0, 1, 2, 3, 4, 5, 6, 7, 
thread 7: 0, 1, 2, 3, 4, 5, 6, 7,

输出（var == false时）：

n_threads: 8
thread 0: 0, 
thread 1: 1, 
thread 2: 2, 
thread 3: 3, 
thread 4: 4, 
thread 5: 5, 
thread 6: 6, 
thread 7: 7,

Answer 2

我认为符合 C++ 惯用法的解决方案，是把不同的 OpenMP pragma 隐藏在算法重载的背后。

#include <iostream>
#include <sstream>
#include <vector>
#include <omp.h>

#include <type_traits>
// Compile-time switch between two ways of running a loop inside an OpenMP
// parallel region. Only the two explicit specializations below are defined.
template <bool ALL_PARALLEL>
struct impl;

// ALL_PARALLEL == true: no worksharing directive is applied, so every thread
// of the parallel region walks the complete range [begin, end) and calls
// func at each position.
template <>
struct impl<true> {
  template <typename Iter, typename Fn>
  void operator()(Iter begin, Iter end, const Fn& func) {
    #pragma omp parallel
    {
      // Declared inside the region, so each thread has its own cursor.
      Iter cursor = begin;
      while (cursor != end) {
        func(cursor);
        ++cursor;
      }
    }
  }
};

// ALL_PARALLEL == false: the loop is a worksharing loop, so the positions of
// [begin, end) are divided among the threads of the parallel region.
template <>
struct impl<false> {
  template <typename Iter, typename Fn>
  void operator()(Iter begin, Iter end, const Fn& func) {
    #pragma omp parallel
    {
      // The worksharing loop keeps the relational `<` test of the original.
      #pragma omp for
      for (Iter pos = begin; pos < end; ++pos) {
        func(pos);
      }
    }
  }
};

// This is just so we don't have to write parallel_foreach()(...)
template <bool ALL_PARALLEL, typename ITER, typename CALLABLE>
void parallel_foreach(ITER begin, ITER end, const CALLABLE& func)
{
    impl<ALL_PARALLEL>()(begin, end, func);
}

// Runs the loop 0..7 through parallel_foreach, with `var` chosen at compile
// time, records in a per-thread stringstream which thread handled which
// index, and prints the buffers afterwards.
int main()
{
    constexpr bool var = false;
    // Size the per-thread buffers by the largest team OpenMP may create rather
    // than by the processor count: the thread count can be configured to
    // exceed the number of processors, and the buffers are indexed by
    // omp_get_thread_num().
    const int n_threads = omp_get_max_threads();
    std::cout << "n_threads: " << n_threads << "\n";
    std::vector<std::stringstream> s(n_threads);

    // Each thread writes only to the buffer selected by its own thread id.
    parallel_foreach<var>(0, 8, [&s](auto i) {
        s[omp_get_thread_num()] << i << ", ";
    });

    for (int i = 0; i < n_threads; ++i) {
        std::cout << "thread " << i << ": "
                  << s[i].str() << "\n";
    }
}

如果您处理的是某些特定类型，那么可以按类型进行重载，而不必使用 bool 模板参数；同时可以直接遍历元素，而不是使用数字索引的循环。请注意，OpenMP 的工作共享循环中可以使用 C++ 随机访问迭代器！根据您的类型，您很可能可以实现一个迭代器，向调用者隐藏内部数据访问的全部细节。

Answer 3

#include <omp.h>
#include <sstream>
#include <vector>
#include <iostream>
// Selects between a worksharing loop and a per-thread full loop with an
// ordinary `if` inside one parallel region. Each thread records the
// iterations it executed in its own stringstream; the buffers are printed
// after the region ends.
int main() {
    constexpr bool var = false;
    // Size the per-thread buffers by the largest team OpenMP may create rather
    // than by the processor count: the thread count can be configured to
    // exceed the number of processors, and the buffers are indexed by thread
    // id.
    const int n_threads = omp_get_max_threads();
    std::cout << "n_threads: " << n_threads << "\n";
    std::vector<std::stringstream> s(n_threads);

    #pragma omp parallel
    {
        const int thread_id = omp_get_thread_num();
        // `var` has the same value on every thread, so either all threads
        // reach the worksharing loop or none of them does.
        if (var) {
            #pragma omp for
            for (int i = 0; i < 8; ++i) {
                s[thread_id] << i << ", ";
            }
        } else {
            for (int i = 0; i < 8; ++i) {
                s[thread_id] << i << ", ";
            } // code duplication
        }
    }
    for (int i = 0; i < n_threads; ++i) {
        std::cout << "thread " << i << ": "
                  << s[i].str() << "\n";
    }
}

在并行区域内有选择地启用OpenMP for循环

3 个答案: