Skip to content

Latest commit

 

History

History
147 lines (112 loc) · 3.63 KB

openmp.md

File metadata and controls

147 lines (112 loc) · 3.63 KB
title
OpenMP
// ...
#include <omp.h>

// the thread function
void hello() {
  int rank = omp_get_thread_num();
  int size = omp_get_num_threads();
  // ...
}

int main(int argc, char *argv[]) {
  int n_thread = std::atoi(argv[1]);

# pragma omp parallel num_threads(n_thread)
  hello();

  return 0;
}
double thread_by_return(F f, double a, double b, int n_global);

void thread_by_pointer(F f, double a, double b, int n_global, double *riemann_sum_global_ptr) {
  auto riemann_sum_local = thread_by_return(f, a, b, n_global);

# pragma omp critical
  *riemann_sum_global_ptr += riemann_sum_local;
}

double parallel_by_pointer(F f, double a, double b, int n, int n_thread) {
  auto riemann_sum_global = 0.0;

# pragma omp parallel num_threads(n_thread)
  thread_by_pointer(f, a, b, n, &riemann_sum_global);
 
  return riemann_sum_global;
}

Scope

默认地

  • parallel-block 定义的变量,被所有线程共享
  • parallel-block 定义的变量,被当前线程独有
double parallel_by_return(F f, double a, double b, int n, int n_thread) {
  double riemann_sum_global = 0.0;  // shared
# pragma omp parallel num_threads(n_thread)
  {
    double riemann_sum_local/* private */
        = thread_by_return(f, a, b, n)/* f, a, b, n are shared, too */;
#   pragma omp critical
    riemann_sum_global += riemann_sum_local;
  }
  return riemann_sum_global;
}

可以在 #pragma 里,用 private(var) 将默认共享的变量 var 改为私有的,此时需在各线程内部对其初始化。

建议在 #pragma 里,用 default(none) 取消默认行为,强制要求用 private(var)shared(var)reduction(+: var) 显式指定变量的作用域。

Reduction

double parallel_by_reduction(F f, double a, double b, int n, int n_thread) {
  double riemann_sum_global = 0.0;
  # pragma omp parallel num_threads(n_thread) \
        reduction(+/* 规约运算 */: riemann_sum_global/* 规约变量 */)
  {
    riemann_sum_global += thread_by_return(f, a, b, n);
  }
  return riemann_sum_global;
}

相当于 OpenMP 自动创建了私有变量 riemann_sum_localcritical-block。

parallel for

  • 必须是 for 循环。
  • 循环变量 i 必须是整型,且为各线程独有。
  • 循环总数必须能确定,不能有无限循环、循环体内不能有 breakreturn、循环体内修改起、止、步长等情况。

即使编译通过,运行期仍有可能出错: 如 dynamic programming 存在数据依赖,用 parallel for 可能发生数据竞争

double parallel_for(F f, double a, double b, int n, int n_thread) {
  int i;  // 定义在外,但作为循环变量,为各线程独有
  auto h = (b - a) / n;
  auto riemann_sum = (f(a) + (b)) * h / 2;
# pragma omp parallel for num_threads(n_thread) \
      reduction(+: riemann_sum)
  for (int i = 1; i < n; ++i) {
    riemann_sum += f(a + i * h) * h;
  }
  return riemann_sum;
}

由主线程创建任务:

# pragma omp parallel
# pragma omp single
  {
#   pragma omp task
    // the following code is executed by each task
    // ...
  }

由任一 task 创建 subtasks:

int parallel_by_tasks(int n, std::vector<int> *dp) {
  if (n < 2) {
    return dp->at(n) = n;
  }

  int i = 0, j = 0;
# pragma omp task shared(i) if(n > 30)
  i = parallel_by_tasks(n - 1, dp);  // executed by a subtask

  j = parallel_by_tasks(n - 2, dp);  // executed by the current task

# pragma omp taskwait
  dp->at(n) = i + j;  // use the subtask's result j

  return dp->at(n);
}