GetFEM  5.5
getfem_omp.h
Go to the documentation of this file.
1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2012-2026 Andriy Andreykiv
5 
6  This file is a part of GetFEM
7 
8  GetFEM is free software; you can redistribute it and/or modify it
9  under the terms of the GNU Lesser General Public License as published
10  by the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You should have received a copy of the GNU Lesser General Public License
18  along with this program. If not, see https://www.gnu.org/licenses/.
19 
20  As a special exception, you may use this file as it is a part of a free
21  software library without restriction. Specifically, if other files
22  instantiate templates or use macros or inline functions from this file,
23  or you compile this file and link it with other files to produce an
24  executable, this file does not by itself cause the resulting executable
25  to be covered by the GNU Lesser General Public License. This exception
26  does not however invalidate any other reasons why the executable file
27  might be covered by the GNU Lesser General Public License.
28 
29 ===========================================================================*/
30 
31 /**@file getfem_omp.h
32 @author Andriy Andreykiv <[email protected]>
33 @date May 14th, 2013.
34 @brief Tools for multithreaded, OpenMP and Boost based parallelization.
35 
36 This is the kernel of getfem.
37 */
38 #pragma once
39 
40 #include <atomic>
41 #include <memory>
42 #include <set>
43 #include <vector>
44 
45 #include "bgeot_config.h"
46 
47 #ifdef GETFEM_HAS_OPENMP
48  #include <mutex>
49 #endif
50 
51 namespace getfem
52 {
53  using bgeot::size_type;
54 
55 #ifdef GETFEM_HAS_OPENMP
56  void parallel_execution(std::function<void(void)> lambda,
57  bool iterate_over_partitions);
58 
/** Thread lock intended to protect multi-threaded accesses to
    asserts, traces and warnings by means of one global mutex.
    Only the declaration lives here; the constructor is defined
    out of line. */
class omp_guard
{
  // the single mutex shared by all omp_guard objects
  static std::recursive_mutex mutex;

  // owning handle of the std::lock_guard object of this guard
  std::unique_ptr<std::lock_guard<std::recursive_mutex>> plock;

public:
  omp_guard();
};
70 
/** Like std::lock_guard, but copyable: the std::lock_guard object is
    held through a std::shared_ptr, so copies of a local_guard share it.
    The constructor, taking the mutex to work on, is defined out of line. */
class local_guard
{
  // the mutex this guard refers to (not owned)
  std::recursive_mutex& mutex;

  // shared handle of the underlying std::lock_guard
  std::shared_ptr<std::lock_guard<std::recursive_mutex>> plock;

public:
  local_guard(std::recursive_mutex&);
};
81 
82  //produces scoped lock on the
83  //mutex, held in this class
84  class lock_factory
85  {
86  public:
87 
88  //get a lock object with RAII acquire/release semantics
89  //on the mutex from this factory
90  local_guard get_lock() const;
91  private:
92  mutable std::recursive_mutex mutex;
93  };
94 
95  #define GLOBAL_OMP_GUARD getfem::omp_guard g; GMM_NOPERATION_(abs(&(g) != &(g)));
96 
97 #else
98 
// Build without GETFEM_HAS_OPENMP: the guards are empty placeholder
// types, so code written against them compiles unchanged.
class omp_guard
{
};

class local_guard
{
};

struct lock_factory
{
  // hands out an empty placeholder guard
  local_guard get_lock() const { return {}; }
};

// expands to nothing in this configuration
#define GLOBAL_OMP_GUARD
106 
107 #endif
108 
109  /**set maximum number of OpenMP threads*/
110  void set_num_threads(int n);
111 
112  /**is the program running in the parallel section*/
114 
115  /** is the program running on a single thread*/
116  bool not_multithreaded();
117 
118  /**Maximum number of threads that can run concurrently*/
120 
121  /**Thread policy, where partitioning is based on true threads*/
123  static size_type this_thread();
124  static size_type num_threads();
125  };
126 
127  /** Thread policy, regulated by partition_master
128  (can be true thread- or partition-based)*/
130  static size_type this_thread();
131  static size_type num_threads();
132  };
133 
134  //implementation classes for omp_distribute
135  namespace detail{
136 
// dispatch tags selecting the flavour of omp_distribute_impl
struct general_tag {};
struct vector_tag {};
struct bool_tag {};

/** Maps a type to its dispatch tag: general_tag unless one of
    the specializations below applies. */
template<typename T>
struct distribute_traits { using type = general_tag; };

/** any std::vector is dispatched with vector_tag */
template<typename Elem>
struct distribute_traits<std::vector<Elem>> { using type = vector_tag; };

/** bool is dispatched with bool_tag */
template<>
struct distribute_traits<bool> { using type = bool_tag; };
158 
159  template<typename T, typename thread_policy, typename tag>
160  class omp_distribute_impl;
161 
162  template<class V>
163  inline auto safe_component(V &v, size_type i) -> decltype(v[i]){
164  GMM_ASSERT2(i < v.size(),
165  i << "-th partition is not available. "
166  "Probably on_thread_update "
167  "should have been called first");
168  return v[i];
169  }
170 
171  template <typename T, typename thread_policy>
172  class omp_distribute_impl<T, thread_policy, general_tag> {
173  private:
174  std::vector<T> thread_values;
175  friend struct all_values_proxy;
176 
177  struct all_values_proxy{
178  omp_distribute_impl& distro;
179  all_values_proxy(omp_distribute_impl& d)
180  : distro(d)
181  {}
182 
183  void operator = (const T& x){
184  for(auto it = distro.thread_values.begin();
185  it != distro.thread_values.end(); ++it){
186  *it=x;
187  }
188  }
189  };
190 
191  public:
192 
193  template <class... args>
194  explicit omp_distribute_impl(args&&... value){
195  thread_values.reserve(num_threads());
196  for (size_type i = 0; i != num_threads(); ++i){
197  thread_values.emplace_back(std::forward<args>(value)...);
198  }
199  }
200 
201  operator T& (){
202  return operator()(this_thread());
203  }
204 
205  operator const T& () const {
206  return operator()(this_thread());
207  }
208 
209  T& thrd_cast(){
210  return operator()(this_thread());
211  }
212 
213  const T& thrd_cast() const {
214  return operator()(this_thread());
215  }
216 
217  T& operator()(size_type i) {
218  return safe_component(thread_values, i);
219  }
220 
221  const T& operator()(size_type i) const {
222  return safe_component(thread_values, i);
223  }
224 
225  void on_thread_update() {
226  if (thread_values.size() == num_threads()) return;
227  GLOBAL_OMP_GUARD
228  if (thread_values.size() != num_threads()) {
229  thread_values.resize(num_threads());
230  }
231  }
232 
233  size_type num_threads() const {
234  return thread_policy::num_threads();
235  }
236 
237  size_type this_thread() const {
238  return thread_policy::this_thread();
239  }
240 
241  T& operator = (const T& x){
243  thrd_cast() = x;
244  }
245  else all_threads() = x;
246 
247  return *this;
248  }
249 
250  all_values_proxy all_threads(){
251  return all_values_proxy(*this);
252  }
253  };
254 
255  /**Specialization for std::vector<T>, adds vector indexing operator*/
256  template <typename T,
257  typename thread_policy>
258  class omp_distribute_impl<std::vector<T>, thread_policy, vector_tag>
259  : public omp_distribute_impl<std::vector<T>, thread_policy, general_tag>
260  {
261  public:
262  using base = omp_distribute_impl<std::vector<T>, thread_policy, general_tag>;
263 
264  template <class... args>
265  explicit omp_distribute_impl(args&&... value)
266  : base(std::forward<args>(value)...)
267  {}
268 
269  T& operator[](size_type i){
270  return base::thrd_cast()[i];
271  }
272  const T& operator[](size_type i) const{
273  return base::thrd_cast()[i];
274  }
275 
276  std::vector<T>& operator = (const std::vector<T>& x){
277  return base::operator=(x);
278  }
279  };
280 
281  /**Specialization for bool, to circumvent the shortcomings
282  of standards library's specialization for std::vector<bool>,
283  we use std::vector<int> instead*/
284  template <typename thread_policy>
285  class omp_distribute_impl<bool, thread_policy, bool_tag>
286  : public omp_distribute_impl<int, thread_policy, general_tag>
287  {
288  public:
289  using base = omp_distribute_impl<int, thread_policy, general_tag>;
290 
291  template <class... Args>
292  explicit omp_distribute_impl(Args&&... value)
293  : base(std::forward<Args>(value)...)
294  {}
295 
296  operator bool () const {
297  return base::operator const int&();
298  }
299 
300  bool operator = (const bool& x){
301  return base::operator=(x);
302  }
303  };
304 
305  } /* end of namespace detail. */
306 
307  template<typename T, typename thread_policy>
308  using od_base = typename detail::omp_distribute_impl
309  <T, thread_policy, typename detail::distribute_traits<T>::type>;
310 
311  /**
312  Use this template class for any object you want to
313  distribute to open_MP threads. The creation of this
314  object should happen in serial, while accessing the individual
315  thread local instances will take place in parallel.
316  Use thread_policy to either distribute the objects between physical
317  threads or a fixed number of partitions, independent of the number
318  of threads. If you change the default policy, remember to also
319  use this_thread() and num_threads() from the corresponding policy
320  for iterating over the thread-specific components.
321  */
322  template<typename T,
323  typename thread_policy = global_thread_policy>
324  class omp_distribute : public od_base<T, thread_policy>
325  {
326  public:
327  using base = od_base<T, thread_policy>;
328 
329  template <class... args>
330  explicit omp_distribute(args&&... value)
331  : base(std::forward<args>(value)...)
332  {}
333 
334  auto operator = (const T& x) -> decltype(std::declval<base>() = x){
335  return base::operator=(x);
336  }
337  };
338 
/* Storage specifier for function local objects that should behave
   like thread local storage, including their initialization (more
   general and portable than __declspec(thread)).
   Use it only in function local context. */
#if defined(GETFEM_HAS_OPENMP)
  // OpenMP build: thread_local storage
  #define THREAD_SAFE_STATIC thread_local
#else
  // build without OpenMP: an ordinary static is enough
  #define THREAD_SAFE_STATIC static
#endif
348 
349  class partition_master;
350 
351  /**Iterator that runs over partitions on the current
352  thread and sets the global (but thread-specific)
353  partition during incrementation*/
355  {
356  public:
357 
358  partition_iterator operator ++();
359  bool operator==(const partition_iterator&) const;
360  bool operator!=(const partition_iterator&) const;
361  size_type operator*() const;
362 
363  private:
364 
365  friend class partition_master;
366 
367  /**Only partition_master can create one*/
369  std::set<size_type>::const_iterator it);
370 
371  partition_master &master;
372  std::set<size_type>::const_iterator it;
373  };
374 
375  enum class thread_behaviour {true_threads, partition_threads};
376 
377  /**
378  A singleton that manages partitions on individual threads.
379  */
381  {
382  public:
383 
384  static partition_master &get();
385 
386  /**beginning of the partitions for the current thread*/
388 
389  /**end of the partitions for the current thread*/
391 
392  /**Sets the behaviour for the full program: either partitioning parallel loops
393  according to the number of true threads, specified by the user,
394  or to the number of the fixed partitions equal to the max concurrency of the system.
395  The latter makes the partitioning independent of the number of threads set*/
396  void set_behaviour(thread_behaviour);
397 
398  /**active partition on the thread. If number of threads is equal to the
399  max concurrency of the system, then it's also the index of the actual thread*/
401 
402  /**number of partitions or threads, depending on thread policy*/
404 
405  /**for thread_behaviour::partition_threads set the total number of partitions.
406  This call must be made before all the omp_distribute based classes are created.
407  Otherwise they become invalid*/
409 
410  void check_threads();
411 
412  private:
413 
414  void rewind_partitions();
415 
416  //Parallel execution of a lambda. Please use the macros below
417  friend void parallel_execution(std::function<void(void)> lambda, bool iterate_over_partitions);
418 
419  /**set current partition, which will be also returned in this_thread() call*/
420  void set_current_partition(size_type);
421 
422  friend partition_iterator;
423 
425 
426  void update_partitions();
427 
430  std::atomic<size_type> nb_user_threads;
431  thread_behaviour behaviour = thread_behaviour::partition_threads;
432  std::atomic<bool> partitions_updated{false};
433  size_type nb_partitions;
434  bool partitions_set_by_user = false;
435 
436  static partition_master instance;
437  };
438 
439  class standard_locale;
440  class thread_exception;
441 
442  /**Encapsulates open_mp-related initialization and de-initialization*/
444  {
445  std::unique_ptr<standard_locale> plocale;
446  std::unique_ptr<thread_exception> pexception;
447 
448  public:
450  void run_lambda(std::function<void(void)> lambda);
452  };
453 
/* pragma_op(arg) emits the directive `#pragma arg` from inside a macro
   expansion, where a literal #pragma line cannot be written. */
#ifdef __GNUC__
  // _Pragma takes a string literal. The argument has to be stringized
  // with #arg: macro parameters are not substituted inside a quoted
  // "arg", which would always produce the directive `#pragma arg`.
  #define pragma_op(arg) _Pragma(#arg)
#else
  // compilers without __GNUC__: __pragma keyword, unquoted argument
  #define pragma_op(arg) __pragma(arg)
#endif
459 
/**
 Organizes a proper parallel omp section:
 - iteration on thread independent partitions
 - passing exceptions to the master thread
 - thread-safe locale

 With GETFEM_HAS_OPENMP the body is wrapped into a lambda taking its
 captures by reference and handed to getfem::parallel_execution
 (GETFEM_OMP_PARALLEL*) or to getfem::parallel_boilerplate::run_lambda
 (GETFEM_OMP_FOR). Without it the body is pasted in place.
*/
#ifdef GETFEM_HAS_OPENMP
  #define GETFEM_OMP_PARALLEL(body) getfem::parallel_execution([&](){body;}, true);

  /**execute in parallel, but do not iterate over partitions*/
  #define GETFEM_OMP_PARALLEL_NO_PARTITION(body) getfem::parallel_execution([&](){body;}, false);

  /**execute for loop in parallel. Not iterating over partitions*/
  #define GETFEM_OMP_FOR(init, check, increment, body) {\
    auto boilerplate = getfem::parallel_boilerplate{}; \
    pragma_op(omp parallel for) \
    for (init; check; increment){ \
      boilerplate.run_lambda([&](){body;}); \
    } \
  }

#else
  #define GETFEM_OMP_PARALLEL(body) body
  #define GETFEM_OMP_PARALLEL_NO_PARTITION(body) body;

  /* The body is terminated with ';' exactly as in the OpenMP variant,
     so that a call which compiles with OpenMP also compiles without it
     (an additional empty statement inside the braces is harmless). */
  #define GETFEM_OMP_FOR(init, check, increment, body)\
    for (init; check; increment) { \
      body; \
    }

#endif
490 
491 } /* end of namespace getfem. */
defines and typedefs for namespace bgeot
Use this template class for any object you want to distribute to open_MP threads.
Definition: getfem_omp.h:325
Encapsulates open_mp-related initialization and de-initialization.
Definition: getfem_omp.h:444
Iterator that runs over partitions on the current thread and sets the global (but thread-specific) pa...
Definition: getfem_omp.h:355
A singleton that Manages partitions on individual threads.
Definition: getfem_omp.h:381
partition_iterator begin()
beginning of the partitions for the current thread
Definition: getfem_omp.cc:214
size_type get_current_partition() const
active partition on the thread.
Definition: getfem_omp.cc:242
size_type get_nb_partitions() const
number of partitions or threads, depending on thread policy
Definition: getfem_omp.cc:253
partition_iterator end()
end of the partitions for the current thread
Definition: getfem_omp.cc:222
void set_nb_partitions(size_type)
for thread_behaviour::partition_threads set the total number of partitions.
Definition: getfem_omp.cc:197
void set_behaviour(thread_behaviour)
Sets the behaviour for the full program: either partitioning parallel loops according to the number o...
Definition: getfem_omp.cc:226
Identical to gmm::standard_locale, but does not change std::locale in multi-threaded sections of the ...
Definition: getfem_locale.h:49
Allows re-throwing exceptions generated in an OpenMP parallel section.
Definition: getfem_omp.cc:119
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:48
GEneric Tool for Finite Element Methods.
bool not_multithreaded()
is the program running on a single thread
Definition: getfem_omp.cc:109
size_type max_concurrency()
Maximum number of threads that can run concurrently.
Definition: getfem_omp.cc:111
bool me_is_multithreaded_now()
is the program running in the parallel section
Definition: getfem_omp.cc:105
void set_num_threads(int n)
set maximum number of OpenMP threads
Definition: getfem_omp.cc:107
Thread policy, regulated by partition_master (can be true thread- or partition-based)
Definition: getfem_omp.h:129
Thread policy, where partitioning is based on true threads.
Definition: getfem_omp.h:122