Reference documentation for deal.II version 9.3.2
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
task_info.cc
Go to the documentation of this file.
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2018 - 2021 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
19 #include <deal.II/base/mpi.h>
21 #include <deal.II/base/parallel.h>
22 #include <deal.II/base/utilities.h>
23 
25 
26 
27 #ifdef DEAL_II_WITH_TBB
28 # include <tbb/blocked_range.h>
29 # include <tbb/parallel_for.h>
30 # include <tbb/task.h>
31 # include <tbb/task_scheduler_init.h>
32 #endif
33 
34 #include <iostream>
35 #include <set>
36 
38 
39 
40 
41 /*-------------------- Implementation of the matrix-free loop --------------*/
42 namespace internal
43 {
44  namespace MatrixFreeFunctions
45  {
46 #ifdef DEAL_II_WITH_TBB
47 
48  // This defines the TBB data structures that are needed to schedule the
49  // partition-partition variant
50 
51  namespace partition
52  {
54  {
55  public:
57  const unsigned int partition,
58  const TaskInfo & task_info)
59  : worker(nullptr)
63  {}
64 
66  const unsigned int partition,
67  const TaskInfo & task_info)
68  : worker(&worker)
69  , worker_pointer(nullptr)
72  {}
73 
74  void
75  operator()() const
76  {
77  MFWorkerInterface *used_worker =
78  worker != nullptr ? worker : *worker_pointer;
79  Assert(used_worker != nullptr, ExcInternalError());
80  used_worker->cell(partition);
81 
82  if (task_info.face_partition_data.empty() == false)
83  {
84  used_worker->face(partition);
85  used_worker->boundary(partition);
86  }
87  }
88 
89  private:
92  const unsigned int partition;
94  };
95 
96  class CellWork : public tbb::task
97  {
98  public:
100  const unsigned int partition,
101  const TaskInfo & task_info,
102  const bool is_blocked)
103  : dummy(nullptr)
104  , work(worker, partition, task_info)
106  {}
107 
108  tbb::task *
109  execute() override
110  {
111  work();
112 
113  if (is_blocked == true)
114  tbb::empty_task::spawn(*dummy);
115  return nullptr;
116  }
117 
118  tbb::empty_task *dummy;
119 
120  private:
122  const bool is_blocked;
123  };
124 
125 
126 
127  class PartitionWork : public tbb::task
128  {
129  public:
131  const unsigned int partition_in,
132  const TaskInfo & task_info_in,
133  const bool is_blocked_in = false)
134  : dummy(nullptr)
135  , function(function_in)
136  , partition(partition_in)
137  , task_info(task_info_in)
138  , is_blocked(is_blocked_in)
139  {}
140 
141  tbb::task *
142  execute() override
143  {
144  tbb::empty_task *root =
145  new (tbb::task::allocate_root()) tbb::empty_task;
146  const unsigned int evens = task_info.partition_evens[partition];
147  const unsigned int odds = task_info.partition_odds[partition];
148  const unsigned int n_blocked_workers =
150  const unsigned int n_workers =
152  std::vector<CellWork *> worker(n_workers);
153  std::vector<CellWork *> blocked_worker(n_blocked_workers);
154 
155  root->set_ref_count(evens + 1);
156  for (unsigned int j = 0; j < evens; j++)
157  {
158  worker[j] = new (root->allocate_child())
159  CellWork(function,
161  task_info,
162  false);
163  if (j > 0)
164  {
165  worker[j]->set_ref_count(2);
166  blocked_worker[j - 1]->dummy =
167  new (worker[j]->allocate_child()) tbb::empty_task;
168  tbb::task::spawn(*blocked_worker[j - 1]);
169  }
170  else
171  worker[j]->set_ref_count(1);
172  if (j < evens - 1)
173  {
174  blocked_worker[j] = new (worker[j]->allocate_child())
175  CellWork(function,
177  1,
178  task_info,
179  true);
180  }
181  else
182  {
183  if (odds == evens)
184  {
185  worker[evens] = new (worker[j]->allocate_child())
186  CellWork(function,
188  2 * j + 1,
189  task_info,
190  false);
191  tbb::task::spawn(*worker[evens]);
192  }
193  else
194  {
195  tbb::empty_task *child =
196  new (worker[j]->allocate_child()) tbb::empty_task();
197  tbb::task::spawn(*child);
198  }
199  }
200  }
201 
202  root->wait_for_all();
203  root->destroy(*root);
204  if (is_blocked == true)
205  tbb::empty_task::spawn(*dummy);
206  return nullptr;
207  }
208 
209  tbb::empty_task *dummy;
210 
211  private:
212  MFWorkerInterface &function;
213  const unsigned int partition;
215  const bool is_blocked;
216  };
217 
218  } // end of namespace partition
219 
220 
221 
222  namespace color
223  {
224  class CellWork
225  {
226  public:
228  const TaskInfo & task_info_in,
229  const unsigned int partition_in)
230  : worker(worker_in)
231  , task_info(task_info_in)
232  , partition(partition_in)
233  {}
234 
235  void
236  operator()(const tbb::blocked_range<unsigned int> &r) const
237  {
238  const unsigned int start_index =
240  task_info.block_size * r.begin();
241  const unsigned int end_index =
242  std::min(start_index + task_info.block_size * (r.end() - r.begin()),
244  worker.cell(std::make_pair(start_index, end_index));
245 
246  if (task_info.face_partition_data.empty() == false)
247  {
248  AssertThrow(false, ExcNotImplemented());
249  }
250  }
251 
252  private:
255  const unsigned int partition;
256  };
257 
258 
259 
260  class PartitionWork : public tbb::task
261  {
262  public:
264  const unsigned int partition_in,
265  const TaskInfo & task_info_in,
266  const bool is_blocked_in)
267  : dummy(nullptr)
268  , worker(worker_in)
269  , partition(partition_in)
270  , task_info(task_info_in)
271  , is_blocked(is_blocked_in)
272  {}
273 
274  tbb::task *
275  execute() override
276  {
277  const unsigned int n_chunks =
280  1) /
282  parallel_for(tbb::blocked_range<unsigned int>(0, n_chunks, 1),
284  if (is_blocked == true)
285  tbb::empty_task::spawn(*dummy);
286  return nullptr;
287  }
288 
289  tbb::empty_task *dummy;
290 
291  private:
293  const unsigned int partition;
295  const bool is_blocked;
296  };
297 
298  } // end of namespace color
299 
300 
301 
302  class MPICommunication : public tbb::task
303  {
304  public:
306  : worker(worker_in)
308  {}
309 
310  tbb::task *
311  execute() override
312  {
313  if (do_compress == false)
315  else
317  return nullptr;
318  }
319 
320  private:
322  const bool do_compress;
323  };
324 
325 #endif // DEAL_II_WITH_TBB
326 
327 
328 
329  void
331  {
332  // If we use thread parallelism, we do not currently support to schedule
333  // pieces of updates within the loop, so this index will collect all
334  // calls in that case and work like a single complete loop over all
335  // cells
336  if (scheme != none)
338  else
339  funct.cell_loop_pre_range(
341 
343 
344 #ifdef DEAL_II_WITH_TBB
345 
346  if (scheme != none)
347  {
349  if (scheme == partition_partition && evens > 0)
350  {
351  tbb::empty_task *root =
352  new (tbb::task::allocate_root()) tbb::empty_task;
353  root->set_ref_count(evens + 1);
354  std::vector<partition::PartitionWork *> worker(n_workers);
355  std::vector<partition::PartitionWork *> blocked_worker(
357  MPICommunication *worker_compr =
358  new (root->allocate_child()) MPICommunication(funct, true);
359  worker_compr->set_ref_count(1);
360  for (unsigned int j = 0; j < evens; j++)
361  {
362  if (j > 0)
363  {
364  worker[j] = new (root->allocate_child())
365  partition::PartitionWork(funct, 2 * j, *this, false);
366  worker[j]->set_ref_count(2);
367  blocked_worker[j - 1]->dummy =
368  new (worker[j]->allocate_child()) tbb::empty_task;
369  tbb::task::spawn(*blocked_worker[j - 1]);
370  }
371  else
372  {
373  worker[j] = new (worker_compr->allocate_child())
374  partition::PartitionWork(funct, 2 * j, *this, false);
375  worker[j]->set_ref_count(2);
376  MPICommunication *worker_dist =
377  new (worker[j]->allocate_child())
378  MPICommunication(funct, false);
379  tbb::task::spawn(*worker_dist);
380  }
381  if (j < evens - 1)
382  {
383  blocked_worker[j] = new (worker[j]->allocate_child())
384  partition::PartitionWork(funct, 2 * j + 1, *this, true);
385  }
386  else
387  {
388  if (odds == evens)
389  {
390  worker[evens] = new (worker[j]->allocate_child())
392  2 * j + 1,
393  *this,
394  false);
395  tbb::task::spawn(*worker[evens]);
396  }
397  else
398  {
399  tbb::empty_task *child =
400  new (worker[j]->allocate_child()) tbb::empty_task();
401  tbb::task::spawn(*child);
402  }
403  }
404  }
405 
406  root->wait_for_all();
407  root->destroy(*root);
408  }
409  else if (scheme == partition_partition)
410  {
411  // catch the case of empty partition list: we still need to call
412  // the vector communication routines to clean up and initiate
413  // things
415  funct.vector_compress_start();
416  }
417  else // end of partition-partition, start of partition-color
418  {
419  // check whether there is only one partition. if not, build up the
420  // tree of partitions
421  if (odds > 0)
422  {
423  tbb::empty_task *root =
424  new (tbb::task::allocate_root()) tbb::empty_task;
425  root->set_ref_count(evens + 1);
426  const unsigned int n_blocked_workers =
427  odds - (odds + evens + 1) % 2;
428  const unsigned int n_workers =
430  std::vector<color::PartitionWork *> worker(n_workers);
431  std::vector<color::PartitionWork *> blocked_worker(
433  unsigned int worker_index = 0, slice_index = 0;
434  int spawn_index_child = -2;
435  MPICommunication *worker_compr =
436  new (root->allocate_child()) MPICommunication(funct, true);
437  worker_compr->set_ref_count(1);
438  for (unsigned int part = 0;
439  part < partition_row_index.size() - 1;
440  part++)
441  {
442  if (part == 0)
443  worker[worker_index] =
444  new (worker_compr->allocate_child())
445  color::PartitionWork(funct,
446  slice_index,
447  *this,
448  false);
449  else
450  worker[worker_index] = new (root->allocate_child())
451  color::PartitionWork(funct,
452  slice_index,
453  *this,
454  false);
455  slice_index++;
456  for (; slice_index < partition_row_index[part + 1];
457  slice_index++)
458  {
459  worker[worker_index]->set_ref_count(1);
460  worker_index++;
461  worker[worker_index] =
462  new (worker[worker_index - 1]->allocate_child())
463  color::PartitionWork(funct,
464  slice_index,
465  *this,
466  false);
467  }
468  worker[worker_index]->set_ref_count(2);
469  if (part > 0)
470  {
471  blocked_worker[(part - 1) / 2]->dummy =
472  new (worker[worker_index]->allocate_child())
473  tbb::empty_task;
474  worker_index++;
475  if (spawn_index_child == -1)
476  tbb::task::spawn(*blocked_worker[(part - 1) / 2]);
477  else
478  {
479  Assert(spawn_index_child >= 0,
480  ExcInternalError());
481  tbb::task::spawn(*worker[spawn_index_child]);
482  }
483  spawn_index_child = -2;
484  }
485  else
486  {
487  MPICommunication *worker_dist =
488  new (worker[worker_index]->allocate_child())
489  MPICommunication(funct, false);
490  tbb::task::spawn(*worker_dist);
491  worker_index++;
492  }
493  part += 1;
494  if (part < partition_row_index.size() - 1)
495  {
496  if (part < partition_row_index.size() - 2)
497  {
498  blocked_worker[part / 2] =
499  new (worker[worker_index - 1]->allocate_child())
500  color::PartitionWork(funct,
501  slice_index,
502  *this,
503  true);
504  slice_index++;
505  if (slice_index < partition_row_index[part + 1])
506  {
507  blocked_worker[part / 2]->set_ref_count(1);
508  worker[worker_index] = new (
509  blocked_worker[part / 2]->allocate_child())
510  color::PartitionWork(funct,
511  slice_index,
512  *this,
513  false);
514  slice_index++;
515  }
516  else
517  {
518  spawn_index_child = -1;
519  continue;
520  }
521  }
522  for (; slice_index < partition_row_index[part + 1];
523  slice_index++)
524  {
525  if (slice_index > partition_row_index[part])
526  {
527  worker[worker_index]->set_ref_count(1);
528  worker_index++;
529  }
530  worker[worker_index] =
531  new (worker[worker_index - 1]->allocate_child())
532  color::PartitionWork(funct,
533  slice_index,
534  *this,
535  false);
536  }
537  spawn_index_child = worker_index;
538  worker_index++;
539  }
540  else
541  {
542  tbb::empty_task *final =
543  new (worker[worker_index - 1]->allocate_child())
544  tbb::empty_task;
545  tbb::task::spawn(*final);
546  spawn_index_child = worker_index - 1;
547  }
548  }
549  if (evens == odds)
550  {
551  Assert(spawn_index_child >= 0, ExcInternalError());
552  tbb::task::spawn(*worker[spawn_index_child]);
553  }
554  root->wait_for_all();
555  root->destroy(*root);
556  }
557  // case when we only have one partition: this is the usual
558  // coloring scheme, and we just schedule a parallel for loop for
559  // each color
560  else
561  {
562  Assert(evens <= 1, ExcInternalError());
564 
565  for (unsigned int color = 0; color < partition_row_index[1];
566  ++color)
567  {
568  tbb::empty_task *root =
569  new (tbb::task::allocate_root()) tbb::empty_task;
570  root->set_ref_count(2);
571  color::PartitionWork *worker =
572  new (root->allocate_child())
573  color::PartitionWork(funct, color, *this, false);
574  tbb::empty_task::spawn(*worker);
575  root->wait_for_all();
576  root->destroy(*root);
577  }
578 
579  funct.vector_compress_start();
580  }
581  }
582  }
583  else
584 #endif
585  // serial loop, go through up to three times and do the MPI transfer at
586  // the beginning/end of the second part
587  {
588  for (unsigned int part = 0; part < partition_row_index.size() - 2;
589  ++part)
590  {
591  if (part == 1)
593 
594  for (unsigned int i = partition_row_index[part];
595  i < partition_row_index[part + 1];
596  ++i)
597  {
598  funct.cell_loop_pre_range(i);
599  funct.zero_dst_vector_range(i);
600  AssertIndexRange(i + 1, cell_partition_data.size());
602  {
603  funct.cell(i);
604  }
605 
606  if (face_partition_data.empty() == false)
607  {
609  funct.face(i);
610  if (boundary_partition_data[i + 1] >
612  funct.boundary(i);
613  }
614  funct.cell_loop_post_range(i);
615  }
616 
617  if (part == 1)
618  funct.vector_compress_start();
619  }
620  }
621  funct.vector_compress_finish();
622 
623  if (scheme != none)
625  else
626  funct.cell_loop_post_range(
628  }
629 
630 
631 
633  {
634  clear();
635  }
636 
637 
638 
639  void
641  {
642  n_active_cells = 0;
643  n_ghost_cells = 0;
645  block_size = 0;
646  n_blocks = 0;
647  scheme = none;
648  partition_row_index.clear();
649  partition_row_index.resize(2);
650  cell_partition_data.clear();
651  face_partition_data.clear();
652  boundary_partition_data.clear();
653  evens = 0;
654  odds = 0;
655  n_blocked_workers = 0;
656  n_workers = 0;
657  partition_evens.clear();
658  partition_odds.clear();
660  partition_n_workers.clear();
661  communicator = MPI_COMM_SELF;
662  my_pid = 0;
663  n_procs = 1;
664  }
665 
666 
667 
668  template <typename StreamType>
669  void
671  const std::size_t data_length) const
672  {
673  Utilities::MPI::MinMaxAvg memory_c =
674  Utilities::MPI::min_max_avg(1e-6 * data_length, communicator);
675  if (n_procs < 2)
676  out << memory_c.min;
677  else
678  out << memory_c.min << "/" << memory_c.avg << "/" << memory_c.max;
679  out << " MB" << std::endl;
680  }
681 
682 
683 
684  std::size_t
686  {
687  return (
688  sizeof(*this) +
697  }
698 
699 
700 
701  void
703  std::vector<unsigned int> &boundary_cells)
704  {
705  // try to make the number of boundary cells divisible by the number of
706  // vectors in vectorization
707  unsigned int fillup_needed =
708  (vectorization_length - boundary_cells.size() % vectorization_length) %
710  if (fillup_needed > 0 && boundary_cells.size() < n_active_cells)
711  {
712  // fill additional cells into the list of boundary cells to get a
713  // balanced number. Go through the indices successively until we
714  // found enough indices
715  std::vector<unsigned int> new_boundary_cells;
716  new_boundary_cells.reserve(boundary_cells.size());
717 
718  unsigned int next_free_slot = 0, bound_index = 0;
719  while (fillup_needed > 0 && bound_index < boundary_cells.size())
720  {
721  if (next_free_slot < boundary_cells[bound_index])
722  {
723  // check if there are enough cells to fill with in the
724  // current slot
725  if (next_free_slot + fillup_needed <=
726  boundary_cells[bound_index])
727  {
728  for (unsigned int j =
729  boundary_cells[bound_index] - fillup_needed;
730  j < boundary_cells[bound_index];
731  ++j)
732  new_boundary_cells.push_back(j);
733  fillup_needed = 0;
734  }
735  // ok, not enough indices, so just take them all up to the
736  // next boundary cell
737  else
738  {
739  for (unsigned int j = next_free_slot;
740  j < boundary_cells[bound_index];
741  ++j)
742  new_boundary_cells.push_back(j);
743  fillup_needed -=
744  boundary_cells[bound_index] - next_free_slot;
745  }
746  }
747  new_boundary_cells.push_back(boundary_cells[bound_index]);
748  next_free_slot = boundary_cells[bound_index] + 1;
749  ++bound_index;
750  }
751  while (fillup_needed > 0 &&
752  (new_boundary_cells.size() == 0 ||
753  new_boundary_cells.back() < n_active_cells - 1))
754  new_boundary_cells.push_back(new_boundary_cells.back() + 1);
755  while (bound_index < boundary_cells.size())
756  new_boundary_cells.push_back(boundary_cells[bound_index++]);
757 
758  boundary_cells.swap(new_boundary_cells);
759  }
760 
761  // set the number of cells
762  std::sort(boundary_cells.begin(), boundary_cells.end());
763 
764  // check that number of boundary cells is divisible by
765  // vectorization_length or that it contains all cells
766  Assert(boundary_cells.size() % vectorization_length == 0 ||
767  boundary_cells.size() == n_active_cells,
768  ExcInternalError());
769  }
770 
771 
772 
773  void
775  const std::vector<unsigned int> &cells_with_comm,
776  const unsigned int dofs_per_cell,
777  const bool categories_are_hp,
778  const std::vector<unsigned int> &cell_vectorization_categories,
779  const bool cell_vectorization_categories_strict,
780  const std::vector<unsigned int> &parent_relation,
781  std::vector<unsigned int> & renumbering,
782  std::vector<unsigned char> & incompletely_filled_vectorization)
783  {
784  Assert(dofs_per_cell > 0, ExcInternalError());
785  // This function is decomposed into several steps to determine a good
786  // ordering that satisfies the following constraints:
787  // a. Only cells belonging to the same category (or next higher if the
788  // cell_vectorization_categories_strict is false) can be grouped into
789  // the same SIMD batch
790  // b. hp-adaptive computations must form contiguous ranges for the same
791  // degree (category) in cell_partition_data
792  // c. We want to group the cells with the same parent in the same SIMD
793  // lane if possible
794  // d. The cell order should be similar to the initial one
795  // e. Form sets without MPI communication and those with to overlap
796  // communication with computation
797  //
798  // These constraints are satisfied by first grouping by the categories
799  // and, within the groups, to distinguish between cells with a parent
800  // and those without. All of this is set up with batches of cells (with
801  // padding if the size does not match). Then we define a vector of
802  // arrays where we define sorting criteria for the cell batches to
803  // satisfy the items b and d together, split by different parts to
804  // satisfy item e.
805 
806  // Give the compiler a chance to detect that vectorization_length is a
807  // power of two, which allows it to replace integer divisions by shifts
808  unsigned int vectorization_length_bits = 0;
809  unsigned int my_length = vectorization_length;
810  while (my_length >>= 1)
811  ++vectorization_length_bits;
812  const unsigned int n_lanes = 1 << vectorization_length_bits;
813 
814  // Step 1: find tight map of categories for not taking exceeding amounts
815  // of memory below. Sort the new categories by the numbers in the
816  // old one to ensure we respect the given rules
817  unsigned int n_categories = 1;
818  std::vector<unsigned int> tight_category_map(n_active_cells, 0);
819  if (cell_vectorization_categories.empty() == false)
820  {
821  AssertDimension(cell_vectorization_categories.size(),
823 
824  std::set<unsigned int> used_categories;
825  for (unsigned int i = 0; i < n_active_cells; ++i)
826  used_categories.insert(cell_vectorization_categories[i]);
827  std::vector<unsigned int> used_categories_vector(
828  used_categories.size());
829  n_categories = 0;
830  for (const auto &it : used_categories)
831  used_categories_vector[n_categories++] = it;
832  for (unsigned int i = 0; i < n_active_cells; ++i)
833  {
834  const unsigned int index =
835  std::lower_bound(used_categories_vector.begin(),
836  used_categories_vector.end(),
837  cell_vectorization_categories[i]) -
838  used_categories_vector.begin();
839  AssertIndexRange(index, used_categories_vector.size());
840  tight_category_map[i] = index;
841  }
842  }
843 
844  // Step 2: Sort the cells by the category. If we want to fill up the
845  // ranges in vectorization, promote some of the cells to a higher
846  // category
847  std::vector<std::vector<unsigned int>> renumbering_category(n_categories);
848  for (unsigned int i = 0; i < n_active_cells; ++i)
849  renumbering_category[tight_category_map[i]].push_back(i);
850 
851  if (cell_vectorization_categories_strict == false && n_categories > 1)
852  for (unsigned int j = n_categories - 1; j > 0; --j)
853  {
854  unsigned int lower_index = j - 1;
855  while (renumbering_category[j].size() % n_lanes)
856  {
857  while (renumbering_category[j].size() % n_lanes &&
858  !renumbering_category[lower_index].empty())
859  {
860  renumbering_category[j].push_back(
861  renumbering_category[lower_index].back());
862  renumbering_category[lower_index].pop_back();
863  }
864  if (lower_index == 0)
865  break;
866  else
867  --lower_index;
868  }
869  }
870 
871  // Step 3: Use the parent relation to find a good grouping of cells. To
872  // do this, we first put cells of each category defined above into two
873  // bins, those which we know can be grouped together by the given parent
874  // relation and those which cannot
875  std::vector<unsigned int> temporary_numbering;
876  temporary_numbering.reserve(n_active_cells +
877  (n_lanes - 1) * n_categories);
878  const unsigned int n_cells_per_parent =
879  std::count(parent_relation.begin(), parent_relation.end(), 0);
880  std::vector<unsigned int> category_size;
881  for (unsigned int j = 0; j < n_categories; ++j)
882  {
883  std::vector<std::pair<unsigned int, unsigned int>> grouped_cells;
884  std::vector<unsigned int> other_cells;
885  for (const unsigned int cell : renumbering_category[j])
886  if (parent_relation.empty() ||
887  parent_relation[cell] == numbers::invalid_unsigned_int)
888  other_cells.push_back(cell);
889  else
890  grouped_cells.emplace_back(parent_relation[cell], cell);
891 
892  // Compute the number of cells per group
893  std::sort(grouped_cells.begin(), grouped_cells.end());
894  std::vector<unsigned int> n_cells_per_group;
895  unsigned int length = 0;
896  for (unsigned int i = 0; i < grouped_cells.size(); ++i, ++length)
897  if (i > 0 && grouped_cells[i].first != grouped_cells[i - 1].first)
898  {
899  n_cells_per_group.push_back(length);
900  length = 0;
901  }
902  if (length > 0)
903  n_cells_per_group.push_back(length);
904 
905  // Move groups that do not have the complete size (due to
906  // categories) to the 'other_cells'. The cells with correct group
907  // size are immediately appended to the temporary cell numbering
908  auto group_it = grouped_cells.begin();
909  for (unsigned int length : n_cells_per_group)
910  if (length < n_cells_per_parent)
911  for (unsigned int j = 0; j < length; ++j)
912  other_cells.push_back((group_it++)->second);
913  else
914  {
915  // we should not have more cells in a group than in the first
916  // check we did above
917  AssertDimension(length, n_cells_per_parent);
918  for (unsigned int j = 0; j < length; ++j)
919  temporary_numbering.push_back((group_it++)->second);
920  }
921 
922  // Sort the remaining cells and append them as well
923  std::sort(other_cells.begin(), other_cells.end());
924  temporary_numbering.insert(temporary_numbering.end(),
925  other_cells.begin(),
926  other_cells.end());
927 
928  while (temporary_numbering.size() % n_lanes != 0)
929  temporary_numbering.push_back(numbers::invalid_unsigned_int);
930 
931  category_size.push_back(temporary_numbering.size());
932  }
933 
934  // Step 4: Identify the batches with cells marked as "comm"
935  std::vector<bool> batch_with_comm(temporary_numbering.size() / n_lanes,
936  false);
937  std::vector<unsigned int> temporary_numbering_inverse(n_active_cells);
938  for (unsigned int i = 0; i < temporary_numbering.size(); ++i)
939  if (temporary_numbering[i] != numbers::invalid_unsigned_int)
940  temporary_numbering_inverse[temporary_numbering[i]] = i;
941  for (const unsigned int cell : cells_with_comm)
942  batch_with_comm[temporary_numbering_inverse[cell] / n_lanes] = true;
943 
944  // Step 5: Sort the batches of cells by their last cell index to get
945  // good locality, assuming that the initial cell order is of good
946  // locality. In case we have hp-calculations with categories, we need to
947  // sort also by the category.
948  std::vector<std::array<unsigned int, 3>> batch_order;
949  std::vector<std::array<unsigned int, 3>> batch_order_comm;
950  for (unsigned int i = 0; i < temporary_numbering.size(); i += n_lanes)
951  {
952  unsigned int max_index = 0;
953  for (unsigned int j = 0; j < n_lanes; ++j)
954  if (temporary_numbering[i + j] < numbers::invalid_unsigned_int)
955  max_index = std::max(temporary_numbering[i + j], max_index);
956  const unsigned int category_hp =
957  categories_are_hp ?
958  std::upper_bound(category_size.begin(), category_size.end(), i) -
959  category_size.begin() :
960  0;
961  const std::array<unsigned int, 3> next{{category_hp, max_index, i}};
962  if (batch_with_comm[i / n_lanes])
963  batch_order_comm.emplace_back(next);
964  else
965  batch_order.emplace_back(next);
966  }
967 
968  std::sort(batch_order.begin(), batch_order.end());
969  std::sort(batch_order_comm.begin(), batch_order_comm.end());
970 
971  // Step 6: Put the cells with communication in the middle of the cell
972  // range. For the MPI case, we need three groups to enable overlap for
973  // communication and computation (part before comm, part with comm, part
974  // after comm), whereas we need one for the other case. And in each
975  // case, we allow for a slot of "ghosted" cells.
976  std::vector<unsigned int> blocks;
977  if (n_procs == 1)
978  {
979  if (batch_order.empty())
980  std::swap(batch_order_comm, batch_order);
981  Assert(batch_order_comm.empty(), ExcInternalError());
982  partition_row_index.resize(3);
983  blocks = {0, static_cast<unsigned int>(batch_order.size())};
984  }
985  else
986  {
987  partition_row_index.resize(5);
988  const unsigned int comm_begin = batch_order.size() / 2;
989  batch_order.insert(batch_order.begin() + comm_begin,
990  batch_order_comm.begin(),
991  batch_order_comm.end());
992  const unsigned int comm_end = comm_begin + batch_order_comm.size();
993  const unsigned int end = batch_order.size();
994  blocks = {0, comm_begin, comm_end, end};
995  }
996 
997  // Step 7: Fill in the data by batches for the locally owned cells.
998  const unsigned int n_cell_batches = batch_order.size();
999  const unsigned int n_ghost_batches =
1000  (n_ghost_cells + n_lanes - 1) / n_lanes;
1001  incompletely_filled_vectorization.resize(n_cell_batches +
1002  n_ghost_batches);
1003 
1004  cell_partition_data.clear();
1005  cell_partition_data.resize(1, 0);
1006 
1007  renumbering.clear();
1008  renumbering.resize(n_active_cells + n_ghost_cells,
1010 
1011  unsigned int counter = 0;
1012  for (unsigned int block = 0; block < blocks.size() - 1; ++block)
1013  {
1014  const unsigned int grain_size =
1015  std::max((2048U / dofs_per_cell) / 8 * 4, 2U);
1016  for (unsigned int k = blocks[block]; k < blocks[block + 1];
1017  k += grain_size)
1018  cell_partition_data.push_back(
1019  std::min(k + grain_size, blocks[block + 1]));
1020  partition_row_index[block + 1] = cell_partition_data.size() - 1;
1021 
1022  // Set the numbering according to the reordered temporary one
1023  for (unsigned int k = blocks[block]; k < blocks[block + 1]; ++k)
1024  {
1025  const unsigned int pos = batch_order[k][2];
1026  unsigned int j = 0;
1027  for (; j < n_lanes && temporary_numbering[pos + j] !=
1029  ++j)
1030  renumbering[counter++] = temporary_numbering[pos + j];
1031  if (j < n_lanes)
1032  incompletely_filled_vectorization[k] = j;
1033  }
1034  }
1035  AssertDimension(counter, n_active_cells);
1036 
1037  // Step 8: Treat the ghost cells
1038  for (unsigned int cell = n_active_cells;
1039  cell < n_active_cells + n_ghost_cells;
1040  ++cell)
1041  {
1042  if (!cell_vectorization_categories.empty())
1043  AssertDimension(cell_vectorization_categories[cell],
1044  cell_vectorization_categories[n_active_cells]);
1045  renumbering[cell] = cell;
1046  }
1047  if (n_ghost_cells % n_lanes)
1048  incompletely_filled_vectorization.back() = n_ghost_cells % n_lanes;
1049  cell_partition_data.push_back(n_cell_batches + n_ghost_batches);
1050  partition_row_index.back() = cell_partition_data.size() - 1;
1051 
1052 #ifdef DEBUG
1053  std::vector<unsigned int> renumber_cpy(renumbering);
1054  std::sort(renumber_cpy.begin(), renumber_cpy.end());
1055  for (unsigned int i = 0; i < renumber_cpy.size(); ++i)
1056  AssertDimension(i, renumber_cpy[i]);
1057 #endif
1058  }
1059 
1060 
1061 
1062  void
1064  const std::vector<unsigned int> &boundary_cells,
1065  std::vector<unsigned int> & renumbering,
1066  std::vector<unsigned char> & incompletely_filled_vectorization)
1067  {
1068  const unsigned int n_cell_batches =
1070  const unsigned int n_ghost_slots =
1072  incompletely_filled_vectorization.resize(n_cell_batches + n_ghost_slots);
1073  if (n_cell_batches * vectorization_length > n_active_cells)
1074  incompletely_filled_vectorization[n_cell_batches - 1] =
1076  (n_cell_batches * vectorization_length - n_active_cells);
1077  if (n_ghost_slots * vectorization_length > n_ghost_cells)
1078  incompletely_filled_vectorization[n_cell_batches + n_ghost_slots - 1] =
1080  (n_ghost_slots * vectorization_length - n_ghost_cells);
1081 
1082  std::vector<unsigned int> reverse_numbering(
1084  for (unsigned int j = 0; j < boundary_cells.size(); ++j)
1085  reverse_numbering[boundary_cells[j]] = j;
1086  unsigned int counter = boundary_cells.size();
1087  for (unsigned int j = 0; j < n_active_cells; ++j)
1088  if (reverse_numbering[j] == numbers::invalid_unsigned_int)
1089  reverse_numbering[j] = counter++;
1090 
1091  AssertDimension(counter, n_active_cells);
1092  renumbering = Utilities::invert_permutation(reverse_numbering);
1093 
1094  for (unsigned int j = n_active_cells; j < n_active_cells + n_ghost_cells;
1095  ++j)
1096  renumbering.push_back(j);
1097 
1098  // TODO: might be able to simplify this code by not relying on the cell
1099  // partition data while computing the thread graph
1100  cell_partition_data.clear();
1101  cell_partition_data.push_back(0);
1102  if (n_procs > 1)
1103  {
1104  const unsigned int n_macro_boundary_cells =
1105  (boundary_cells.size() + vectorization_length - 1) /
1107  cell_partition_data.push_back(
1108  (n_cell_batches - n_macro_boundary_cells) / 2);
1110  n_macro_boundary_cells);
1111  }
1112  else
1113  AssertDimension(boundary_cells.size(), 0);
1114  cell_partition_data.push_back(n_cell_batches);
1115  cell_partition_data.push_back(cell_partition_data.back() + n_ghost_slots);
1116  partition_row_index.resize(n_procs > 1 ? 4 : 2);
1117  partition_row_index[0] = 0;
1118  partition_row_index[1] = 1;
1119  if (n_procs > 1)
1120  {
1121  partition_row_index[2] = 2;
1122  partition_row_index[3] = 3;
1123  }
1124  }
1125 
1126 
1127 
1128  void
1129  TaskInfo::guess_block_size(const unsigned int dofs_per_cell)
1130  {
1131  // user did not say a positive number, so we have to guess
1132  if (block_size == 0)
1133  {
1134  // we would like to have enough work to do, so as first guess, try
1135  // to get 16 times as many chunks as we have threads on the system.
1138 
1139  // if there are too few degrees of freedom per cell, need to
1140  // increase the block size
1141  const unsigned int minimum_parallel_grain_size = 200;
1142  if (dofs_per_cell * block_size < minimum_parallel_grain_size)
1143  block_size = (minimum_parallel_grain_size / dofs_per_cell + 1);
1144  if (dofs_per_cell * block_size > 10000)
1145  block_size /= 4;
1146 
1147  block_size =
1148  1 << static_cast<unsigned int>(std::log2(block_size + 1));
1149  }
1150  if (block_size > n_active_cells)
1152  }
1153 
1154 
1155 
1156  void
1158  DynamicSparsityPattern & connectivity_large,
1159  std::vector<unsigned int> & renumbering,
1160  std::vector<unsigned char> &irregular_cells,
1161  const bool)
1162  {
1163  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1164  if (n_cell_batches == 0)
1165  return;
1166 
1168 
1169  unsigned int partition = 0, counter = 0;
1170 
1171  // Create connectivity graph for blocks based on connectivity graph for
1172  // cells.
1173  DynamicSparsityPattern connectivity(n_blocks, n_blocks);
1174  make_connectivity_cells_to_blocks(irregular_cells,
1175  connectivity_large,
1176  connectivity);
1177 
1178  // Create cell-block partitioning.
1179 
1180  // For each block of cells, this variable saves to which partitions the
1181  // block belongs. Initialize all to -1 to mark them as not yet assigned
1182  // a partition.
1183  std::vector<unsigned int> cell_partition(n_blocks,
1185 
1186  // In element j of this variable, one puts the old number of the block
1187  // that should be the jth block in the new numeration.
1188  std::vector<unsigned int> partition_list(n_blocks, 0);
1189  std::vector<unsigned int> partition_color_list(n_blocks, 0);
1190 
1191  // This vector points to the start of each partition.
1192  std::vector<unsigned int> partition_size(2, 0);
1193 
1194  // blocking_connectivity = true;
1195 
1196  // The cluster_size in make_partitioning defines that the no. of cells
1197  // in each partition should be a multiple of cluster_size.
1198  unsigned int cluster_size = 1;
1199 
1200  // Make the partitioning of the first layer of the blocks of cells.
1201  make_partitioning(connectivity,
1202  cluster_size,
1203  cell_partition,
1204  partition_list,
1205  partition_size,
1206  partition);
1207 
1208  // Color the cells within each partition
1210  partition,
1211  cell_partition,
1212  partition_list,
1213  partition_size,
1214  partition_color_list);
1215 
1216  partition_list = renumbering;
1217 
1218 #ifdef DEBUG
1219  // in debug mode, check that the partition color list is one-to-one
1220  {
1221  std::vector<unsigned int> sorted_pc_list(partition_color_list);
1222  std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1223  for (unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1224  Assert(sorted_pc_list[i] == i, ExcInternalError());
1225  }
1226 #endif
1227 
1228  // set the start list for each block and compute the renumbering of
1229  // cells
1230  std::vector<unsigned int> block_start(n_cell_batches + 1);
1231  std::vector<unsigned char> irregular(n_cell_batches);
1232 
1233  unsigned int mcell_start = 0;
1234  block_start[0] = 0;
1235  for (unsigned int block = 0; block < n_blocks; block++)
1236  {
1237  block_start[block + 1] = block_start[block];
1238  for (unsigned int mcell = mcell_start;
1239  mcell < std::min(mcell_start + block_size, n_cell_batches);
1240  ++mcell)
1241  {
1242  unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1243  irregular_cells[mcell] :
1245  block_start[block + 1] += n_comp;
1246  ++counter;
1247  }
1248  mcell_start += block_size;
1249  }
1250  counter = 0;
1251  unsigned int counter_macro = 0;
1252  unsigned int block_size_last =
1253  n_cell_batches - block_size * (n_blocks - 1);
1254  if (block_size_last == 0)
1255  block_size_last = block_size;
1256 
1257  unsigned int tick = 0;
1258  for (unsigned int block = 0; block < n_blocks; block++)
1259  {
1260  unsigned int present_block = partition_color_list[block];
1261  for (unsigned int cell = block_start[present_block];
1262  cell < block_start[present_block + 1];
1263  ++cell)
1264  renumbering[counter++] = partition_list[cell];
1265  unsigned int this_block_size =
1266  (present_block == n_blocks - 1) ? block_size_last : block_size;
1267 
1268  // Also re-compute the content of cell_partition_data to
1269  // contain the numbers of cells, not blocks
1270  if (cell_partition_data[tick] == block)
1271  cell_partition_data[tick++] = counter_macro;
1272 
1273  for (unsigned int j = 0; j < this_block_size; j++)
1274  irregular[counter_macro++] =
1275  irregular_cells[present_block * block_size + j];
1276  }
1277  AssertDimension(tick + 1, cell_partition_data.size());
1278  cell_partition_data.back() = counter_macro;
1279 
1280  irregular_cells.swap(irregular);
1281  AssertDimension(counter, n_active_cells);
1282  AssertDimension(counter_macro, n_cell_batches);
1283 
1284  // check that the renumbering is one-to-one
1285 #ifdef DEBUG
1286  {
1287  std::vector<unsigned int> sorted_renumbering(renumbering);
1288  std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1289  for (unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1290  Assert(sorted_renumbering[i] == i, ExcInternalError());
1291  }
1292 #endif
1293 
1294 
1296  partition); // Actually sets too much for partition color case
1297 
1298  AssertDimension(cell_partition_data.back(), n_cell_batches);
1299  }
1300 
1301 
1302 
1303  void
1305  const std::vector<unsigned int> &cell_active_fe_index,
1306  DynamicSparsityPattern & connectivity,
1307  std::vector<unsigned int> & renumbering,
1308  std::vector<unsigned char> & irregular_cells,
1309  const bool hp_bool)
1310  {
1311  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1312  if (n_cell_batches == 0)
1313  return;
1314 
1316 
1317  // if we want to block before partitioning, create connectivity graph
1318  // for blocks based on connectivity graph for cells.
1319  DynamicSparsityPattern connectivity_blocks(n_blocks, n_blocks);
1320  make_connectivity_cells_to_blocks(irregular_cells,
1321  connectivity,
1322  connectivity_blocks);
1323 
1324  unsigned int n_blocks = 0;
1325  if (scheme == partition_color ||
1326  scheme == color) // blocking_connectivity == true
1327  n_blocks = this->n_blocks;
1328  else
1329  n_blocks = n_active_cells;
1330 
1331  // For each block of cells, this variable saves to which partitions the
1332  // block belongs. Initialize all to -1 to mark them as not yet assigned
1333  // a partition.
1334  std::vector<unsigned int> cell_partition(n_blocks,
1336 
1337  // In element j of this variable, one puts the old number (but after
1338  // renumbering according to the input renumbering) of the block that
1339  // should be the jth block in the new numeration.
1340  std::vector<unsigned int> partition_list(n_blocks, 0);
1341  std::vector<unsigned int> partition_2layers_list(n_blocks, 0);
1342 
1343  // This vector points to the start of each partition.
1344  std::vector<unsigned int> partition_size(2, 0);
1345 
1346  unsigned int partition = 0;
1347 
1348  // Within the partitions we want to be able to block for the case that
1349  // we do not block already in the connectivity. The cluster_size in
1350  // make_partitioning defines that the no. of cells in each partition
1351  // should be a multiple of cluster_size.
1352  unsigned int cluster_size = 1;
1353  if (scheme == partition_partition)
1354  cluster_size = block_size * vectorization_length;
1355 
1356  // Make the partitioning of the first layer of the blocks of cells.
1357  if (scheme == partition_color || scheme == color)
1358  make_partitioning(connectivity_blocks,
1359  cluster_size,
1360  cell_partition,
1361  partition_list,
1362  partition_size,
1363  partition);
1364  else
1365  make_partitioning(connectivity,
1366  cluster_size,
1367  cell_partition,
1368  partition_list,
1369  partition_size,
1370  partition);
1371 
1372  // Partition or color second layer
1373  if (scheme == partition_partition)
1374 
1375  {
1376  // Partition within partitions.
1378  connectivity,
1379  cell_active_fe_index,
1380  partition,
1381  cluster_size,
1382  hp_bool,
1383  cell_partition,
1384  partition_list,
1385  partition_size,
1386  partition_2layers_list,
1387  irregular_cells);
1388  }
1389  else if (scheme == partition_color || scheme == color)
1390  {
1391  make_coloring_within_partitions_pre_blocked(connectivity_blocks,
1392  partition,
1393  cell_partition,
1394  partition_list,
1395  partition_size,
1396  partition_2layers_list);
1397  }
1398 
1399  // in debug mode, check that the partition_2layers_list is one-to-one
1400 #ifdef DEBUG
1401  {
1402  std::vector<unsigned int> sorted_pc_list(partition_2layers_list);
1403  std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1404  for (unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1405  Assert(sorted_pc_list[i] == i, ExcInternalError());
1406  }
1407 #endif
1408 
1409  // Set the new renumbering
1410  std::vector<unsigned int> renumbering_in(n_active_cells, 0);
1411  renumbering_in.swap(renumbering);
1412  if (scheme == partition_partition) // blocking_connectivity == false
1413  {
1414  // This is the simple case. The renumbering is just a combination of
1415  // the renumbering that we were given as an input and the
1416  // renumbering of partition/coloring given in partition_2layers_list
1417  for (unsigned int j = 0; j < renumbering.size(); j++)
1418  renumbering[j] = renumbering_in[partition_2layers_list[j]];
1419  // Account for the ghost cells, finally.
1420  for (unsigned int i = 0; i < n_ghost_cells; ++i)
1421  renumbering.push_back(i + n_active_cells);
1422  }
1423  else
1424  {
1425  // set the start list for each block and compute the renumbering of
1426  // cells
1427  std::vector<unsigned int> block_start(n_cell_batches + 1);
1428  std::vector<unsigned char> irregular(n_cell_batches);
1429 
1430  unsigned int counter = 0;
1431  unsigned int mcell_start = 0;
1432  block_start[0] = 0;
1433  for (unsigned int block = 0; block < n_blocks; block++)
1434  {
1435  block_start[block + 1] = block_start[block];
1436  for (unsigned int mcell = mcell_start;
1437  mcell < std::min(mcell_start + block_size, n_cell_batches);
1438  ++mcell)
1439  {
1440  unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1441  irregular_cells[mcell] :
1443  block_start[block + 1] += n_comp;
1444  ++counter;
1445  }
1446  mcell_start += block_size;
1447  }
1448  counter = 0;
1449  unsigned int counter_macro = 0;
1450  unsigned int block_size_last =
1451  n_cell_batches - block_size * (n_blocks - 1);
1452  if (block_size_last == 0)
1453  block_size_last = block_size;
1454 
1455  unsigned int tick = 0;
1456  for (unsigned int block = 0; block < n_blocks; block++)
1457  {
1458  unsigned int present_block = partition_2layers_list[block];
1459  for (unsigned int cell = block_start[present_block];
1460  cell < block_start[present_block + 1];
1461  ++cell)
1462  renumbering[counter++] = renumbering_in[cell];
1463  unsigned int this_block_size =
1464  (present_block == n_blocks - 1) ? block_size_last : block_size;
1465 
1466  // Also re-compute the content of cell_partition_data to
1467  // contain the numbers of cells, not blocks
1468  if (cell_partition_data[tick] == block)
1469  cell_partition_data[tick++] = counter_macro;
1470 
1471  for (unsigned int j = 0; j < this_block_size; j++)
1472  irregular[counter_macro++] =
1473  irregular_cells[present_block * block_size + j];
1474  }
1475  AssertDimension(tick + 1, cell_partition_data.size());
1476  cell_partition_data.back() = counter_macro;
1477 
1478  irregular_cells.swap(irregular);
1479  AssertDimension(counter, n_active_cells);
1480  AssertDimension(counter_macro, n_cell_batches);
1481  // check that the renumbering is one-to-one
1482 #ifdef DEBUG
1483  {
1484  std::vector<unsigned int> sorted_renumbering(renumbering);
1485  std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1486  for (unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1487  Assert(sorted_renumbering[i] == i, ExcInternalError());
1488  }
1489 #endif
1490  }
1491 
1492  // Update the task_info with the more information for the thread graph.
1494  }
1495 
1496 
1497 
1498  void
1500  const std::vector<unsigned int> &cell_active_fe_index,
1501  DynamicSparsityPattern & connectivity,
1502  std::vector<unsigned int> & renumbering,
1503  std::vector<unsigned char> & irregular_cells,
1504  const bool hp_bool)
1505  {
1506  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1507  if (n_cell_batches == 0)
1508  return;
1509 
1510  const unsigned int cluster_size = block_size * vectorization_length;
1511 
1512  // Create cell-block partitioning.
1513 
1514  // For each block of cells, this variable saves to which partitions the
1515  // block belongs. Initialize all to n_cell_batches to mark them as not
1516  // yet assigned a partition.
1517  std::vector<unsigned int> cell_partition(n_active_cells,
1519 
1520 
1521  // In element j of this variable, one puts the old number of the block
1522  // that should be the jth block in the new numeration.
1523  std::vector<unsigned int> partition_list(n_active_cells, 0);
1524  std::vector<unsigned int> partition_partition_list(n_active_cells, 0);
1525 
1526  // This vector points to the start of each partition.
1527  std::vector<unsigned int> partition_size(2, 0);
1528 
1529  unsigned int partition = 0;
1530  // Here, we do not block inside the connectivity graph
1531  // blocking_connectivity = false;
1532 
1533  // Make the partitioning of the first layer of the blocks of cells.
1534  make_partitioning(connectivity,
1535  cluster_size,
1536  cell_partition,
1537  partition_list,
1538  partition_size,
1539  partition);
1540 
1541  // Partition within partitions.
1543  cell_active_fe_index,
1544  partition,
1545  cluster_size,
1546  hp_bool,
1547  cell_partition,
1548  partition_list,
1549  partition_size,
1550  partition_partition_list,
1551  irregular_cells);
1552 
1553  partition_list.swap(renumbering);
1554 
1555  for (unsigned int j = 0; j < renumbering.size(); j++)
1556  renumbering[j] = partition_list[partition_partition_list[j]];
1557 
1558  for (unsigned int i = 0; i < n_ghost_cells; ++i)
1559  renumbering.push_back(i + n_active_cells);
1560 
1562  }
1563 
1564 
1565 
1566  void
1568  const std::vector<unsigned char> &irregular_cells,
1569  const DynamicSparsityPattern & connectivity_cells,
1570  DynamicSparsityPattern & connectivity_blocks) const
1571  {
1572  std::vector<std::vector<unsigned int>> cell_blocks(n_blocks);
1573  std::vector<unsigned int> touched_cells(n_active_cells);
1574  unsigned int cell = 0;
1575  for (unsigned int i = 0, mcell = 0; i < n_blocks; ++i)
1576  {
1577  for (unsigned int c = 0;
1578  c < block_size && mcell < *(cell_partition_data.end() - 2);
1579  ++c, ++mcell)
1580  {
1581  unsigned int ncomp = (irregular_cells[mcell] > 0) ?
1582  irregular_cells[mcell] :
1584  for (unsigned int c = 0; c < ncomp; ++c, ++cell)
1585  {
1586  cell_blocks[i].push_back(cell);
1587  touched_cells[cell] = i;
1588  }
1589  }
1590  }
1592  for (unsigned int i = 0; i < cell_blocks.size(); ++i)
1593  for (unsigned int col = 0; col < cell_blocks[i].size(); ++col)
1594  {
1596  connectivity_cells.begin(cell_blocks[i][col]);
1597  it != connectivity_cells.end(cell_blocks[i][col]);
1598  ++it)
1599  {
1600  if (touched_cells[it->column()] != i)
1601  connectivity_blocks.add(i, touched_cells[it->column()]);
1602  }
1603  }
1604  }
1605 
1606 
1607 
1608  // Function to create partitioning on the second layer within each
1609  // partition. Version without preblocking.
1610  void
1612  const DynamicSparsityPattern & connectivity,
1613  const std::vector<unsigned int> &cell_active_fe_index,
1614  const unsigned int partition,
1615  const unsigned int cluster_size,
1616  const bool hp_bool,
1617  const std::vector<unsigned int> &cell_partition,
1618  const std::vector<unsigned int> &partition_list,
1619  const std::vector<unsigned int> &partition_size,
1620  std::vector<unsigned int> & partition_partition_list,
1621  std::vector<unsigned char> & irregular_cells)
1622  {
1623  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1624  const unsigned int n_ghost_slots =
1625  *(cell_partition_data.end() - 1) - n_cell_batches;
1626 
1627  // List of cells in previous partition
1628  std::vector<unsigned int> neighbor_list;
1629  // List of cells in current partition for use as neighbors in next
1630  // partition
1631  std::vector<unsigned int> neighbor_neighbor_list;
1632 
1633  std::vector<unsigned int> renumbering(n_active_cells);
1634 
1635  irregular_cells.back() = 0;
1636  irregular_cells.resize(n_active_cells + n_ghost_slots);
1637 
1638  unsigned int max_fe_index = 0;
1639  for (const unsigned int fe_index : cell_active_fe_index)
1640  max_fe_index = std::max(fe_index, max_fe_index);
1641 
1642  Assert(!hp_bool || cell_active_fe_index.size() == n_active_cells,
1643  ExcInternalError());
1644 
1645  {
1646  unsigned int n_cell_batches_before = 0;
1647  // Create partitioning within partitions.
1648 
1649  // For each block of cells, this variable saves to which partitions
1650  // the block belongs. Initialize all to n_cell_batches to mark them as
1651  // not yet assigned a partition.
1652  std::vector<unsigned int> cell_partition_l2(
1654  partition_row_index.clear();
1655  partition_row_index.resize(partition + 1, 0);
1656  cell_partition_data.resize(1, 0);
1657 
1658  unsigned int counter = 0;
1659  unsigned int missing_macros;
1660  for (unsigned int part = 0; part < partition; ++part)
1661  {
1662  neighbor_neighbor_list.resize(0);
1663  neighbor_list.resize(0);
1664  bool work = true;
1665  unsigned int partition_l2 = 0;
1666  unsigned int start_up = partition_size[part];
1667  unsigned int partition_counter = 0;
1668  while (work)
1669  {
1670  if (neighbor_list.size() == 0)
1671  {
1672  work = false;
1673  partition_counter = 0;
1674  for (unsigned int j = start_up;
1675  j < partition_size[part + 1];
1676  ++j)
1677  if (cell_partition[partition_list[j]] == part &&
1678  cell_partition_l2[partition_list[j]] ==
1680  {
1681  start_up = j;
1682  work = true;
1683  partition_counter = 1;
1684  // To start up, set the start_up cell to partition
1685  // and list all its neighbors.
1686  AssertIndexRange(start_up, partition_size[part + 1]);
1687  cell_partition_l2[partition_list[start_up]] =
1688  partition_l2;
1689  neighbor_neighbor_list.push_back(
1690  partition_list[start_up]);
1691  partition_partition_list[counter++] =
1692  partition_list[start_up];
1693  start_up++;
1694  break;
1695  }
1696  }
1697  else
1698  {
1699  partition_counter = 0;
1700  for (const unsigned int neighbor : neighbor_list)
1701  {
1702  Assert(cell_partition[neighbor] == part,
1703  ExcInternalError());
1704  Assert(cell_partition_l2[neighbor] == partition_l2 - 1,
1705  ExcInternalError());
1706  auto neighbor_it = connectivity.begin(neighbor);
1707  const auto end_it = connectivity.end(neighbor);
1708  for (; neighbor_it != end_it; ++neighbor_it)
1709  {
1710  if (cell_partition[neighbor_it->column()] == part &&
1711  cell_partition_l2[neighbor_it->column()] ==
1713  {
1714  cell_partition_l2[neighbor_it->column()] =
1715  partition_l2;
1716  neighbor_neighbor_list.push_back(
1717  neighbor_it->column());
1718  partition_partition_list[counter++] =
1719  neighbor_it->column();
1720  partition_counter++;
1721  }
1722  }
1723  }
1724  }
1725  if (partition_counter > 0)
1726  {
1727  int index_before = neighbor_neighbor_list.size(),
1728  index = index_before;
1729  {
1730  // put the cells into separate lists for each FE index
1731  // within one partition-partition
1732  missing_macros = 0;
1733  std::vector<unsigned int> remaining_per_cell_batch(
1734  max_fe_index + 1);
1735  std::vector<std::vector<unsigned int>>
1736  renumbering_fe_index;
1737  unsigned int cell;
1738  bool filled = true;
1739  if (hp_bool == true)
1740  {
1741  renumbering_fe_index.resize(max_fe_index + 1);
1742  for (cell = counter - partition_counter;
1743  cell < counter;
1744  ++cell)
1745  {
1746  renumbering_fe_index
1747  [cell_active_fe_index.empty() ?
1748  0 :
1749  cell_active_fe_index
1750  [partition_partition_list[cell]]]
1751  .push_back(partition_partition_list[cell]);
1752  }
1753  // check how many more cells are needed in the lists
1754  for (unsigned int j = 0; j < max_fe_index + 1; j++)
1755  {
1756  remaining_per_cell_batch[j] =
1757  renumbering_fe_index[j].size() %
1759  if (remaining_per_cell_batch[j] != 0)
1760  filled = false;
1761  missing_macros +=
1762  ((renumbering_fe_index[j].size() +
1763  vectorization_length - 1) /
1765  }
1766  }
1767  else
1768  {
1769  remaining_per_cell_batch.resize(1);
1770  remaining_per_cell_batch[0] =
1771  partition_counter % vectorization_length;
1772  missing_macros =
1773  partition_counter / vectorization_length;
1774  if (remaining_per_cell_batch[0] != 0)
1775  {
1776  filled = false;
1777  missing_macros++;
1778  }
1779  }
1780  missing_macros =
1781  cluster_size - (missing_macros % cluster_size);
1782 
1783  // now we realized that there are some cells missing.
1784  while (missing_macros > 0 || filled == false)
1785  {
1786  if (index == 0)
1787  {
1788  index = neighbor_neighbor_list.size();
1789  if (index == index_before)
1790  {
1791  if (missing_macros != 0)
1792  {
1793  neighbor_neighbor_list.resize(0);
1794  }
1795  start_up--;
1796  break; // not connected - start again
1797  }
1798  index_before = index;
1799  }
1800  index--;
1801  unsigned int additional =
1802  neighbor_neighbor_list[index];
1803 
1804  // go through the neighbors of the last cell in the
1805  // current partition and check if we find some to
1806  // fill up with.
1808  connectivity.begin(
1809  additional),
1810  end =
1811  connectivity.end(
1812  additional);
1813  for (; neighbor != end; ++neighbor)
1814  {
1815  if (cell_partition[neighbor->column()] == part &&
1816  cell_partition_l2[neighbor->column()] ==
1818  {
1819  unsigned int this_index = 0;
1820  if (hp_bool == true)
1821  this_index =
1822  cell_active_fe_index.empty() ?
1823  0 :
1824  cell_active_fe_index[neighbor
1825  ->column()];
1826 
1827  // Only add this cell if we need more macro
1828  // cells in the current block or if there is
1829  // a macro cell with the FE index that is
1830  // not yet fully populated
1831  if (missing_macros > 0 ||
1832  remaining_per_cell_batch[this_index] > 0)
1833  {
1834  cell_partition_l2[neighbor->column()] =
1835  partition_l2;
1836  neighbor_neighbor_list.push_back(
1837  neighbor->column());
1838  if (hp_bool == true)
1839  renumbering_fe_index[this_index]
1840  .push_back(neighbor->column());
1841  partition_partition_list[counter] =
1842  neighbor->column();
1843  counter++;
1844  partition_counter++;
1845  if (remaining_per_cell_batch
1846  [this_index] == 0 &&
1847  missing_macros > 0)
1848  missing_macros--;
1849  remaining_per_cell_batch[this_index]++;
1850  if (remaining_per_cell_batch
1851  [this_index] ==
1853  {
1854  remaining_per_cell_batch[this_index] =
1855  0;
1856  }
1857  if (missing_macros == 0)
1858  {
1859  filled = true;
1860  for (unsigned int fe_ind = 0;
1861  fe_ind < max_fe_index + 1;
1862  ++fe_ind)
1863  if (remaining_per_cell_batch
1864  [fe_ind] != 0)
1865  filled = false;
1866  }
1867  if (filled == true)
1868  break;
1869  }
1870  }
1871  }
1872  }
1873  if (hp_bool == true)
1874  {
1875  // set the renumbering according to their active FE
1876  // index within one partition-partition which was
1877  // implicitly assumed above
1878  cell = counter - partition_counter;
1879  for (unsigned int j = 0; j < max_fe_index + 1; j++)
1880  {
1881  for (const unsigned int jj :
1882  renumbering_fe_index[j])
1883  renumbering[cell++] = jj;
1884  if (renumbering_fe_index[j].size() %
1886  0)
1887  irregular_cells[renumbering_fe_index[j].size() /
1889  n_cell_batches_before] =
1890  renumbering_fe_index[j].size() %
1892  n_cell_batches_before +=
1893  (renumbering_fe_index[j].size() +
1894  vectorization_length - 1) /
1896  renumbering_fe_index[j].resize(0);
1897  }
1898  }
1899  else
1900  {
1901  n_cell_batches_before +=
1902  partition_counter / vectorization_length;
1903  if (partition_counter % vectorization_length != 0)
1904  {
1905  irregular_cells[n_cell_batches_before] =
1906  partition_counter % vectorization_length;
1907  n_cell_batches_before++;
1908  }
1909  }
1910  }
1911  cell_partition_data.push_back(n_cell_batches_before);
1912  partition_l2++;
1913  }
1914  neighbor_list = neighbor_neighbor_list;
1915  neighbor_neighbor_list.resize(0);
1916  }
1917  partition_row_index[part + 1] =
1918  partition_row_index[part] + partition_l2;
1919  }
1920  }
1921  if (hp_bool == true)
1922  {
1923  partition_partition_list.swap(renumbering);
1924  }
1925  }
1926 
1927 
1928 
1929  // Function to create coloring on the second layer within each partition.
1930  // Version assumes preblocking.
1931  void
1933  const DynamicSparsityPattern & connectivity,
1934  const unsigned int partition,
1935  const std::vector<unsigned int> &cell_partition,
1936  const std::vector<unsigned int> &partition_list,
1937  const std::vector<unsigned int> &partition_size,
1938  std::vector<unsigned int> & partition_color_list)
1939  {
1940  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
1941  std::vector<unsigned int> cell_color(n_blocks, n_cell_batches);
1942  std::vector<bool> color_finder;
1943 
1944  partition_row_index.resize(partition + 1);
1945  cell_partition_data.clear();
1946  unsigned int color_counter = 0, index_counter = 0;
1947  for (unsigned int part = 0; part < partition; part++)
1948  {
1949  partition_row_index[part] = index_counter;
1950  unsigned int max_color = 0;
1951  for (unsigned int k = partition_size[part];
1952  k < partition_size[part + 1];
1953  k++)
1954  {
1955  unsigned int cell = partition_list[k];
1956  unsigned int n_neighbors = connectivity.row_length(cell);
1957 
1958  // In the worst case, each neighbor has a different color. So we
1959  // find at least one available color between 0 and n_neighbors.
1960  color_finder.resize(n_neighbors + 1);
1961  for (unsigned int j = 0; j <= n_neighbors; ++j)
1962  color_finder[j] = true;
1964  connectivity.begin(cell),
1965  end = connectivity.end(cell);
1966  for (; neighbor != end; ++neighbor)
1967  {
1968  // Mark the color that a neighbor within the partition has
1969  // as taken
1970  if (cell_partition[neighbor->column()] == part &&
1971  cell_color[neighbor->column()] <= n_neighbors)
1972  color_finder[cell_color[neighbor->column()]] = false;
1973  }
1974  // Choose the smallest color that is not taken for the block
1975  cell_color[cell] = 0;
1976  while (color_finder[cell_color[cell]] == false)
1977  cell_color[cell]++;
1978  if (cell_color[cell] > max_color)
1979  max_color = cell_color[cell];
1980  }
1981  // Reorder within partition: First, all blocks that belong the 0 and
1982  // then so on until those with color max (Note that the smaller the
1983  // number the larger the partition)
1984  for (unsigned int color = 0; color <= max_color; color++)
1985  {
1986  cell_partition_data.push_back(color_counter);
1987  index_counter++;
1988  for (unsigned int k = partition_size[part];
1989  k < partition_size[part + 1];
1990  k++)
1991  {
1992  unsigned int cell = partition_list[k];
1993  if (cell_color[cell] == color)
1994  {
1995  partition_color_list[color_counter++] = cell;
1996  }
1997  }
1998  }
1999  }
2000  cell_partition_data.push_back(n_blocks);
2001  partition_row_index[partition] = index_counter;
2002  AssertDimension(color_counter, n_blocks);
2003  }
2004 
2005 
2006  // Function to create partitioning on the first layer.
2007  void
2009  const unsigned int cluster_size,
2010  std::vector<unsigned int> & cell_partition,
2011  std::vector<unsigned int> & partition_list,
2012  std::vector<unsigned int> & partition_size,
2013  unsigned int & partition) const
2014 
2015  {
2016  // For each block of cells, this variable saves to which partitions the
2017  // block belongs. Initialize all to n_cell_batches to mark them as not
2018  // yet assigned a partition.
2019  // std::vector<unsigned int> cell_partition (n_active_cells,
2020  // numbers::invalid_unsigned_int);
2021  // List of cells in previous partition
2022  std::vector<unsigned int> neighbor_list;
2023  // List of cells in current partition for use as neighbors in next
2024  // partition
2025  std::vector<unsigned int> neighbor_neighbor_list;
2026 
2027  // In element j of this variable, one puts the old number of the block
2028  // that should be the jth block in the new numeration.
2029  // std::vector<unsigned int> partition_list(n_active_cells,0);
2030 
2031  // This vector points to the start of each partition.
2032  // std::vector<unsigned int> partition_size(2,0);
2033 
2034  partition = 0;
2035  unsigned int counter = 0;
2036  unsigned int start_nonboundary =
2037  cell_partition_data.size() == 5 ?
2040  0;
2041 
2042  const unsigned int n_cell_batches = *(cell_partition_data.end() - 2);
2043  if (n_cell_batches == 0)
2044  return;
2045  if (scheme == color)
2046  start_nonboundary = n_cell_batches;
2047  if (scheme == partition_color ||
2048  scheme == color) // blocking_connectivity == true
2049  start_nonboundary = ((start_nonboundary + block_size - 1) / block_size);
2050  unsigned int n_blocks;
2051  if (scheme == partition_color ||
2052  scheme == color) // blocking_connectivity == true
2053  n_blocks = this->n_blocks;
2054  else
2055  n_blocks = n_active_cells;
2056 
2057  if (start_nonboundary > n_blocks)
2058  start_nonboundary = n_blocks;
2059 
2060 
2061  unsigned int start_up = 0;
2062  bool work = true;
2063  unsigned int remainder = cluster_size;
2064 
2065  // this performs a classical breath-first search in the connectivity
2066  // graph of the cells under the restriction that the size of the
2067  // partitions should be a multiple of the given block size
2068  while (work)
2069  {
2070  // put the cells with neighbors on remote MPI processes up front
2071  if (start_nonboundary > 0)
2072  {
2073  for (unsigned int cell = 0; cell < start_nonboundary; ++cell)
2074  {
2075  const unsigned int cell_nn = cell;
2076  cell_partition[cell_nn] = partition;
2077  neighbor_list.push_back(cell_nn);
2078  partition_list[counter++] = cell_nn;
2079  partition_size.back()++;
2080  }
2081  start_nonboundary = 0;
2082  remainder -= (start_nonboundary % cluster_size);
2083  if (remainder == cluster_size)
2084  remainder = 0;
2085  }
2086  else
2087  {
2088  // To start up, set the start_up cell to partition and list all
2089  // its neighbors.
2090  cell_partition[start_up] = partition;
2091  neighbor_list.push_back(start_up);
2092  partition_list[counter++] = start_up;
2093  partition_size.back()++;
2094  start_up++;
2095  remainder--;
2096  if (remainder == cluster_size)
2097  remainder = 0;
2098  }
2099  int index_before = neighbor_list.size(), index = index_before,
2100  index_stop = 0;
2101  while (remainder > 0)
2102  {
2103  if (index == index_stop)
2104  {
2105  index = neighbor_list.size();
2106  if (index == index_before)
2107  {
2108  neighbor_list.resize(0);
2109  goto not_connect;
2110  }
2111  index_stop = index_before;
2112  index_before = index;
2113  }
2114  index--;
2115  unsigned int additional = neighbor_list[index];
2117  connectivity.begin(additional),
2118  end =
2119  connectivity.end(additional);
2120  for (; neighbor != end; ++neighbor)
2121  {
2122  if (cell_partition[neighbor->column()] ==
2124  {
2125  partition_size.back()++;
2126  cell_partition[neighbor->column()] = partition;
2127  neighbor_list.push_back(neighbor->column());
2128  partition_list[counter++] = neighbor->column();
2129  remainder--;
2130  if (remainder == 0)
2131  break;
2132  }
2133  }
2134  }
2135 
2136  while (neighbor_list.size() > 0)
2137  {
2138  partition++;
2139 
2140  // counter for number of cells so far in current partition
2141  unsigned int partition_counter = 0;
2142 
2143  // Mark the start of the new partition
2144  partition_size.push_back(partition_size.back());
2145 
2146  // Loop through the list of cells in previous partition and put
2147  // all their neighbors in current partition
2148  for (const unsigned int cell : neighbor_list)
2149  {
2150  Assert(cell_partition[cell] == partition - 1,
2151  ExcInternalError());
2152  auto neighbor = connectivity.begin(cell);
2153  const auto end = connectivity.end(cell);
2154  for (; neighbor != end; ++neighbor)
2155  {
2156  if (cell_partition[neighbor->column()] ==
2158  {
2159  partition_size.back()++;
2160  cell_partition[neighbor->column()] = partition;
2161 
2162  // collect the cells of the current partition for
2163  // use as neighbors in next partition
2164  neighbor_neighbor_list.push_back(neighbor->column());
2165  partition_list[counter++] = neighbor->column();
2166  partition_counter++;
2167  }
2168  }
2169  }
2170  remainder = cluster_size - (partition_counter % cluster_size);
2171  if (remainder == cluster_size)
2172  remainder = 0;
2173  int index_stop = 0;
2174  int index_before = neighbor_neighbor_list.size(),
2175  index = index_before;
2176  while (remainder > 0)
2177  {
2178  if (index == index_stop)
2179  {
2180  index = neighbor_neighbor_list.size();
2181  if (index == index_before)
2182  {
2183  neighbor_neighbor_list.resize(0);
2184  break;
2185  }
2186  index_stop = index_before;
2187  index_before = index;
2188  }
2189  index--;
2190  unsigned int additional = neighbor_neighbor_list[index];
2192  connectivity.begin(
2193  additional),
2194  end = connectivity.end(
2195  additional);
2196  for (; neighbor != end; ++neighbor)
2197  {
2198  if (cell_partition[neighbor->column()] ==
2200  {
2201  partition_size.back()++;
2202  cell_partition[neighbor->column()] = partition;
2203  neighbor_neighbor_list.push_back(neighbor->column());
2204  partition_list[counter++] = neighbor->column();
2205  remainder--;
2206  if (remainder == 0)
2207  break;
2208  }
2209  }
2210  }
2211 
2212  neighbor_list = neighbor_neighbor_list;
2213  neighbor_neighbor_list.resize(0);
2214  }
2215  not_connect:
2216  // One has to check if the graph is not connected so we have to find
2217  // another partition.
2218  work = false;
2219  for (unsigned int j = start_up; j < n_blocks; ++j)
2220  if (cell_partition[j] == numbers::invalid_unsigned_int)
2221  {
2222  start_up = j;
2223  work = true;
2224  if (remainder == 0)
2225  remainder = cluster_size;
2226  break;
2227  }
2228  }
2229  if (remainder != 0)
2230  partition++;
2231 
2232  AssertDimension(partition_size[partition], n_blocks);
2233  }
2234 
2235 
2236  void
2238  {
2239  evens = (partition + 1) / 2;
2240  odds = partition / 2;
2241  n_blocked_workers = odds - (odds + evens + 1) % 2;
2243  // From here only used for partition partition option.
2244  partition_evens.resize(partition);
2245  partition_odds.resize(partition);
2248  for (unsigned int part = 0; part < partition; part++)
2249  {
2250  partition_evens[part] =
2251  (partition_row_index[part + 1] - partition_row_index[part] + 1) / 2;
2252  partition_odds[part] =
2253  (partition_row_index[part + 1] - partition_row_index[part]) / 2;
2255  partition_odds[part] -
2256  (partition_odds[part] + partition_evens[part] + 1) % 2;
2257  partition_n_workers[part] = partition_evens[part] +
2258  partition_odds[part] -
2260  }
2261  }
2262  } // namespace MatrixFreeFunctions
2263 } // namespace internal
2264 
2265 
2266 
2267 // explicit instantiations of template functions
2268 template void
2269 internal::MatrixFreeFunctions::TaskInfo::print_memory_statistics<std::ostream>(
2270  std::ostream &,
2271  const std::size_t) const;
2272 template void
2274  ConditionalOStream>(ConditionalOStream &, const std::size_t) const;
2275 
2276 
static unsigned int n_threads()
MPICommunication(MFWorkerInterface &worker_in, const bool do_compress)
Definition: task_info.cc:305
CellWork(MFWorkerInterface &worker_in, const TaskInfo &task_info_in, const unsigned int partition_in)
Definition: task_info.cc:227
void operator()(const tbb::blocked_range< unsigned int > &r) const
Definition: task_info.cc:236
PartitionWork(MFWorkerInterface &worker_in, const unsigned int partition_in, const TaskInfo &task_info_in, const bool is_blocked_in)
Definition: task_info.cc:263
ActualCellWork(MFWorkerInterface **worker_pointer, const unsigned int partition, const TaskInfo &task_info)
Definition: task_info.cc:56
ActualCellWork(MFWorkerInterface &worker, const unsigned int partition, const TaskInfo &task_info)
Definition: task_info.cc:65
CellWork(MFWorkerInterface &worker, const unsigned int partition, const TaskInfo &task_info, const bool is_blocked)
Definition: task_info.cc:99
PartitionWork(MFWorkerInterface &function_in, const unsigned int partition_in, const TaskInfo &task_info_in, const bool is_blocked_in=false)
Definition: task_info.cc:130
#define DEAL_II_NAMESPACE_OPEN
Definition: config.h:396
#define DEAL_II_NAMESPACE_CLOSE
Definition: config.h:397
Point< 2 > first
Definition: grid_out.cc:4587
static ::ExceptionBase & ExcInternalError()
#define Assert(cond, exc)
Definition: exceptions.h:1465
static ::ExceptionBase & ExcNotImplemented()
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1622
#define AssertIndexRange(index, range)
Definition: exceptions.h:1690
#define AssertThrow(cond, exc)
Definition: exceptions.h:1575
size_type row_length(const size_type row) const
void add(const size_type i, const size_type j)
std::enable_if< std::is_fundamental< T >::value, std::size_t >::type memory_consumption(const T &t)
void swap(MemorySpaceData< Number, MemorySpace > &, MemorySpaceData< Number, MemorySpace > &)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
void partition(const SparsityPattern &sparsity_pattern, const unsigned int n_partitions, std::vector< unsigned int > &partition_indices, const Partitioner partitioner=Partitioner::metis)
VectorType::value_type * end(VectorType &V)
MinMaxAvg min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
Definition: mpi.cc:91
Iterator lower_bound(Iterator first, Iterator last, const T &val)
Definition: utilities.h:1111
std::vector< Integer > invert_permutation(const std::vector< Integer > &permutation)
Definition: utilities.h:1482
unsigned int minimum_parallel_grain_size
Definition: parallel.cc:34
static const unsigned int invalid_unsigned_int
Definition: types.h:196
void parallel_for(Iterator x_begin, Iterator x_end, const Functor &functor, const unsigned int grainsize)
Definition: parallel.h:158
virtual void face(const unsigned int range_index)=0
virtual void zero_dst_vector_range(const unsigned int range_index)=0
virtual void cell(const std::pair< unsigned int, unsigned int > &cell_range)=0
virtual void boundary(const unsigned int range_index)=0
virtual void vector_compress_start()=0
Starts the communication for the vector compress operation.
virtual void cell_loop_post_range(const unsigned int range_index)=0
virtual void vector_update_ghosts_start()=0
Starts the communication for the update ghost values operation.
virtual void cell_loop_pre_range(const unsigned int range_index)=0
virtual void vector_update_ghosts_finish()=0
Finishes the communication for the update ghost values operation.
virtual void vector_compress_finish()=0
Finishes the communication for the vector compress operation.
std::size_t memory_consumption() const
Definition: task_info.cc:685
std::vector< unsigned int > boundary_partition_data
Definition: task_info.h:518
void loop(MFWorkerInterface &worker) const
Definition: task_info.cc:330
void make_thread_graph_partition_color(DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1157
std::vector< unsigned int > partition_n_workers
Definition: task_info.h:576
void create_blocks_serial(const std::vector< unsigned int > &cells_with_comm, const unsigned int dofs_per_cell, const bool categories_are_hp, const std::vector< unsigned int > &cell_vectorization_categories, const bool cell_vectorization_categories_strict, const std::vector< unsigned int > &parent_relation, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
Definition: task_info.cc:774
void make_connectivity_cells_to_blocks(const std::vector< unsigned char > &irregular_cells, const DynamicSparsityPattern &connectivity_cells, DynamicSparsityPattern &connectivity_blocks) const
Definition: task_info.cc:1567
void make_partitioning_within_partitions_post_blocked(const DynamicSparsityPattern &connectivity, const std::vector< unsigned int > &cell_active_fe_index, const unsigned int partition, const unsigned int cluster_size, const bool hp_bool, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_partition_list, std::vector< unsigned char > &irregular_cells)
Definition: task_info.cc:1611
void initial_setup_blocks_tasks(const std::vector< unsigned int > &boundary_cells, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
Definition: task_info.cc:1063
void print_memory_statistics(StreamType &out, std::size_t data_length) const
Definition: task_info.cc:670
void make_coloring_within_partitions_pre_blocked(const DynamicSparsityPattern &connectivity, const unsigned int partition, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_color_list)
Definition: task_info.cc:1932
std::vector< unsigned int > partition_row_index
Definition: task_info.h:466
void make_thread_graph_partition_partition(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1499
std::vector< unsigned int > partition_evens
Definition: task_info.h:558
void make_thread_graph(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1304
std::vector< unsigned int > partition_n_blocked_workers
Definition: task_info.h:570
std::vector< unsigned int > cell_partition_data
Definition: task_info.h:474
void make_partitioning(const DynamicSparsityPattern &connectivity, const unsigned int cluster_size, std::vector< unsigned int > &cell_partition, std::vector< unsigned int > &partition_list, std::vector< unsigned int > &partition_size, unsigned int &partition) const
Definition: task_info.cc:2008
void update_task_info(const unsigned int partition)
Definition: task_info.cc:2237
void make_boundary_cells_divisible(std::vector< unsigned int > &boundary_cells)
Definition: task_info.cc:702
void guess_block_size(const unsigned int dofs_per_cell)
Definition: task_info.cc:1129
std::vector< unsigned int > partition_odds
Definition: task_info.h:564
std::vector< unsigned int > face_partition_data
Definition: task_info.h:496