Reference documentation for deal.II version 9.3.2
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
dof_info.cc
Go to the documentation of this file.
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2020 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
18 #include <deal.II/base/utilities.h>
20 
21 #include <deal.II/matrix_free/dof_info.templates.h>
22 
23 #include <iostream>
24 
26 
27 namespace internal
28 {
29  namespace MatrixFreeFunctions
30  {
32  {
33  clear();
34  }
35 
36 
37 
38  void
40  {
41  row_starts.clear();
42  dof_indices.clear();
43  constraint_indicator.clear();
44  vector_partitioner.reset();
45  ghost_dofs.clear();
46  dofs_per_cell.clear();
47  dofs_per_face.clear();
49  dimension = 2;
51  n_base_elements = 0;
52  n_components.clear();
53  start_components.clear();
55  plain_dof_indices.clear();
57  for (unsigned int i = 0; i < 3; ++i)
58  {
59  index_storage_variants[i].clear();
60  dof_indices_contiguous[i].clear();
63  }
64  store_plain_indices = false;
65  cell_active_fe_index.clear();
66  max_fe_index = 0;
67  fe_index_conversion.clear();
68  }
69 
70 
71 
72  void
73  DoFInfo::get_dof_indices_on_cell_batch(std::vector<unsigned int> &my_rows,
74  const unsigned int cell,
75  const bool apply_constraints) const
76  {
77  const unsigned int n_fe_components = start_components.back();
78  const unsigned int fe_index =
79  dofs_per_cell.size() == 1 ? 0 : cell_active_fe_index[cell];
80  const unsigned int dofs_this_cell = dofs_per_cell[fe_index];
81 
82  const unsigned int n_vectorization = vectorization_length;
83  constexpr auto dof_access_index = dof_access_cell;
84  AssertIndexRange(cell,
85  n_vectorization_lanes_filled[dof_access_index].size());
86  const unsigned int n_vectorization_actual =
87  n_vectorization_lanes_filled[dof_access_index][cell];
88 
89  // we might have constraints, so the final number
90  // of indices is not known a priori.
91  // conservatively reserve the maximum without constraints
92  my_rows.reserve(n_vectorization * dofs_this_cell);
93  my_rows.resize(0);
94  unsigned int total_size = 0;
95  for (unsigned int v = 0; v < n_vectorization_actual; ++v)
96  {
97  const unsigned int ib =
98  (cell * n_vectorization + v) * n_fe_components;
99  const unsigned int ie =
100  (cell * n_vectorization + v + 1) * n_fe_components;
101 
102  // figure out constraints by comparing constraint_indicator row
103  // shift for this cell within the block as compared to the next
104  // one
105  const bool has_constraints =
106  row_starts[ib].second != row_starts[ib + n_fe_components].second;
107 
108  auto do_copy = [&](const unsigned int *begin,
109  const unsigned int *end) {
110  const unsigned int shift = total_size;
111  total_size += (end - begin);
112  my_rows.resize(total_size);
113  std::copy(begin, end, my_rows.begin() + shift);
114  };
115 
116  if (!has_constraints || apply_constraints)
117  {
118  const unsigned int *begin =
119  dof_indices.data() + row_starts[ib].first;
120  const unsigned int *end =
121  dof_indices.data() + row_starts[ie].first;
122  do_copy(begin, end);
123  }
124  else
125  {
126  Assert(row_starts_plain_indices[cell * n_vectorization + v] !=
129  const unsigned int *begin =
130  plain_dof_indices.data() +
131  row_starts_plain_indices[cell * n_vectorization + v];
132  const unsigned int *end = begin + dofs_this_cell;
133  do_copy(begin, end);
134  }
135  }
136  }
137 
138 
139 
140  void
141  DoFInfo::assign_ghosts(const std::vector<unsigned int> &boundary_cells,
142  const MPI_Comm & communicator_sm,
143  const bool use_vector_data_exchanger_full)
144  {
145  Assert(boundary_cells.size() < row_starts.size(), ExcInternalError());
146 
147  // sort ghost dofs and compress out duplicates
148  const unsigned int n_owned = (vector_partitioner->local_range().second -
149  vector_partitioner->local_range().first);
150  const std::size_t n_ghosts = ghost_dofs.size();
151 #ifdef DEBUG
152  for (const auto dof_index : dof_indices)
153  AssertIndexRange(dof_index, n_owned + n_ghosts);
154 #endif
155 
156  const unsigned int n_components = start_components.back();
157  std::vector<unsigned int> ghost_numbering(n_ghosts);
158  IndexSet ghost_indices(vector_partitioner->size());
159  if (n_ghosts > 0)
160  {
161  unsigned int n_unique_ghosts = 0;
162  // since we need to go back to the local_to_global indices and
163  // replace the temporary numbering of ghosts by the real number in
164  // the index set, we need to store these values
165  std::vector<std::pair<types::global_dof_index, unsigned int>>
166  ghost_origin(n_ghosts);
167  for (std::size_t i = 0; i < n_ghosts; ++i)
168  {
169  ghost_origin[i].first = ghost_dofs[i];
170  ghost_origin[i].second = i;
171  }
172  std::sort(ghost_origin.begin(), ghost_origin.end());
173 
174  types::global_dof_index last_contiguous_start = ghost_origin[0].first;
175  ghost_numbering[ghost_origin[0].second] = 0;
176  for (std::size_t i = 1; i < n_ghosts; i++)
177  {
178  if (ghost_origin[i].first > ghost_origin[i - 1].first + 1)
179  {
180  ghost_indices.add_range(last_contiguous_start,
181  ghost_origin[i - 1].first + 1);
182  last_contiguous_start = ghost_origin[i].first;
183  }
184  if (ghost_origin[i].first > ghost_origin[i - 1].first)
185  ++n_unique_ghosts;
186  ghost_numbering[ghost_origin[i].second] = n_unique_ghosts;
187  }
188  ++n_unique_ghosts;
189  ghost_indices.add_range(last_contiguous_start,
190  ghost_origin.back().first + 1);
191  ghost_indices.compress();
192 
193  // make sure that we got the correct local numbering of the ghost
194  // dofs. the ghost index set should store the same number
195  {
196  AssertDimension(n_unique_ghosts, ghost_indices.n_elements());
197  for (std::size_t i = 0; i < n_ghosts; ++i)
198  Assert(ghost_numbering[i] ==
199  ghost_indices.index_within_set(ghost_dofs[i]),
200  ExcInternalError());
201  }
202 
203  // apply correct numbering for ghost indices: We previously just
204  // enumerated them according to their appearance in the
205  // local_to_global structure. Above, we derived a relation between
206  // this enumeration and the actual number
207  const unsigned int n_boundary_cells = boundary_cells.size();
208  for (unsigned int i = 0; i < n_boundary_cells; ++i)
209  {
210  unsigned int *data_ptr =
211  dof_indices.data() +
212  row_starts[boundary_cells[i] * n_components].first;
213  const unsigned int *row_end =
214  dof_indices.data() +
215  row_starts[(boundary_cells[i] + 1) * n_components].first;
216  for (; data_ptr != row_end; ++data_ptr)
217  *data_ptr = ((*data_ptr < n_owned) ?
218  *data_ptr :
219  n_owned + ghost_numbering[*data_ptr - n_owned]);
220 
221  // now the same procedure for plain indices
222  if (store_plain_indices == true)
223  {
224  if (row_starts[boundary_cells[i] * n_components].second !=
225  row_starts[(boundary_cells[i] + 1) * n_components].second)
226  {
227  unsigned int *data_ptr =
228  plain_dof_indices.data() +
229  row_starts_plain_indices[boundary_cells[i]];
230  const unsigned int fe_index =
231  (cell_active_fe_index.size() == 0 ||
232  dofs_per_cell.size() == 1) ?
233  0 :
234  cell_active_fe_index[boundary_cells[i]];
235  AssertIndexRange(fe_index, dofs_per_cell.size());
236  const unsigned int *row_end =
237  data_ptr + dofs_per_cell[fe_index];
238  for (; data_ptr != row_end; ++data_ptr)
239  *data_ptr =
240  ((*data_ptr < n_owned) ?
241  *data_ptr :
242  n_owned + ghost_numbering[*data_ptr - n_owned]);
243  }
244  }
245  }
246  }
247 
248  std::vector<types::global_dof_index> empty;
249  ghost_dofs.swap(empty);
250 
251  // set the ghost indices now. need to cast away constness here, but that
252  // is uncritical since we reset the Partitioner in the same initialize
253  // call as this call here.
254  Utilities::MPI::Partitioner *vec_part =
255  const_cast<Utilities::MPI::Partitioner *>(vector_partitioner.get());
256  vec_part->set_ghost_indices(ghost_indices);
257 
258  if (use_vector_data_exchanger_full == false)
259  vector_exchanger = std::make_shared<
262  else
264  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
265  vector_partitioner, communicator_sm);
266  }
267 
268 
269 
270  void
272  const TaskInfo & task_info,
273  const std::vector<unsigned int> & renumbering,
274  const std::vector<unsigned int> & constraint_pool_row_index,
275  const std::vector<unsigned char> &irregular_cells)
276  {
277  (void)constraint_pool_row_index;
278 
279  // first reorder the active FE index.
280  const bool have_hp = dofs_per_cell.size() > 1;
281  if (cell_active_fe_index.size() > 0)
282  {
283  std::vector<unsigned int> new_active_fe_index;
284  new_active_fe_index.reserve(task_info.cell_partition_data.back());
285  unsigned int position_cell = 0;
286  for (unsigned int cell = 0;
287  cell < task_info.cell_partition_data.back();
288  ++cell)
289  {
290  const unsigned int n_comp =
291  (irregular_cells[cell] > 0 ? irregular_cells[cell] :
293 
294  // take maximum FE index among the ones present (we might have
295  // lumped some lower indices into higher ones)
296  unsigned int fe_index =
297  cell_active_fe_index[renumbering[position_cell]];
298  for (unsigned int j = 1; j < n_comp; ++j)
299  fe_index = std::max(
300  fe_index,
301  cell_active_fe_index[renumbering[position_cell + j]]);
302 
303  new_active_fe_index.push_back(fe_index);
304  position_cell += n_comp;
305  }
306  std::swap(new_active_fe_index, cell_active_fe_index);
307  }
308  if (have_hp)
310  task_info.cell_partition_data.back());
311 
312  const unsigned int n_components = start_components.back();
313 
314  std::vector<std::pair<unsigned int, unsigned int>> new_row_starts(
316  task_info.cell_partition_data.back() +
317  1);
318  std::vector<unsigned int> new_dof_indices;
319  std::vector<std::pair<unsigned short, unsigned short>>
320  new_constraint_indicator;
321  std::vector<unsigned int> new_plain_indices, new_rowstart_plain;
322  unsigned int position_cell = 0;
323  new_dof_indices.reserve(dof_indices.size());
324  new_constraint_indicator.reserve(constraint_indicator.size());
325  if (store_plain_indices == true)
326  {
327  new_rowstart_plain.resize(vectorization_length *
328  task_info.cell_partition_data.back() +
329  1,
331  new_plain_indices.reserve(plain_dof_indices.size());
332  }
333 
334  // copy the indices and the constraint indicators to the new data field,
335  // where we will go through the cells in the renumbered way. in case the
336  // vectorization length does not exactly match up, we fill invalid
337  // numbers to the rowstart data. for contiguous cell indices, we skip
338  // the rowstarts field completely and directly go into the
339  // new_dof_indices field (this layout is used in FEEvaluation).
340  for (unsigned int i = 0; i < task_info.cell_partition_data.back(); ++i)
341  {
342  const unsigned int n_vect =
343  (irregular_cells[i] > 0 ? irregular_cells[i] :
345  const unsigned int dofs_per_cell =
346  have_hp ? this->dofs_per_cell[cell_active_fe_index[i]] :
347  this->dofs_per_cell[0];
348 
349  for (unsigned int j = 0; j < n_vect; ++j)
350  {
351  const unsigned int cell_no =
352  renumbering[position_cell + j] * n_components;
353  for (unsigned int comp = 0; comp < n_components; ++comp)
354  {
355  new_row_starts[(i * vectorization_length + j) * n_components +
356  comp]
357  .first = new_dof_indices.size();
358  new_row_starts[(i * vectorization_length + j) * n_components +
359  comp]
360  .second = new_constraint_indicator.size();
361 
362  new_dof_indices.insert(
363  new_dof_indices.end(),
364  dof_indices.data() + row_starts[cell_no + comp].first,
365  dof_indices.data() + row_starts[cell_no + comp + 1].first);
366  for (unsigned int index = row_starts[cell_no + comp].second;
367  index != row_starts[cell_no + comp + 1].second;
368  ++index)
369  new_constraint_indicator.push_back(
370  constraint_indicator[index]);
371  }
372  if (store_plain_indices &&
373  row_starts[cell_no].second !=
374  row_starts[cell_no + n_components].second)
375  {
376  new_rowstart_plain[i * vectorization_length + j] =
377  new_plain_indices.size();
378  new_plain_indices.insert(
379  new_plain_indices.end(),
380  plain_dof_indices.data() +
382  plain_dof_indices.data() +
384  dofs_per_cell);
385  }
386  }
387  for (unsigned int j = n_vect; j < vectorization_length; ++j)
388  for (unsigned int comp = 0; comp < n_components; ++comp)
389  {
390  new_row_starts[(i * vectorization_length + j) * n_components +
391  comp]
392  .first = new_dof_indices.size();
393  new_row_starts[(i * vectorization_length + j) * n_components +
394  comp]
395  .second = new_constraint_indicator.size();
396  }
397  position_cell += n_vect;
398  }
399  AssertDimension(position_cell * n_components + 1, row_starts.size());
400 
401  AssertDimension(dof_indices.size(), new_dof_indices.size());
402  new_row_starts[task_info.cell_partition_data.back() *
404  .first = new_dof_indices.size();
405  new_row_starts[task_info.cell_partition_data.back() *
407  .second = new_constraint_indicator.size();
408 
410  new_constraint_indicator.size());
411 
412  new_row_starts.swap(row_starts);
413  new_dof_indices.swap(dof_indices);
414  new_constraint_indicator.swap(constraint_indicator);
415  new_plain_indices.swap(plain_dof_indices);
416  new_rowstart_plain.swap(row_starts_plain_indices);
417 
418 #ifdef DEBUG
419  // sanity check 1: all indices should be smaller than the number of dofs
420  // locally owned plus the number of ghosts
421  const unsigned int index_range =
422  (vector_partitioner->local_range().second -
423  vector_partitioner->local_range().first) +
424  vector_partitioner->ghost_indices().n_elements();
425  for (const auto dof_index : dof_indices)
426  AssertIndexRange(dof_index, index_range);
427 
428  // sanity check 2: for the constraint indicators, the first index should
429  // be smaller than the number of indices in the row, and the second
430  // index should be smaller than the number of constraints in the
431  // constraint pool.
432  for (unsigned int row = 0; row < task_info.cell_partition_data.back();
433  ++row)
434  {
435  const unsigned int row_length_ind =
440  constraint_indicator.size() + 1);
441  const std::pair<unsigned short, unsigned short> *
442  con_it =
443  constraint_indicator.data() +
445  *end_con =
446  constraint_indicator.data() +
448  for (; con_it != end_con; ++con_it)
449  {
450  AssertIndexRange(con_it->first, row_length_ind + 1);
451  AssertIndexRange(con_it->second,
452  constraint_pool_row_index.size() - 1);
453  }
454  }
455 
456  // sanity check 3: check the number of cells once again
457  unsigned int n_active_cells = 0;
458  for (unsigned int c = 0; c < *(task_info.cell_partition_data.end() - 2);
459  ++c)
460  if (irregular_cells[c] > 0)
461  n_active_cells += irregular_cells[c];
462  else
465 #endif
466 
467  compute_cell_index_compression(irregular_cells);
468  }
469 
470 
471 
472  void
474  const std::vector<unsigned char> &irregular_cells)
475  {
476  const bool have_hp = dofs_per_cell.size() > 1;
477  const unsigned int n_components = start_components.back();
478 
480  row_starts.size() % vectorization_length == 1,
481  ExcInternalError());
482  if (vectorization_length > 1)
484  irregular_cells.size());
486  irregular_cells.size(), IndexStorageVariants::full);
488  irregular_cells.size());
489  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
490  if (irregular_cells[i] > 0)
491  n_vectorization_lanes_filled[dof_access_cell][i] = irregular_cells[i];
492  else
495 
497  irregular_cells.size() * vectorization_length,
499  dof_indices_interleaved.resize(dof_indices.size(),
502  irregular_cells.size() * vectorization_length,
504 
505  std::vector<unsigned int> index_kinds(
506  static_cast<unsigned int>(
508  1);
509  std::vector<unsigned int> offsets(vectorization_length);
510  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
511  {
512  const unsigned int ndofs =
513  dofs_per_cell[have_hp ? cell_active_fe_index[i] : 0];
514  const unsigned int n_comp =
516 
517  // check 1: Check if there are constraints -> no compression possible
518  bool has_constraints = false;
519  for (unsigned int j = 0; j < n_comp; ++j)
520  {
521  const unsigned int cell_no = i * vectorization_length + j;
522  if (row_starts[cell_no * n_components].second !=
523  row_starts[(cell_no + 1) * n_components].second)
524  {
525  has_constraints = true;
526  break;
527  }
528  }
529  if (has_constraints)
532  else
533  {
534  bool indices_are_contiguous = true;
535  for (unsigned int j = 0; j < n_comp; ++j)
536  {
537  const unsigned int cell_no = i * vectorization_length + j;
538  const unsigned int *dof_indices =
539  this->dof_indices.data() +
540  row_starts[cell_no * n_components].first;
542  ndofs,
543  row_starts[(cell_no + 1) * n_components].first -
544  row_starts[cell_no * n_components].first);
545  for (unsigned int i = 1; i < ndofs; ++i)
546  if (dof_indices[i] != dof_indices[0] + i)
547  {
548  indices_are_contiguous = false;
549  break;
550  }
551  }
552 
553  bool indices_are_interleaved_and_contiguous =
554  (ndofs > 1 && n_comp == vectorization_length);
555 
556  {
557  const unsigned int *dof_indices =
558  this->dof_indices.data() +
560  for (unsigned int k = 0; k < ndofs; ++k)
561  for (unsigned int j = 0; j < n_comp; ++j)
562  if (dof_indices[j * ndofs + k] !=
563  dof_indices[0] + k * n_comp + j)
564  {
565  indices_are_interleaved_and_contiguous = false;
566  break;
567  }
568  }
569 
570  if (indices_are_contiguous ||
571  indices_are_interleaved_and_contiguous)
572  {
573  for (unsigned int j = 0; j < n_comp; ++j)
577  j) *
578  n_components]
579  .first];
580  }
581 
582  if (indices_are_interleaved_and_contiguous)
583  {
587  for (unsigned int j = 0; j < n_comp; ++j)
589  j] = n_comp;
590  }
591  else if (indices_are_contiguous)
592  {
595  for (unsigned int j = 0; j < n_comp; ++j)
597  j] = 1;
598  }
599  else
600  {
601  int indices_are_interleaved_and_mixed = 2;
602  const unsigned int *dof_indices =
603  &this->dof_indices[row_starts[i * vectorization_length *
604  n_components]
605  .first];
606  for (unsigned int j = 0; j < n_comp; ++j)
607  offsets[j] =
608  dof_indices[j * ndofs + 1] - dof_indices[j * ndofs];
609  for (unsigned int k = 0; k < ndofs; ++k)
610  for (unsigned int j = 0; j < n_comp; ++j)
611  // the first if case is to avoid negative offsets
612  // (invalid)
613  if (dof_indices[j * ndofs + 1] < dof_indices[j * ndofs] ||
614  dof_indices[j * ndofs + k] !=
615  dof_indices[j * ndofs] + k * offsets[j])
616  {
617  indices_are_interleaved_and_mixed = 0;
618  break;
619  }
620  if (indices_are_interleaved_and_mixed == 2)
621  {
622  for (unsigned int j = 0; j < n_comp; ++j)
625  offsets[j];
626  for (unsigned int j = 0; j < n_comp; ++j)
628  [i * vectorization_length + j] =
629  dof_indices[j * ndofs];
630  for (unsigned int j = 0; j < n_comp; ++j)
631  if (offsets[j] != vectorization_length)
632  {
633  indices_are_interleaved_and_mixed = 1;
634  break;
635  }
636  if (indices_are_interleaved_and_mixed == 1 ||
637  n_comp != vectorization_length)
641  else
644  }
645  else
646  {
647  const unsigned int *dof_indices =
648  this->dof_indices.data() +
650  .first;
651  if (n_comp == vectorization_length)
654  else
657 
658  // do not use interleaved storage if two vectorized
659  // components point to the same field (scatter not
660  // possible)
661  for (unsigned int k = 0; k < ndofs; ++k)
662  for (unsigned int l = 0; l < n_comp; ++l)
663  for (unsigned int j = l + 1; j < n_comp; ++j)
664  if (dof_indices[j * ndofs + k] ==
665  dof_indices[l * ndofs + k])
666  {
669  break;
670  }
671  }
672  }
673  }
674  index_kinds[static_cast<unsigned int>(
676  }
677 
678  // Cleanup phase: we want to avoid single cells with different properties
679  // than the bulk of the domain in order to avoid extra checks in the face
680  // identification.
681 
682  // Step 1: check whether the interleaved indices were only assigned to
683  // the single cell within a vectorized array.
684  auto fix_single_interleaved_indices =
685  [&](const IndexStorageVariants variant) {
686  if (index_kinds[static_cast<unsigned int>(
688  0 &&
689  index_kinds[static_cast<unsigned int>(variant)] > 0)
690  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
691  {
694  interleaved_contiguous_mixed_strides &&
696  (variant != IndexStorageVariants::contiguous ||
698  [i * vectorization_length] ==
699  1))
700  {
702  index_kinds[static_cast<unsigned int>(
705  index_kinds[static_cast<unsigned int>(variant)]++;
706  }
707  }
708  };
709 
710  fix_single_interleaved_indices(IndexStorageVariants::full);
711  fix_single_interleaved_indices(IndexStorageVariants::contiguous);
712  fix_single_interleaved_indices(IndexStorageVariants::interleaved);
713 
714  unsigned int n_interleaved =
715  index_kinds[static_cast<unsigned int>(
717  index_kinds[static_cast<unsigned int>(
719  index_kinds[static_cast<unsigned int>(
721 
722  // Step 2: fix single contiguous cell among others with interleaved
723  // storage
724  if (n_interleaved > 0 && index_kinds[static_cast<unsigned int>(
726  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
729  {
732  index_kinds[static_cast<unsigned int>(
734  index_kinds[static_cast<unsigned int>(
736  }
737 
738  // Step 3: Interleaved cells are left but also some non-contiguous ones
739  // -> revert all to full storage
740  if (n_interleaved > 0 &&
741  index_kinds[static_cast<unsigned int>(IndexStorageVariants::full)] +
742  index_kinds[static_cast<unsigned int>(
744  0)
745  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
748  {
749  index_kinds[static_cast<unsigned int>(
750  index_storage_variants[2][i])]--;
755  else
758  index_kinds[static_cast<unsigned int>(
760  }
761 
762  // Step 4: Copy the interleaved indices into their own data structure
763  for (unsigned int i = 0; i < irregular_cells.size(); ++i)
766  {
769  {
772  continue;
773  }
774  const unsigned int ndofs =
775  dofs_per_cell[have_hp ? cell_active_fe_index[i] : 0];
776  const unsigned int *dof_indices =
777  &this->dof_indices
779  unsigned int *interleaved_dof_indices =
781  [row_starts[i * vectorization_length * n_components].first];
782  AssertDimension(this->dof_indices.size(),
783  this->dof_indices_interleaved.size());
788  this->dof_indices_interleaved.size());
790  row_starts[i * vectorization_length * n_components].first +
791  ndofs * vectorization_length,
792  this->dof_indices_interleaved.size() + 1);
793  for (unsigned int k = 0; k < ndofs; ++k)
794  for (unsigned int j = 0; j < vectorization_length; ++j)
795  interleaved_dof_indices[k * vectorization_length + j] =
796  dof_indices[j * ndofs + k];
797  }
798  }
799 
800 
801 
802  void
804  const Table<2, ShapeInfo<double>> & shape_info,
805  const unsigned int n_owned_cells,
806  const unsigned int n_lanes,
807  const std::vector<FaceToCellTopology<1>> &inner_faces,
808  const std::vector<FaceToCellTopology<1>> &ghosted_faces,
809  const bool fill_cell_centric,
810  const MPI_Comm & communicator_sm,
811  const bool use_vector_data_exchanger_full)
812  {
814 
815  // partitioner 0: no face integrals, simply use the indices present
816  // on the cells
817  std::vector<types::global_dof_index> ghost_indices;
818  {
819  const unsigned int n_components = start_components.back();
820  for (unsigned int cell = 0; cell < n_owned_cells; ++cell)
821  {
822  for (unsigned int i = row_starts[cell * n_components].first;
823  i < row_starts[(cell + 1) * n_components].first;
824  ++i)
825  if (dof_indices[i] >= part.local_size())
826  ghost_indices.push_back(part.local_to_global(dof_indices[i]));
827 
828  const unsigned int fe_index =
829  dofs_per_cell.size() == 1 ? 0 :
830  cell_active_fe_index[cell / n_lanes];
831  const unsigned int dofs_this_cell = dofs_per_cell[fe_index];
832 
833  for (unsigned int i = row_starts_plain_indices[cell];
834  i < row_starts_plain_indices[cell] + dofs_this_cell;
835  ++i)
836  if (plain_dof_indices[i] >= part.local_size())
837  ghost_indices.push_back(
839  }
840  std::sort(ghost_indices.begin(), ghost_indices.end());
841  ghost_indices.erase(std::unique(ghost_indices.begin(),
842  ghost_indices.end()),
843  ghost_indices.end());
844  IndexSet compressed_set(part.size());
845  compressed_set.add_indices(ghost_indices.begin(), ghost_indices.end());
846  compressed_set.subtract_set(part.locally_owned_range());
847  const bool all_ghosts_equal =
848  Utilities::MPI::min<int>(static_cast<int>(
849  compressed_set.n_elements() ==
850  part.ghost_indices().n_elements()),
851  part.get_mpi_communicator()) != 0;
852 
853  std::shared_ptr<const Utilities::MPI::Partitioner> temp_0;
854 
855  if (all_ghosts_equal)
856  temp_0 = vector_partitioner;
857  else
858  {
859  temp_0 = std::make_shared<Utilities::MPI::Partitioner>(
861  const_cast<Utilities::MPI::Partitioner *>(temp_0.get())
862  ->set_ghost_indices(compressed_set, part.ghost_indices());
863  }
864 
865  if (use_vector_data_exchanger_full == false)
866  vector_exchanger_face_variants[0] = std::make_shared<
868  temp_0);
869  else
871  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
872  temp_0, communicator_sm);
873  }
874 
875  // construct a numbering of faces
876  std::vector<FaceToCellTopology<1>> all_faces(inner_faces);
877  all_faces.insert(all_faces.end(),
878  ghosted_faces.begin(),
879  ghosted_faces.end());
880  Table<2, unsigned int> cell_and_face_to_faces(
881  (row_starts.size() - 1) / start_components.back(),
882  2 * shape_info(0, 0).n_dimensions);
883  cell_and_face_to_faces.fill(numbers::invalid_unsigned_int);
884  for (unsigned int f = 0; f < all_faces.size(); ++f)
885  {
886  cell_and_face_to_faces(all_faces[f].cells_interior[0],
887  all_faces[f].interior_face_no) = f;
888  Assert(all_faces[f].cells_exterior[0] !=
890  ExcInternalError());
891  cell_and_face_to_faces(all_faces[f].cells_exterior[0],
892  all_faces[f].exterior_face_no) = f;
893  }
894 
895  // lambda function to detect objects on face pairs
896  const auto loop_over_faces =
897  [&](const std::function<
898  void(const unsigned int, const unsigned int, const bool)> &fu) {
899  for (const auto &face : inner_faces)
900  {
901  AssertIndexRange(face.cells_interior[0], n_owned_cells);
902  fu(face.cells_exterior[0], face.exterior_face_no, false /*flag*/);
903  }
904  };
905 
906  const auto loop_over_all_faces =
907  [&](const std::function<
908  void(const unsigned int, const unsigned int, const bool)> &fu) {
909  for (unsigned int c = 0; c < cell_and_face_to_faces.size(0); ++c)
910  for (unsigned int d = 0; d < cell_and_face_to_faces.size(1); ++d)
911  {
912  const unsigned int f = cell_and_face_to_faces(c, d);
914  continue;
915 
916  const unsigned int cell_m = all_faces[f].cells_interior[0];
917  const unsigned int cell_p = all_faces[f].cells_exterior[0];
918 
919  const bool ext = c == cell_m;
920 
921  if (ext && cell_p == numbers::invalid_unsigned_int)
922  continue;
923 
924  const unsigned int p = ext ? cell_p : cell_m;
925  const unsigned int face_no = ext ?
926  all_faces[f].exterior_face_no :
927  all_faces[f].interior_face_no;
928 
929  fu(p, face_no, true);
930  }
931  };
932 
933  const auto process_values =
934  [&](
935  std::shared_ptr<const Utilities::MPI::Partitioner>
936  &vector_partitioner_values,
937  const std::function<void(
938  const std::function<void(
939  const unsigned int, const unsigned int, const bool)> &)> &loop) {
940  bool all_nodal_and_tensorial = true;
941  for (unsigned int c = 0; c < n_base_elements; ++c)
942  {
943  const auto &si =
944  shape_info(global_base_element_offset + c, 0).data.front();
945  if (!si.nodal_at_cell_boundaries ||
946  (si.element_type ==
948  all_nodal_and_tensorial = false;
949  }
950  if (all_nodal_and_tensorial == false)
951  vector_partitioner_values = vector_partitioner;
952  else
953  {
954  bool has_noncontiguous_cell = false;
955 
956  loop([&](const unsigned int cell_no,
957  const unsigned int face_no,
958  const bool flag) {
959  const unsigned int index =
961  if (flag || (index != numbers::invalid_unsigned_int &&
962  index >= part.local_size()))
963  {
964  const unsigned int stride =
965  dof_indices_interleave_strides[dof_access_cell][cell_no];
966  unsigned int i = 0;
967  for (unsigned int e = 0; e < n_base_elements; ++e)
968  for (unsigned int c = 0; c < n_components[e]; ++c)
969  {
970  const ShapeInfo<double> &shape =
971  shape_info(global_base_element_offset + e, 0);
972  for (unsigned int j = 0;
973  j < shape.dofs_per_component_on_face;
974  ++j)
975  ghost_indices.push_back(part.local_to_global(
976  index + i +
977  shape.face_to_cell_index_nodal(face_no, j) *
978  stride));
979  i += shape.dofs_per_component_on_cell * stride;
980  }
981  AssertDimension(i, dofs_per_cell[0] * stride);
982  }
983  else if (index == numbers::invalid_unsigned_int)
984  has_noncontiguous_cell = true;
985  });
986  has_noncontiguous_cell =
987  Utilities::MPI::min<int>(static_cast<int>(
988  has_noncontiguous_cell),
989  part.get_mpi_communicator()) != 0;
990 
991  std::sort(ghost_indices.begin(), ghost_indices.end());
992  ghost_indices.erase(std::unique(ghost_indices.begin(),
993  ghost_indices.end()),
994  ghost_indices.end());
995  IndexSet compressed_set(part.size());
996  compressed_set.add_indices(ghost_indices.begin(),
997  ghost_indices.end());
998  compressed_set.subtract_set(part.locally_owned_range());
999  const bool all_ghosts_equal =
1000  Utilities::MPI::min<int>(static_cast<int>(
1001  compressed_set.n_elements() ==
1002  part.ghost_indices().n_elements()),
1003  part.get_mpi_communicator()) != 0;
1004  if (all_ghosts_equal || has_noncontiguous_cell)
1005  vector_partitioner_values = vector_partitioner;
1006  else
1007  {
1008  vector_partitioner_values =
1009  std::make_shared<Utilities::MPI::Partitioner>(
1010  part.locally_owned_range(), part.get_mpi_communicator());
1011  const_cast<Utilities::MPI::Partitioner *>(
1012  vector_partitioner_values.get())
1013  ->set_ghost_indices(compressed_set, part.ghost_indices());
1014  }
1015  }
1016  };
1017 
1018 
1019  const auto process_gradients =
1020  [&](
1021  const std::shared_ptr<const Utilities::MPI::Partitioner>
1022  &vector_partitoner_values,
1023  std::shared_ptr<const Utilities::MPI::Partitioner>
1024  &vector_partitioner_gradients,
1025  const std::function<void(
1026  const std::function<void(
1027  const unsigned int, const unsigned int, const bool)> &)> &loop) {
1028  bool all_hermite = true;
1029  for (unsigned int c = 0; c < n_base_elements; ++c)
1030  if (shape_info(global_base_element_offset + c, 0).element_type !=
1032  all_hermite = false;
1033  if (all_hermite == false ||
1034  vector_partitoner_values.get() == vector_partitioner.get())
1035  vector_partitioner_gradients = vector_partitioner;
1036  else
1037  {
1038  loop([&](const unsigned int cell_no,
1039  const unsigned int face_no,
1040  const bool flag) {
1041  const unsigned int index =
1042  dof_indices_contiguous[dof_access_cell][cell_no];
1043  if (flag || (index != numbers::invalid_unsigned_int &&
1044  index >= part.local_size()))
1045  {
1046  const unsigned int stride =
1047  dof_indices_interleave_strides[dof_access_cell][cell_no];
1048  unsigned int i = 0;
1049  for (unsigned int e = 0; e < n_base_elements; ++e)
1050  for (unsigned int c = 0; c < n_components[e]; ++c)
1051  {
1052  const ShapeInfo<double> &shape =
1053  shape_info(global_base_element_offset + e, 0);
1054  for (unsigned int j = 0;
1055  j < 2 * shape.dofs_per_component_on_face;
1056  ++j)
1057  ghost_indices.push_back(part.local_to_global(
1058  index + i +
1059  shape.face_to_cell_index_hermite(face_no, j) *
1060  stride));
1061  i += shape.dofs_per_component_on_cell * stride;
1062  }
1063  AssertDimension(i, dofs_per_cell[0] * stride);
1064  }
1065  });
1066  std::sort(ghost_indices.begin(), ghost_indices.end());
1067  ghost_indices.erase(std::unique(ghost_indices.begin(),
1068  ghost_indices.end()),
1069  ghost_indices.end());
1070  IndexSet compressed_set(part.size());
1071  compressed_set.add_indices(ghost_indices.begin(),
1072  ghost_indices.end());
1073  compressed_set.subtract_set(part.locally_owned_range());
1074  const bool all_ghosts_equal =
1075  Utilities::MPI::min<int>(static_cast<int>(
1076  compressed_set.n_elements() ==
1077  part.ghost_indices().n_elements()),
1078  part.get_mpi_communicator()) != 0;
1079  if (all_ghosts_equal)
1080  vector_partitioner_gradients = vector_partitioner;
1081  else
1082  {
1083  vector_partitioner_gradients =
1084  std::make_shared<Utilities::MPI::Partitioner>(
1085  part.locally_owned_range(), part.get_mpi_communicator());
1086  const_cast<Utilities::MPI::Partitioner *>(
1087  vector_partitioner_gradients.get())
1088  ->set_ghost_indices(compressed_set, part.ghost_indices());
1089  }
1090  }
1091  };
1092 
1093  std::shared_ptr<const Utilities::MPI::Partitioner> temp_1, temp_2, temp_3,
1094  temp_4;
1095 
1096  // partitioner 1: values on faces
1097  process_values(temp_1, loop_over_faces);
1098 
1099  // partitioner 2: values and gradients on faces
1100  process_gradients(temp_1, temp_2, loop_over_faces);
1101 
1102  if (fill_cell_centric)
1103  {
1104  ghost_indices.clear();
1105  // partitioner 3: values on all faces
1106  process_values(temp_3, loop_over_all_faces);
1107  // partitioner 4: values and gradients on faces
1108  process_gradients(temp_3, temp_4, loop_over_all_faces);
1109  }
1110  else
1111  {
1112  temp_3 = std::make_shared<Utilities::MPI::Partitioner>(
1113  part.locally_owned_range(), part.get_mpi_communicator());
1114  temp_4 = std::make_shared<Utilities::MPI::Partitioner>(
1115  part.locally_owned_range(), part.get_mpi_communicator());
1116  }
1117 
1118  if (use_vector_data_exchanger_full == false)
1119  {
1120  vector_exchanger_face_variants[1] = std::make_shared<
1121  MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1122  temp_1);
1123  vector_exchanger_face_variants[2] = std::make_shared<
1124  MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1125  temp_2);
1126  vector_exchanger_face_variants[3] = std::make_shared<
1127  MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1128  temp_3);
1129  vector_exchanger_face_variants[4] = std::make_shared<
1130  MatrixFreeFunctions::VectorDataExchange::PartitionerWrapper>(
1131  temp_4);
1132  }
1133  else
1134  {
1135  vector_exchanger_face_variants[1] =
1136  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1137  temp_1, communicator_sm);
1138  vector_exchanger_face_variants[2] =
1139  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1140  temp_2, communicator_sm);
1141  vector_exchanger_face_variants[3] =
1142  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1143  temp_3, communicator_sm);
1144  vector_exchanger_face_variants[4] =
1145  std::make_shared<MatrixFreeFunctions::VectorDataExchange::Full>(
1146  temp_4, communicator_sm);
1147  }
1148  }
1149 
1150 
1151 
1152  void
1153  DoFInfo::compute_shared_memory_contiguous_indices(
1154  std::array<std::vector<std::pair<unsigned int, unsigned int>>, 3>
1155  &cell_indices_contiguous_sm)
1156  {
1157  AssertDimension(dofs_per_cell.size(), 1);
1158 
1159  for (unsigned int i = 0; i < 3; ++i)
1160  {
1161  dof_indices_contiguous_sm[i].resize(
1162  cell_indices_contiguous_sm[i].size());
1163 
1164  for (unsigned int j = 0; j < cell_indices_contiguous_sm[i].size();
1165  ++j)
1166  if (cell_indices_contiguous_sm[i][j].first !=
1168  dof_indices_contiguous_sm[i][j] = {
1169  cell_indices_contiguous_sm[i][j].first,
1170  cell_indices_contiguous_sm[i][j].second * dofs_per_cell[0]};
1171  else
1172  dof_indices_contiguous_sm[i][j] = {numbers::invalid_unsigned_int,
1174  }
1175  }
1176 
1177 
1178 
1179  namespace internal
1180  {
1181  // We construct the connectivity graph in parallel. we use one lock for
1182  // 256 degrees of freedom to keep the number of locks down to a
1183  // reasonable level and reduce the cost of locking to some extent.
1184  static constexpr unsigned int bucket_size_threading = 256;
1185 
1186 
1187 
1188  void
1189  compute_row_lengths(const unsigned int begin,
1190  const unsigned int end,
1191  const DoFInfo & dof_info,
1192  std::vector<std::mutex> & mutexes,
1193  std::vector<unsigned int> &row_lengths)
1194  {
1195  std::vector<unsigned int> scratch;
1196  const unsigned int n_components = dof_info.start_components.back();
1197  for (unsigned int block = begin; block < end; ++block)
1198  {
1199  scratch.clear();
1200  scratch.insert(
1201  scratch.end(),
1202  dof_info.dof_indices.data() +
1203  dof_info.row_starts[block * n_components].first,
1204  dof_info.dof_indices.data() +
1205  dof_info.row_starts[(block + 1) * n_components].first);
1206  std::sort(scratch.begin(), scratch.end());
1207  std::vector<unsigned int>::const_iterator end_unique =
1208  std::unique(scratch.begin(), scratch.end());
1209  std::vector<unsigned int>::const_iterator it = scratch.begin();
1210  while (it != end_unique)
1211  {
1212  // In this code, the procedure is that we insert all elements
1213  // that are within the range of one lock at once
1214  const unsigned int next_bucket =
1216  std::lock_guard<std::mutex> lock(
1217  mutexes[*it / bucket_size_threading]);
1218  for (; it != end_unique && *it < next_bucket; ++it)
1219  {
1220  AssertIndexRange(*it, row_lengths.size());
1221  row_lengths[*it]++;
1222  }
1223  }
1224  }
1225  }
1226 
1227  void
1228  fill_connectivity_dofs(const unsigned int begin,
1229  const unsigned int end,
1230  const DoFInfo & dof_info,
1231  const std::vector<unsigned int> &row_lengths,
1232  std::vector<std::mutex> & mutexes,
1233  ::SparsityPattern & connectivity_dof)
1234  {
1235  std::vector<unsigned int> scratch;
1236  const unsigned int n_components = dof_info.start_components.back();
1237  for (unsigned int block = begin; block < end; ++block)
1238  {
1239  scratch.clear();
1240  scratch.insert(
1241  scratch.end(),
1242  dof_info.dof_indices.data() +
1243  dof_info.row_starts[block * n_components].first,
1244  dof_info.dof_indices.data() +
1245  dof_info.row_starts[(block + 1) * n_components].first);
1246  std::sort(scratch.begin(), scratch.end());
1247  std::vector<unsigned int>::const_iterator end_unique =
1248  std::unique(scratch.begin(), scratch.end());
1249  std::vector<unsigned int>::const_iterator it = scratch.begin();
1250  while (it != end_unique)
1251  {
1252  const unsigned int next_bucket =
1254  std::lock_guard<std::mutex> lock(
1255  mutexes[*it / bucket_size_threading]);
1256  for (; it != end_unique && *it < next_bucket; ++it)
1257  if (row_lengths[*it] > 0)
1258  connectivity_dof.add(*it, block);
1259  }
1260  }
1261  }
1262 
1263 
1264 
1265  void
1266  fill_connectivity(const unsigned int begin,
1267  const unsigned int end,
1268  const DoFInfo & dof_info,
1269  const std::vector<unsigned int> &renumbering,
1270  const ::SparsityPattern & connectivity_dof,
1271  DynamicSparsityPattern & connectivity)
1272  {
1273  ordered_vector row_entries;
1274  const unsigned int n_components = dof_info.start_components.back();
1275  for (unsigned int block = begin; block < end; ++block)
1276  {
1277  row_entries.clear();
1278 
1279  const unsigned int
1280  *it = dof_info.dof_indices.data() +
1281  dof_info.row_starts[block * n_components].first,
1282  *end_cell = dof_info.dof_indices.data() +
1283  dof_info.row_starts[(block + 1) * n_components].first;
1284  for (; it != end_cell; ++it)
1285  {
1286  SparsityPattern::iterator sp = connectivity_dof.begin(*it);
1287  std::vector<types::global_dof_index>::iterator insert_pos =
1288  row_entries.begin();
1289  for (; sp != connectivity_dof.end(*it); ++sp)
1290  if (sp->column() != block)
1291  row_entries.insert(renumbering[sp->column()], insert_pos);
1292  }
1293  connectivity.add_entries(renumbering[block],
1294  row_entries.begin(),
1295  row_entries.end());
1296  }
1297  }
1298 
1299  } // namespace internal
1300 
1301  void
1302  DoFInfo::make_connectivity_graph(
1303  const TaskInfo & task_info,
1304  const std::vector<unsigned int> &renumbering,
1305  DynamicSparsityPattern & connectivity) const
1306  {
1307  unsigned int n_rows = (vector_partitioner->local_range().second -
1308  vector_partitioner->local_range().first) +
1309  vector_partitioner->ghost_indices().n_elements();
1310 
1311  // Avoid square sparsity patterns that allocate the diagonal entry
1312  if (n_rows == task_info.n_active_cells)
1313  ++n_rows;
1314 
1315  // first determine row lengths
1316  std::vector<unsigned int> row_lengths(n_rows);
1317  std::vector<std::mutex> mutexes(n_rows / internal::bucket_size_threading +
1318  1);
1320  0,
1321  task_info.n_active_cells,
1322  [this, &mutexes, &row_lengths](const unsigned int begin,
1323  const unsigned int end) {
1324  internal::compute_row_lengths(
1325  begin, end, *this, mutexes, row_lengths);
1326  },
1327  20);
1328 
1329  // disregard dofs that only sit on a single cell because they cannot
1330  // couple
1331  for (unsigned int row = 0; row < n_rows; ++row)
1332  if (row_lengths[row] <= 1)
1333  row_lengths[row] = 0;
1334 
1335  // Create a temporary sparsity pattern that holds to each degree of
1336  // freedom on which cells it appears, i.e., store the connectivity
1337  // between cells and dofs
1338  SparsityPattern connectivity_dof(n_rows,
1339  task_info.n_active_cells,
1340  row_lengths);
1342  0,
1343  task_info.n_active_cells,
1344  [this, &row_lengths, &mutexes, &connectivity_dof](
1345  const unsigned int begin, const unsigned int end) {
1346  internal::fill_connectivity_dofs(
1347  begin, end, *this, row_lengths, mutexes, connectivity_dof);
1348  },
1349  20);
1350  connectivity_dof.compress();
1351 
1352 
1353  // Invert renumbering for use in fill_connectivity.
1354  std::vector<unsigned int> reverse_numbering(task_info.n_active_cells);
1355  reverse_numbering = Utilities::invert_permutation(renumbering);
1356 
1357  // From the above connectivity between dofs and cells, we can finally
1358  // create a connectivity list between cells. The connectivity graph
1359  // should apply the renumbering, i.e., the entry for cell j is the entry
1360  // for cell renumbering[j] in the original ordering.
1362  0,
1363  task_info.n_active_cells,
1364  [this, &reverse_numbering, &connectivity_dof, &connectivity](
1365  const unsigned int begin, const unsigned int end) {
1366  internal::fill_connectivity(begin,
1367  end,
1368  *this,
1369  reverse_numbering,
1370  connectivity_dof,
1371  connectivity);
1372  },
1373  20);
1374  }
1375 
1376 
1377 
1378  void
1379  DoFInfo::compute_dof_renumbering(
1380  std::vector<types::global_dof_index> &renumbering)
1381  {
1382  const unsigned int locally_owned_size =
1383  vector_partitioner->locally_owned_size();
1384  renumbering.resize(0);
1385  renumbering.resize(locally_owned_size, numbers::invalid_dof_index);
1386 
1387  types::global_dof_index counter = 0;
1388  const unsigned int n_components = start_components.back();
1389  const unsigned int n_cell_batches =
1390  n_vectorization_lanes_filled[dof_access_cell].size();
1391  Assert(n_cell_batches <=
1392  (row_starts.size() - 1) / vectorization_length / n_components,
1393  ExcInternalError());
1394  for (unsigned int cell_no = 0; cell_no < n_cell_batches; ++cell_no)
1395  {
1396  // do not renumber in case we have constraints
1397  if (row_starts[cell_no * n_components * vectorization_length]
1398  .second ==
1399  row_starts[(cell_no + 1) * n_components * vectorization_length]
1400  .second)
1401  {
1402  const unsigned int ndofs =
1403  dofs_per_cell.size() == 1 ?
1404  dofs_per_cell[0] :
1405  (dofs_per_cell[cell_active_fe_index.size() > 0 ?
1406  cell_active_fe_index[cell_no] :
1407  0]);
1408  const unsigned int *dof_ind =
1409  dof_indices.data() +
1410  row_starts[cell_no * n_components * vectorization_length].first;
1411  for (unsigned int i = 0; i < ndofs; ++i)
1412  for (unsigned int j = 0;
1413  j < n_vectorization_lanes_filled[dof_access_cell][cell_no];
1414  ++j)
1415  if (dof_ind[j * ndofs + i] < locally_owned_size)
1416  if (renumbering[dof_ind[j * ndofs + i]] ==
1418  renumbering[dof_ind[j * ndofs + i]] = counter++;
1419  }
1420  }
1421 
1422  AssertIndexRange(counter, locally_owned_size + 1);
1423  for (types::global_dof_index &dof_index : renumbering)
1424  if (dof_index == numbers::invalid_dof_index)
1425  dof_index = counter++;
1426 
1427  // transform indices to global index space
1428  for (types::global_dof_index &dof_index : renumbering)
1429  dof_index = vector_partitioner->local_to_global(dof_index);
1430 
1431  AssertDimension(counter, renumbering.size());
1432  }
1433 
1434 
1435 
1436  std::size_t
1438  {
1439  std::size_t memory = sizeof(*this);
1440  memory +=
1441  (row_starts.capacity() * sizeof(std::pair<unsigned int, unsigned int>));
1442  memory += MemoryConsumption::memory_consumption(dof_indices);
1443  memory += MemoryConsumption::memory_consumption(row_starts_plain_indices);
1444  memory += MemoryConsumption::memory_consumption(plain_dof_indices);
1445  memory += MemoryConsumption::memory_consumption(constraint_indicator);
1446  memory += MemoryConsumption::memory_consumption(*vector_partitioner);
1447  return memory;
1448  }
1449  } // namespace MatrixFreeFunctions
1450 } // namespace internal
1451 
1452 namespace internal
1453 {
1454  namespace MatrixFreeFunctions
1455  {
1456  template struct ConstraintValues<double>;
1457  template struct ConstraintValues<float>;
1458 
1459  template void
1460  DoFInfo::read_dof_indices<double>(
1461  const std::vector<types::global_dof_index> &,
1462  const std::vector<unsigned int> &,
1463  const ::AffineConstraints<double> &,
1464  const unsigned int,
1465  ConstraintValues<double> &,
1466  bool &);
1467 
1468  template void
1469  DoFInfo::read_dof_indices<float>(
1470  const std::vector<types::global_dof_index> &,
1471  const std::vector<unsigned int> &,
1472  const ::AffineConstraints<float> &,
1473  const unsigned int,
1474  ConstraintValues<double> &,
1475  bool &);
1476 
1477  template void
1478  DoFInfo::compute_face_index_compression<1>(
1479  const std::vector<FaceToCellTopology<1>> &);
1480  template void
1481  DoFInfo::compute_face_index_compression<2>(
1482  const std::vector<FaceToCellTopology<2>> &);
1483  template void
1484  DoFInfo::compute_face_index_compression<4>(
1485  const std::vector<FaceToCellTopology<4>> &);
1486  template void
1487  DoFInfo::compute_face_index_compression<8>(
1488  const std::vector<FaceToCellTopology<8>> &);
1489  template void
1490  DoFInfo::compute_face_index_compression<16>(
1491  const std::vector<FaceToCellTopology<16>> &);
1492 
1493  template void
1494  DoFInfo::compute_vector_zero_access_pattern<1>(
1495  const TaskInfo &,
1496  const std::vector<FaceToCellTopology<1>> &);
1497  template void
1498  DoFInfo::compute_vector_zero_access_pattern<2>(
1499  const TaskInfo &,
1500  const std::vector<FaceToCellTopology<2>> &);
1501  template void
1502  DoFInfo::compute_vector_zero_access_pattern<4>(
1503  const TaskInfo &,
1504  const std::vector<FaceToCellTopology<4>> &);
1505  template void
1506  DoFInfo::compute_vector_zero_access_pattern<8>(
1507  const TaskInfo &,
1508  const std::vector<FaceToCellTopology<8>> &);
1509  template void
1510  DoFInfo::compute_vector_zero_access_pattern<16>(
1511  const TaskInfo &,
1512  const std::vector<FaceToCellTopology<16>> &);
1513 
1514  template void
1515  DoFInfo::print_memory_consumption<std::ostream>(std::ostream &,
1516  const TaskInfo &) const;
1517  template void
1518  DoFInfo::print_memory_consumption<ConditionalOStream>(
1520  const TaskInfo &) const;
1521 
1522  template void
1523  DoFInfo::print<double>(const std::vector<double> &,
1524  const std::vector<unsigned int> &,
1525  std::ostream &) const;
1526 
1527  template void
1528  DoFInfo::print<float>(const std::vector<float> &,
1529  const std::vector<unsigned int> &,
1530  std::ostream &) const;
1531  } // namespace MatrixFreeFunctions
1532 } // namespace internal
1533 
size_type index_within_set(const size_type global_index) const
Definition: index_set.h:1921
size_type n_elements() const
Definition: index_set.h:1832
void subtract_set(const IndexSet &other)
Definition: index_set.cc:258
void add_range(const size_type begin, const size_type end)
Definition: index_set.h:1673
void compress() const
Definition: index_set.h:1642
void add_indices(const ForwardIterator &begin, const ForwardIterator &end)
Definition: index_set.h:1703
unsigned int local_size() const
types::global_dof_index local_to_global(const unsigned int local_index) const
void set_ghost_indices(const IndexSet &ghost_indices, const IndexSet &larger_ghost_index_set=IndexSet())
virtual const MPI_Comm & get_mpi_communicator() const override
types::global_dof_index size() const
const IndexSet & locally_owned_range() const
const IndexSet & ghost_indices() const
#define DEAL_II_NAMESPACE_OPEN
Definition: config.h:396
#define DEAL_II_NAMESPACE_CLOSE
Definition: config.h:397
Point< 2 > second
Definition: grid_out.cc:4588
Point< 2 > first
Definition: grid_out.cc:4587
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcNotInitialized()
#define Assert(cond, exc)
Definition: exceptions.h:1465
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1622
#define AssertIndexRange(index, range)
Definition: exceptions.h:1690
void loop(ITERATOR begin, typename identity< ITERATOR >::type end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
Definition: loop.h:439
void add_entries(const size_type row, ForwardIterator begin, ForwardIterator end, const bool indices_are_unique_and_sorted=false)
void add(const size_type i, const size_type j)
void shift(const Tensor< 1, spacedim > &shift_vector, Triangulation< dim, spacedim > &triangulation)
Definition: grid_tools.cc:2022
std::enable_if< std::is_fundamental< T >::value, std::size_t >::type memory_consumption(const T &t)
void swap(MemorySpaceData< Number, MemorySpace > &, MemorySpaceData< Number, MemorySpace > &)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
Tensor< 2, dim, Number > l(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
VectorType::value_type * begin(VectorType &V)
VectorType::value_type * end(VectorType &V)
std::vector< Integer > invert_permutation(const std::vector< Integer > &permutation)
Definition: utilities.h:1482
void fill_connectivity_dofs(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, const std::vector< unsigned int > &row_lengths, std::vector< std::mutex > &mutexes, ::SparsityPattern &connectivity_dof)
Definition: dof_info.cc:1228
void fill_connectivity(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, const std::vector< unsigned int > &renumbering, const ::SparsityPattern &connectivity_dof, DynamicSparsityPattern &connectivity)
Definition: dof_info.cc:1266
void compute_row_lengths(const unsigned int begin, const unsigned int end, const DoFInfo &dof_info, std::vector< std::mutex > &mutexes, std::vector< unsigned int > &row_lengths)
Definition: dof_info.cc:1189
static constexpr unsigned int bucket_size_threading
Definition: dof_info.cc:1184
unsigned int n_active_cells(const internal::TriangulationImplementation::NumberCache< 1 > &c)
Definition: tria.cc:12594
void copy(const T *begin, const T *end, U *dest)
static const unsigned int invalid_unsigned_int
Definition: types.h:196
const types::global_dof_index invalid_dof_index
Definition: types.h:211
void apply_to_subranges(const tbb::blocked_range< RangeType > &range, const Function &f)
Definition: parallel.h:367
unsigned int global_dof_index
Definition: types.h:76
void reorder_cells(const TaskInfo &task_info, const std::vector< unsigned int > &renumbering, const std::vector< unsigned int > &constraint_pool_row_index, const std::vector< unsigned char > &irregular_cells)
Definition: dof_info.cc:271
std::vector< std::pair< unsigned short, unsigned short > > constraint_indicator
Definition: dof_info.h:510
std::vector< std::pair< unsigned int, unsigned int > > row_starts
Definition: dof_info.h:481
void compute_cell_index_compression(const std::vector< unsigned char > &irregular_cells)
Definition: dof_info.cc:473
std::vector< unsigned int > dofs_per_cell
Definition: dof_info.h:667
std::shared_ptr< const internal::MatrixFreeFunctions::VectorDataExchange::Base > vector_exchanger
Definition: dof_info.h:572
std::vector< std::vector< unsigned int > > fe_index_conversion
Definition: dof_info.h:695
std::vector< unsigned int > dof_indices
Definition: dof_info.h:498
std::shared_ptr< const Utilities::MPI::Partitioner > vector_partitioner
Definition: dof_info.h:565
std::array< std::vector< unsigned int >, 3 > dof_indices_interleave_strides
Definition: dof_info.h:546
std::vector< unsigned int > row_starts_plain_indices
Definition: dof_info.h:609
std::vector< unsigned int > dofs_per_face
Definition: dof_info.h:672
std::vector< unsigned int > n_components
Definition: dof_info.h:637
void assign_ghosts(const std::vector< unsigned int > &boundary_cells, const MPI_Comm &communicator_sm, const bool use_vector_data_exchanger_full)
Definition: dof_info.cc:141
std::vector< types::global_dof_index > ghost_dofs
Definition: dof_info.h:702
std::array< std::vector< unsigned int >, 3 > dof_indices_contiguous
Definition: dof_info.h:525
void compute_tight_partitioners(const Table< 2, ShapeInfo< double >> &shape_info, const unsigned int n_owned_cells, const unsigned int n_lanes, const std::vector< FaceToCellTopology< 1 >> &inner_faces, const std::vector< FaceToCellTopology< 1 >> &ghosted_faces, const bool fill_cell_centric, const MPI_Comm &communicator_sm, const bool use_vector_data_exchanger_full)
Definition: dof_info.cc:803
void get_dof_indices_on_cell_batch(std::vector< unsigned int > &locall_indices, const unsigned int cell, const bool with_constraints=true) const
Definition: dof_info.cc:73
std::vector< unsigned int > cell_active_fe_index
Definition: dof_info.h:682
std::array< std::shared_ptr< const internal::MatrixFreeFunctions::VectorDataExchange::Base >, 5 > vector_exchanger_face_variants
Definition: dof_info.h:597
std::vector< unsigned int > plain_dof_indices
Definition: dof_info.h:619
std::array< std::vector< unsigned char >, 3 > n_vectorization_lanes_filled
Definition: dof_info.h:557
std::vector< unsigned int > start_components
Definition: dof_info.h:643
std::vector< unsigned int > dof_indices_interleaved
Definition: dof_info.h:515
std::array< std::vector< IndexStorageVariants >, 3 > index_storage_variants
Definition: dof_info.h:473
std::vector< unsigned int > cell_partition_data
Definition: task_info.h:474