Reference documentation for deal.II version 9.3.2
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
cuda_hanging_nodes_internal.h
Go to the documentation of this file.
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2018 - 2020 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE at
12 // the top level of the deal.II distribution.
13 //
14 // ---------------------------------------------------------------------
15 
16 #ifndef dealii_cuda_hanging_nodes_internal_h
17 #define dealii_cuda_hanging_nodes_internal_h
18 
19 #include <deal.II/base/config.h>
20 
21 #ifdef DEAL_II_COMPILER_CUDA_AWARE
22 
23 # include <deal.II/base/cuda_size.h>
24 # include <deal.II/base/utilities.h>
25 
28 
29 # include <deal.II/fe/fe_q.h>
30 # include <deal.II/fe/fe_tools.h>
31 
33 
34 namespace CUDAWrappers
35 {
36  namespace internal
37  {
46  template <int dim>
48  {
49  public:
53  HangingNodes(unsigned int fe_degree,
55  const std::vector<unsigned int> &lexicographic_mapping);
56 
60  template <typename CellIterator>
61  void
63  std::vector<types::global_dof_index> & dof_indices,
64  const CellIterator & cell,
65  const std::shared_ptr<const Utilities::MPI::Partitioner> &partitioner,
66  unsigned int & mask) const;
67 
68  private:
72  void
74 
75  void
76  rotate_subface_index(int times, unsigned int &subface_index) const;
77 
78  void
79  rotate_face(int times,
80  unsigned int n_dofs_1d,
81  std::vector<types::global_dof_index> &dofs) const;
82 
83  unsigned int
84  line_dof_idx(int local_line,
85  unsigned int dof,
86  unsigned int n_dofs_1d) const;
87 
88  void
89  transpose_face(std::vector<types::global_dof_index> &dofs) const;
90 
91  void
92  transpose_subface_index(unsigned int &subface) const;
93 
97  const unsigned int n_raw_lines;
98  std::vector<std::vector<std::pair<cell_iterator, unsigned int>>>
100  const std::vector<unsigned int> &lexicographic_mapping;
101  const unsigned int fe_degree;
103  };
104 
105  namespace internal
106  {
107  __constant__ double
109 
110  // Here is the system for how we store constraint types in a binary mask.
111  // This is not a complete contradiction-free system, i.e., there are
112  // invalid states that we just assume that we never get.
113 
114  // If the mask is zero, there are no constraints. Then, there are three
115  // different fields with one bit per dimension. The first field determines
116  // the type, or the position of an element along each direction. The
117  // second field determines if there is a constrained face with that
118  // direction as normal. The last field determines if there is a
119  // constrained edge of a given pair of coordinate planes, but where
120  // neither of the corresponding faces are constrained (only valid in 3D).
121 
122  // The element is placed in the 'first position' along *-axis. These also
123  // determine which face is constrained. For example, in 2D, if
124  // constr_face_x and constr_type are set, then x = 0 is constrained.
125  constexpr unsigned int constr_type_x = 1 << 0;
126  constexpr unsigned int constr_type_y = 1 << 1;
127  constexpr unsigned int constr_type_z = 1 << 2;
128 
129  // Element has as a constraint at * = 0 or * = fe_degree face
130  constexpr unsigned int constr_face_x = 1 << 3;
131  constexpr unsigned int constr_face_y = 1 << 4;
132  constexpr unsigned int constr_face_z = 1 << 5;
133 
134  // Element has as a constraint at * = 0 or * = fe_degree edge
135  constexpr unsigned int constr_edge_xy = 1 << 6;
136  constexpr unsigned int constr_edge_yz = 1 << 7;
137  constexpr unsigned int constr_edge_zx = 1 << 8;
138 
139  template <int dim>
140  void
141  setup_constraint_weigths(unsigned int fe_degree)
142  {
143  const unsigned int face_no =
144  0; // we assume that all faces have the same number of dofs
145 
146  FE_Q<2> fe_q(fe_degree);
147  FullMatrix<double> interpolation_matrix(fe_q.n_dofs_per_face(face_no),
148  fe_q.n_dofs_per_face(face_no));
150  0,
151  interpolation_matrix,
152  face_no);
153 
154  std::vector<unsigned int> mapping =
155  FETools::lexicographic_to_hierarchic_numbering<1>(fe_degree);
156 
157  FullMatrix<double> mapped_matrix(fe_q.n_dofs_per_face(face_no),
158  fe_q.n_dofs_per_face(face_no));
159  for (unsigned int i = 0; i < fe_q.n_dofs_per_face(face_no); ++i)
160  for (unsigned int j = 0; j < fe_q.n_dofs_per_face(face_no); ++j)
161  mapped_matrix(i, j) = interpolation_matrix(mapping[i], mapping[j]);
162 
163  cudaError_t error_code =
164  cudaMemcpyToSymbol(internal::constraint_weights,
165  &mapped_matrix[0][0],
166  sizeof(double) * fe_q.n_dofs_per_face(face_no) *
167  fe_q.n_dofs_per_face(face_no));
168  AssertCuda(error_code);
169  }
170  } // namespace internal
171 
172 
173 
174  template <int dim>
176  unsigned int fe_degree,
177  const DoFHandler<dim> & dof_handler,
178  const std::vector<unsigned int> &lexicographic_mapping)
179  : n_raw_lines(dof_handler.get_triangulation().n_raw_lines())
180  , line_to_cells(dim == 3 ? n_raw_lines : 0)
181  , lexicographic_mapping(lexicographic_mapping)
182  , fe_degree(fe_degree)
183  , dof_handler(dof_handler)
184  {
185  // Set up line-to-cell mapping for edge constraints (only if dim = 3)
187 
188  internal::setup_constraint_weigths<dim>(fe_degree);
189  }
190 
191 
192 
193  template <int dim>
194  inline void
196  {}
197 
198 
199 
200  template <>
201  inline void
203  {
204  // In 3D, we can have DoFs on only an edge being constrained (e.g. in a
205  // cartesian 2x2x2 grid, where only the upper left 2 cells are refined).
206  // This sets up a helper data structure in the form of a mapping from
207  // edges (i.e. lines) to neighboring cells.
208 
209  // Mapping from an edge to which children that share that edge.
210  const unsigned int line_to_children[12][2] = {{0, 2},
211  {1, 3},
212  {0, 1},
213  {2, 3},
214  {4, 6},
215  {5, 7},
216  {4, 5},
217  {6, 7},
218  {0, 4},
219  {1, 5},
220  {2, 6},
221  {3, 7}};
222 
223  std::vector<std::vector<std::pair<cell_iterator, unsigned int>>>
224  line_to_inactive_cells(n_raw_lines);
225 
226  // First add active and inactive cells to their lines:
227  for (const auto &cell : dof_handler.cell_iterators())
228  {
229  for (unsigned int line = 0; line < GeometryInfo<3>::lines_per_cell;
230  ++line)
231  {
232  const unsigned int line_idx = cell->line(line)->index();
233  if (cell->is_active())
234  line_to_cells[line_idx].push_back(std::make_pair(cell, line));
235  else
236  line_to_inactive_cells[line_idx].push_back(
237  std::make_pair(cell, line));
238  }
239  }
240 
241  // Now, we can access edge-neighboring active cells on same level to also
242  // access of an edge to the edges "children". These are found from looking
243  // at the corresponding edge of children of inactive edge neighbors.
244  for (unsigned int line_idx = 0; line_idx < n_raw_lines; ++line_idx)
245  {
246  if ((line_to_cells[line_idx].size() > 0) &&
247  line_to_inactive_cells[line_idx].size() > 0)
248  {
249  // We now have cells to add (active ones) and edges to which they
250  // should be added (inactive cells).
251  const cell_iterator &inactive_cell =
252  line_to_inactive_cells[line_idx][0].first;
253  const unsigned int neighbor_line =
254  line_to_inactive_cells[line_idx][0].second;
255 
256  for (unsigned int c = 0; c < 2; ++c)
257  {
258  const cell_iterator &child =
259  inactive_cell->child(line_to_children[neighbor_line][c]);
260  const unsigned int child_line_idx =
261  child->line(neighbor_line)->index();
262 
263  // Now add all active cells
264  for (const auto cl : line_to_cells[line_idx])
265  line_to_cells[child_line_idx].push_back(cl);
266  }
267  }
268  }
269  }
270 
271 
272 
273  template <int dim>
274  template <typename CellIterator>
275  inline void
277  std::vector<types::global_dof_index> & dof_indices,
278  const CellIterator & cell,
279  const std::shared_ptr<const Utilities::MPI::Partitioner> &partitioner,
280  unsigned int & mask) const
281  {
282  mask = 0;
283  const unsigned int n_dofs_1d = fe_degree + 1;
284  const unsigned int dofs_per_face =
285  Utilities::fixed_power<dim - 1>(n_dofs_1d);
286 
287  std::vector<types::global_dof_index> neighbor_dofs(dofs_per_face);
288 
289  const auto lex_face_mapping =
291 
292  for (const unsigned int face : GeometryInfo<dim>::face_indices())
293  {
294  if ((!cell->at_boundary(face)) &&
295  (cell->neighbor(face)->has_children() == false))
296  {
297  const active_cell_iterator &neighbor = cell->neighbor(face);
298 
299  // Neighbor is coarser than us, i.e., face is constrained
300  if (neighbor->level() < cell->level())
301  {
302  const unsigned int neighbor_face =
303  cell->neighbor_face_no(face);
304 
305  // Find position of face on neighbor
306  unsigned int subface = 0;
307  for (; subface < GeometryInfo<dim>::max_children_per_face;
308  ++subface)
309  if (neighbor->neighbor_child_on_subface(neighbor_face,
310  subface) == cell)
311  break;
312 
313  // Get indices to read
314  neighbor->face(neighbor_face)->get_dof_indices(neighbor_dofs);
315  // If the vector is distributed, we need to transform the
316  // global indices to local ones.
317  if (partitioner)
318  for (auto &index : neighbor_dofs)
319  index = partitioner->global_to_local(index);
320 
321  if (dim == 2)
322  {
323  if (face < 2)
324  {
325  mask |= internal::constr_face_x;
326  if (face == 0)
327  mask |= internal::constr_type_x;
328  if (subface == 0)
329  mask |= internal::constr_type_y;
330  }
331  else
332  {
333  mask |= internal::constr_face_y;
334  if (face == 2)
335  mask |= internal::constr_type_y;
336  if (subface == 0)
337  mask |= internal::constr_type_x;
338  }
339 
340  // Reorder neighbor_dofs and copy into faceth face of
341  // dof_indices
342 
343  // Offset if upper/right face
344  unsigned int offset = (face % 2 == 1) ? fe_degree : 0;
345 
346  for (unsigned int i = 0; i < n_dofs_1d; ++i)
347  {
348  unsigned int idx = 0;
349  // If X-line, i.e., if y = 0 or y = fe_degree
350  if (face > 1)
351  idx = n_dofs_1d * offset + i;
352  // If Y-line, i.e., if x = 0 or x = fe_degree
353  else
354  idx = n_dofs_1d * i + offset;
355 
356  dof_indices[idx] = neighbor_dofs[lex_face_mapping[i]];
357  }
358  }
359  else if (dim == 3)
360  {
361  const bool transpose = !(cell->face_orientation(face));
362 
363  int rotate = 0;
364 
365  if (cell->face_rotation(face))
366  rotate -= 1;
367  if (cell->face_flip(face))
368  rotate -= 2;
369 
370  rotate_face(rotate, n_dofs_1d, neighbor_dofs);
371  rotate_subface_index(rotate, subface);
372 
373  if (transpose)
374  {
375  transpose_face(neighbor_dofs);
376  transpose_subface_index(subface);
377  }
378 
379  // YZ-plane
380  if (face < 2)
381  {
382  mask |= internal::constr_face_x;
383  if (face == 0)
384  mask |= internal::constr_type_x;
385  if (subface % 2 == 0)
386  mask |= internal::constr_type_y;
387  if (subface / 2 == 0)
388  mask |= internal::constr_type_z;
389  }
390  // XZ-plane
391  else if (face < 4)
392  {
393  mask |= internal::constr_face_y;
394  if (face == 2)
395  mask |= internal::constr_type_y;
396  if (subface % 2 == 0)
397  mask |= internal::constr_type_z;
398  if (subface / 2 == 0)
399  mask |= internal::constr_type_x;
400  }
401  // XY-plane
402  else
403  {
404  mask |= internal::constr_face_z;
405  if (face == 4)
406  mask |= internal::constr_type_z;
407  if (subface % 2 == 0)
408  mask |= internal::constr_type_x;
409  if (subface / 2 == 0)
410  mask |= internal::constr_type_y;
411  }
412 
413  // Offset if upper/right/back face
414  unsigned int offset = (face % 2 == 1) ? fe_degree : 0;
415 
416  for (unsigned int i = 0; i < n_dofs_1d; ++i)
417  {
418  for (unsigned int j = 0; j < n_dofs_1d; ++j)
419  {
420  unsigned int idx = 0;
421  // If YZ-plane, i.e., if x = 0 or x = fe_degree,
422  // and orientation standard
423  if (face < 2)
424  idx = n_dofs_1d * n_dofs_1d * i +
425  n_dofs_1d * j + offset;
426  // If XZ-plane, i.e., if y = 0 or y = fe_degree,
427  // and orientation standard
428  else if (face < 4)
429  idx = n_dofs_1d * n_dofs_1d * j +
430  n_dofs_1d * offset + i;
431  // If XY-plane, i.e., if z = 0 or z = fe_degree,
432  // and orientation standard
433  else
434  idx = n_dofs_1d * n_dofs_1d * offset +
435  n_dofs_1d * i + j;
436 
437  dof_indices[idx] =
438  neighbor_dofs[lex_face_mapping[n_dofs_1d * i +
439  j]];
440  }
441  }
442  }
443  else
445  }
446  }
447  }
448 
449  // In 3D we can have a situation where only DoFs on an edge are
450  // constrained. Append these here.
451  if (dim == 3)
452  {
453  // For each line on cell, which faces does it belong to, what is the
454  // edge mask, what is the types of the faces it belong to, and what is
455  // the type along the edge.
456  const unsigned int line_to_edge[12][4] = {
479  0,
487  0,
503  0,
505 
506  for (unsigned int local_line = 0;
507  local_line < GeometryInfo<dim>::lines_per_cell;
508  ++local_line)
509  {
510  // If we don't already have a constraint for as part of a face
511  if (!(mask & line_to_edge[local_line][0]))
512  {
513  // For each cell which share that edge
514  const unsigned int line = cell->line(local_line)->index();
515  for (const auto edge_neighbor : line_to_cells[line])
516  {
517  // If one of them is coarser than us
518  const cell_iterator neighbor_cell = edge_neighbor.first;
519  if (neighbor_cell->level() < cell->level())
520  {
521  const unsigned int local_line_neighbor =
522  edge_neighbor.second;
523  mask |= line_to_edge[local_line][1] |
524  line_to_edge[local_line][2];
525 
526  bool flipped = false;
527  if (cell->line(local_line)->vertex_index(0) ==
528  neighbor_cell->line(local_line_neighbor)
529  ->vertex_index(0))
530  {
531  // Assuming line directions match axes directions,
532  // we have an unflipped edge of first type
533  mask |= line_to_edge[local_line][3];
534  }
535  else if (cell->line(local_line)->vertex_index(1) ==
536  neighbor_cell->line(local_line_neighbor)
537  ->vertex_index(1))
538  {
539  // We have an unflipped edge of second type
540  }
541  else if (cell->line(local_line)->vertex_index(1) ==
542  neighbor_cell->line(local_line_neighbor)
543  ->vertex_index(0))
544  {
545  // We have a flipped edge of second type
546  flipped = true;
547  }
548  else if (cell->line(local_line)->vertex_index(0) ==
549  neighbor_cell->line(local_line_neighbor)
550  ->vertex_index(1))
551  {
552  // We have a flipped edge of first type
553  mask |= line_to_edge[local_line][3];
554  flipped = true;
555  }
556  else
558 
559  // Copy the unconstrained values
560  neighbor_dofs.resize(n_dofs_1d * n_dofs_1d *
561  n_dofs_1d);
562  neighbor_cell->get_dof_indices(neighbor_dofs);
563  // If the vector is distributed, we need to transform
564  // the global indices to local ones.
565  if (partitioner)
566  for (auto &index : neighbor_dofs)
567  index = partitioner->global_to_local(index);
568 
569  for (unsigned int i = 0; i < n_dofs_1d; ++i)
570  {
571  // Get local dof index along line
572  const unsigned int idx =
573  line_dof_idx(local_line, i, n_dofs_1d);
574  dof_indices[idx] = neighbor_dofs
575  [lexicographic_mapping[line_dof_idx(
576  local_line_neighbor,
577  flipped ? fe_degree - i : i,
578  n_dofs_1d)]];
579  }
580 
581  // Stop looping over edge neighbors
582  break;
583  }
584  }
585  }
586  }
587  }
588  }
589 
590 
591 
592  template <int dim>
593  inline void
595  unsigned int &subface_index) const
596  {
597  const unsigned int rot_mapping[4] = {2, 0, 3, 1};
598 
599  times = times % 4;
600  times = times < 0 ? times + 4 : times;
601  for (int t = 0; t < times; ++t)
602  subface_index = rot_mapping[subface_index];
603  }
604 
605 
606 
607  template <int dim>
608  inline void
610  int times,
611  unsigned int n_dofs_1d,
612  std::vector<types::global_dof_index> &dofs) const
613  {
614  const unsigned int rot_mapping[4] = {2, 0, 3, 1};
615 
616  times = times % 4;
617  times = times < 0 ? times + 4 : times;
618 
619  std::vector<types::global_dof_index> copy(dofs.size());
620  for (int t = 0; t < times; ++t)
621  {
622  std::swap(copy, dofs);
623 
624  // Vertices
625  for (unsigned int i = 0; i < 4; ++i)
626  dofs[rot_mapping[i]] = copy[i];
627 
628  // Edges
629  const unsigned int n_int = n_dofs_1d - 2;
630  unsigned int offset = 4;
631  for (unsigned int i = 0; i < n_int; ++i)
632  {
633  // Left edge
634  dofs[offset + i] = copy[offset + 2 * n_int + (n_int - 1 - i)];
635  // Right edge
636  dofs[offset + n_int + i] =
637  copy[offset + 3 * n_int + (n_int - 1 - i)];
638  // Bottom edge
639  dofs[offset + 2 * n_int + i] = copy[offset + n_int + i];
640  // Top edge
641  dofs[offset + 3 * n_int + i] = copy[offset + i];
642  }
643 
644  // Interior points
645  offset += 4 * n_int;
646 
647  for (unsigned int i = 0; i < n_int; ++i)
648  for (unsigned int j = 0; j < n_int; ++j)
649  dofs[offset + i * n_int + j] =
650  copy[offset + j * n_int + (n_int - 1 - i)];
651  }
652  }
653 
654 
655 
656  template <int dim>
657  inline unsigned int
659  unsigned int dof,
660  unsigned int n_dofs_1d) const
661  {
662  unsigned int x, y, z;
663 
664  if (local_line < 8)
665  {
666  x =
667  (local_line % 4 == 0) ? 0 : (local_line % 4 == 1) ? fe_degree : dof;
668  y =
669  (local_line % 4 == 2) ? 0 : (local_line % 4 == 3) ? fe_degree : dof;
670  z = (local_line / 4) * fe_degree;
671  }
672  else
673  {
674  x = ((local_line - 8) % 2) * fe_degree;
675  y = ((local_line - 8) / 2) * fe_degree;
676  z = dof;
677  }
678 
679  return n_dofs_1d * n_dofs_1d * z + n_dofs_1d * y + x;
680  }
681 
682 
683 
684  template <int dim>
685  inline void
687  std::vector<types::global_dof_index> &dofs) const
688  {
689  const std::vector<types::global_dof_index> copy(dofs);
690 
691  // Vertices
692  dofs[1] = copy[2];
693  dofs[2] = copy[1];
694 
695  // Edges
696  const unsigned int n_int = fe_degree - 1;
697  unsigned int offset = 4;
698  for (unsigned int i = 0; i < n_int; ++i)
699  {
700  // Right edge
701  dofs[offset + i] = copy[offset + 2 * n_int + i];
702  // Left edge
703  dofs[offset + n_int + i] = copy[offset + 3 * n_int + i];
704  // Bottom edge
705  dofs[offset + 2 * n_int + i] = copy[offset + i];
706  // Top edge
707  dofs[offset + 3 * n_int + i] = copy[offset + n_int + i];
708  }
709 
710  // Interior
711  offset += 4 * n_int;
712  for (unsigned int i = 0; i < n_int; ++i)
713  for (unsigned int j = 0; j < n_int; ++j)
714  dofs[offset + i * n_int + j] = copy[offset + j * n_int + i];
715  }
716 
717 
718 
719  template <int dim>
720  void
721  HangingNodes<dim>::transpose_subface_index(unsigned int &subface) const
722  {
723  if (subface == 1)
724  subface = 2;
725  else if (subface == 2)
726  subface = 1;
727  }
728 
729 
730 
731  //-------------------------------------------------------------------------//
732  // Functions for resolving the hanging node constraints //
733  //-------------------------------------------------------------------------//
734  namespace internal
735  {
736  template <unsigned int size>
737  __device__ inline unsigned int
738  index2(unsigned int i, unsigned int j)
739  {
740  return i + size * j;
741  }
742 
743 
744 
745  template <unsigned int size>
746  __device__ inline unsigned int
747  index3(unsigned int i, unsigned int j, unsigned int k)
748  {
749  return i + size * j + size * size * k;
750  }
751 
752 
753 
754  template <unsigned int fe_degree,
755  unsigned int direction,
756  bool transpose,
757  typename Number>
758  __device__ inline void
759  interpolate_boundary_2d(const unsigned int constraint_mask,
760  Number * values)
761  {
762  const unsigned int x_idx = threadIdx.x % (fe_degree + 1);
763  const unsigned int y_idx = threadIdx.y;
764 
765  const unsigned int this_type =
767 
768  const unsigned int interp_idx = (direction == 0) ? x_idx : y_idx;
769 
770  Number t = 0;
771  // Flag is true if dof is constrained for the given direction and the
772  // given face.
773  const bool constrained_face =
774  (constraint_mask &
775  (((direction == 0) ? internal::constr_face_y : 0) |
776  ((direction == 1) ? internal::constr_face_x : 0)));
777 
778  // Flag is true if for the given direction, the dof is constrained with
779  // the right type and is on the correct side (left (= 0) or right (=
780  // fe_degree))
781  const bool constrained_dof =
782  ((direction == 0) && ((constraint_mask & internal::constr_type_y) ?
783  (y_idx == 0) :
784  (y_idx == fe_degree))) ||
785  ((direction == 1) && ((constraint_mask & internal::constr_type_x) ?
786  (x_idx == 0) :
787  (x_idx == fe_degree)));
788 
789  if (constrained_face && constrained_dof)
790  {
791  const bool type = constraint_mask & this_type;
792 
793  if (type)
794  {
795  for (unsigned int i = 0; i <= fe_degree; ++i)
796  {
797  const unsigned int real_idx =
798  (direction == 0) ? index2<fe_degree + 1>(i, y_idx) :
799  index2<fe_degree + 1>(x_idx, i);
800 
801  const Number w =
802  transpose ?
803  internal::constraint_weights[i * (fe_degree + 1) +
804  interp_idx] :
805  internal::constraint_weights[interp_idx *
806  (fe_degree + 1) +
807  i];
808  t += w * values[real_idx];
809  }
810  }
811  else
812  {
813  for (unsigned int i = 0; i <= fe_degree; ++i)
814  {
815  const unsigned int real_idx =
816  (direction == 0) ? index2<fe_degree + 1>(i, y_idx) :
817  index2<fe_degree + 1>(x_idx, i);
818 
819  const Number w =
820  transpose ?
821  internal::constraint_weights[(fe_degree - i) *
822  (fe_degree + 1) +
823  fe_degree - interp_idx] :
824  internal::constraint_weights[(fe_degree - interp_idx) *
825  (fe_degree + 1) +
826  fe_degree - i];
827  t += w * values[real_idx];
828  }
829  }
830  }
831 
832  // The synchronization is done for all the threads in one block with
833  // each block being assigned to one element.
834  __syncthreads();
835  if (constrained_face && constrained_dof)
836  values[index2<fe_degree + 1>(x_idx, y_idx)] = t;
837 
838  __syncthreads();
839  }
840 
841 
842 
843  template <unsigned int fe_degree,
844  unsigned int direction,
845  bool transpose,
846  typename Number>
847  __device__ inline void
848  interpolate_boundary_3d(const unsigned int constraint_mask,
849  Number * values)
850  {
851  const unsigned int x_idx = threadIdx.x % (fe_degree + 1);
852  const unsigned int y_idx = threadIdx.y;
853  const unsigned int z_idx = threadIdx.z;
854 
855  const unsigned int this_type =
856  (direction == 0) ? internal::constr_type_x :
857  (direction == 1) ? internal::constr_type_y :
859  const unsigned int face1_type =
860  (direction == 0) ? internal::constr_type_y :
861  (direction == 1) ? internal::constr_type_z :
863  const unsigned int face2_type =
864  (direction == 0) ? internal::constr_type_z :
865  (direction == 1) ? internal::constr_type_x :
867 
868  // If computing in x-direction, need to match against constr_face_y or
869  // constr_face_z
870  const unsigned int face1 = (direction == 0) ? internal::constr_face_y :
871  (direction == 1) ?
874  const unsigned int face2 = (direction == 0) ? internal::constr_face_z :
875  (direction == 1) ?
878  const unsigned int edge = (direction == 0) ? internal::constr_edge_yz :
879  (direction == 1) ?
882  const unsigned int constrained_face =
883  constraint_mask & (face1 | face2 | edge);
884 
885  const unsigned int interp_idx =
886  (direction == 0) ? x_idx : (direction == 1) ? y_idx : z_idx;
887  const unsigned int face1_idx =
888  (direction == 0) ? y_idx : (direction == 1) ? z_idx : x_idx;
889  const unsigned int face2_idx =
890  (direction == 0) ? z_idx : (direction == 1) ? x_idx : y_idx;
891 
892  Number t = 0;
893  const bool on_face1 = (constraint_mask & face1_type) ?
894  (face1_idx == 0) :
895  (face1_idx == fe_degree);
896  const bool on_face2 = (constraint_mask & face2_type) ?
897  (face2_idx == 0) :
898  (face2_idx == fe_degree);
899  const bool constrained_dof =
900  (((constraint_mask & face1) && on_face1) ||
901  ((constraint_mask & face2) && on_face2) ||
902  ((constraint_mask & edge) && on_face1 && on_face2));
903 
904  if (constrained_face && constrained_dof)
905  {
906  const bool type = constraint_mask & this_type;
907  if (type)
908  {
909  for (unsigned int i = 0; i <= fe_degree; ++i)
910  {
911  const unsigned int real_idx =
912  (direction == 0) ?
913  index3<fe_degree + 1>(i, y_idx, z_idx) :
914  (direction == 1) ?
915  index3<fe_degree + 1>(x_idx, i, z_idx) :
916  index3<fe_degree + 1>(x_idx, y_idx, i);
917 
918  const Number w =
919  transpose ?
920  internal::constraint_weights[i * (fe_degree + 1) +
921  interp_idx] :
922  internal::constraint_weights[interp_idx *
923  (fe_degree + 1) +
924  i];
925  t += w * values[real_idx];
926  }
927  }
928  else
929  {
930  for (unsigned int i = 0; i <= fe_degree; ++i)
931  {
932  const unsigned int real_idx =
933  (direction == 0) ?
934  index3<fe_degree + 1>(i, y_idx, z_idx) :
935  (direction == 1) ?
936  index3<fe_degree + 1>(x_idx, i, z_idx) :
937  index3<fe_degree + 1>(x_idx, y_idx, i);
938 
939  const Number w =
940  transpose ?
941  internal::constraint_weights[(fe_degree - i) *
942  (fe_degree + 1) +
943  fe_degree - interp_idx] :
944  internal::constraint_weights[(fe_degree - interp_idx) *
945  (fe_degree + 1) +
946  fe_degree - i];
947  t += w * values[real_idx];
948  }
949  }
950  }
951 
952  // The synchronization is done for all the threads in one block with
953  // each block being assigned to one element.
954  __syncthreads();
955 
956  if (constrained_face && constrained_dof)
957  values[index3<fe_degree + 1>(x_idx, y_idx, z_idx)] = t;
958 
959  __syncthreads();
960  }
961  } // namespace internal
962 
963 
964 
973  template <int dim, int fe_degree, bool transpose, typename Number>
974  __device__ void
975  resolve_hanging_nodes(const unsigned int constraint_mask, Number *values)
976  {
977  if (dim == 2)
978  {
979  internal::interpolate_boundary_2d<fe_degree, 0, transpose>(
980  constraint_mask, values);
981 
982  internal::interpolate_boundary_2d<fe_degree, 1, transpose>(
983  constraint_mask, values);
984  }
985  else if (dim == 3)
986  {
987  // Interpolate y and z faces (x-direction)
988  internal::interpolate_boundary_3d<fe_degree, 0, transpose>(
989  constraint_mask, values);
990  // Interpolate x and z faces (y-direction)
991  internal::interpolate_boundary_3d<fe_degree, 1, transpose>(
992  constraint_mask, values);
993  // Interpolate x and y faces (z-direction)
994  internal::interpolate_boundary_3d<fe_degree, 2, transpose>(
995  constraint_mask, values);
996  }
997  }
998  } // namespace internal
999 } // namespace CUDAWrappers
1000 
1002 
1003 #endif
1004 
1005 #endif
typename DoFHandler< dim >::active_cell_iterator active_cell_iterator
std::vector< std::vector< std::pair< cell_iterator, unsigned int > > > line_to_cells
void transpose_subface_index(unsigned int &subface) const
const std::vector< unsigned int > & lexicographic_mapping
void rotate_subface_index(int times, unsigned int &subface_index) const
void setup_constraints(std::vector< types::global_dof_index > &dof_indices, const CellIterator &cell, const std::shared_ptr< const Utilities::MPI::Partitioner > &partitioner, unsigned int &mask) const
void rotate_face(int times, unsigned int n_dofs_1d, std::vector< types::global_dof_index > &dofs) const
unsigned int line_dof_idx(int local_line, unsigned int dof, unsigned int n_dofs_1d) const
void transpose_face(std::vector< types::global_dof_index > &dofs) const
HangingNodes(unsigned int fe_degree, const DoFHandler< dim > &dof_handler, const std::vector< unsigned int > &lexicographic_mapping)
typename DoFHandler< dim >::cell_iterator cell_iterator
virtual void get_subface_interpolation_matrix(const FiniteElement< dim, spacedim > &source, const unsigned int subface, FullMatrix< double > &matrix, const unsigned int face_no=0) const override
Definition: fe_q_base.cc:625
Definition: fe_q.h:549
unsigned int n_dofs_per_face(unsigned int face_no=0, unsigned int child=0) const
#define DEAL_II_NAMESPACE_OPEN
Definition: config.h:396
#define DEAL_II_NAMESPACE_CLOSE
Definition: config.h:397
DerivativeForm< 1, spacedim, dim, Number > transpose(const DerivativeForm< 1, dim, spacedim, Number > &DF)
static ::ExceptionBase & ExcInternalError()
static ::ExceptionBase & ExcNotImplemented()
#define AssertCuda(error_code)
Definition: exceptions.h:1772
typename ActiveSelector::cell_iterator cell_iterator
Definition: dof_handler.h:466
typename ActiveSelector::active_cell_iterator active_cell_iterator
Definition: dof_handler.h:438
void interpolate_boundary_2d(const unsigned int constraint_mask, Number *values)
void interpolate_boundary_3d(const unsigned int constraint_mask, Number *values)
void setup_constraint_weigths(unsigned int fe_degree)
__constant__ double constraint_weights[(mf_max_elem_degree+1) *(mf_max_elem_degree+1)]
unsigned int index2(unsigned int i, unsigned int j)
unsigned int index3(unsigned int i, unsigned int j, unsigned int k)
void resolve_hanging_nodes(const unsigned int constraint_mask, Number *values)
constexpr unsigned int mf_max_elem_degree
Definition: cuda_size.h:47
std::vector< unsigned int > lexicographic_to_hierarchic_numbering(unsigned int degree)
void rotate(const double angle, Triangulation< dim > &triangulation)
void swap(MemorySpaceData< Number, MemorySpace > &, MemorySpaceData< Number, MemorySpace > &)
Tensor< 2, dim, Number > w(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
T fixed_power(const T t)
Definition: utilities.h:1081
void copy(const T *begin, const T *end, U *dest)