Reference documentation for deal.II version 9.3.2
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
step-51.h
Go to the documentation of this file.
1  = 0) const override
596  * {
597  * double sum = 0;
598  * for (unsigned int i = 0; i < this->n_source_centers; ++i)
599  * {
600  * const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];
601  * sum +=
602  * std::exp(-x_minus_xi.norm_square() / (this->width * this->width));
603  * }
604  *
605  * return sum /
606  * std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
607  * }
608  *
609  * virtual Tensor<1, dim>
610  * gradient(const Point<dim> &p,
611  * const unsigned int /*component*/ = 0) const override
612  * {
613  * Tensor<1, dim> sum;
614  * for (unsigned int i = 0; i < this->n_source_centers; ++i)
615  * {
616  * const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];
617  *
618  * sum +=
619  * (-2 / (this->width * this->width) *
620  * std::exp(-x_minus_xi.norm_square() / (this->width * this->width)) *
621  * x_minus_xi);
622  * }
623  *
624  * return sum /
625  * std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
626  * }
627  * };
628  *
629  *
630  *
631  * @endcode
632  *
633  * This class implements a function where the scalar solution and its negative
634  * gradient are collected together. This function is used when computing the
635  * error of the HDG approximation and its implementation is to simply call
636  * value and gradient function of the Solution class.
637  *
638  * @code
639  * template <int dim>
640  * class SolutionAndGradient : public Function<dim>, protected SolutionBase<dim>
641  * {
642  * public:
643  * SolutionAndGradient()
644  * : Function<dim>(dim + 1)
645  * {}
646  *
647  * virtual void vector_value(const Point<dim> &p,
648  * Vector<double> & v) const override
649  * {
650  * AssertDimension(v.size(), dim + 1);
651  * Solution<dim> solution;
652  * Tensor<1, dim> grad = solution.gradient(p);
653  * for (unsigned int d = 0; d < dim; ++d)
654  * v[d] = -grad[d];
655  * v[dim] = solution.value(p);
656  * }
657  * };
658  *
659  *
660  *
661  * @endcode
662  *
663  * Next comes the implementation of the convection velocity. As described in
664  * the introduction, we choose a velocity field that is @f$(y, -x)@f$ in 2D and
665  * @f$(y, -x, 1)@f$ in 3D. This gives a divergence-free velocity field.
666  *
667  * @code
668  * template <int dim>
669  * class ConvectionVelocity : public TensorFunction<1, dim>
670  * {
671  * public:
672  * ConvectionVelocity()
673  * : TensorFunction<1, dim>()
674  * {}
675  *
676  * virtual Tensor<1, dim> value(const Point<dim> &p) const override
677  * {
678  * Tensor<1, dim> convection;
679  * switch (dim)
680  * {
681  * case 1:
682  * convection[0] = 1;
683  * break;
684  * case 2:
685  * convection[0] = p[1];
686  * convection[1] = -p[0];
687  * break;
688  * case 3:
689  * convection[0] = p[1];
690  * convection[1] = -p[0];
691  * convection[2] = 1;
692  * break;
693  * default:
694  * Assert(false, ExcNotImplemented());
695  * }
696  * return convection;
697  * }
698  * };
699  *
700  *
701  *
702  * @endcode
703  *
704  * The last function we implement is the right hand side for the
705  * manufactured solution. It is very similar to @ref step_7 "step-7", with the exception
706  * that we now have a convection term instead of the reaction term. Since
707  * the velocity field is incompressible, i.e., @f$\nabla \cdot \mathbf{c} =
708  * 0@f$, the advection term simply reads @f$\mathbf{c} \nabla u@f$.
709  *
710  * @code
711  * template <int dim>
712  * class RightHandSide : public Function<dim>, protected SolutionBase<dim>
713  * {
714  * public:
715  * virtual double value(const Point<dim> &p,
716  * const unsigned int /*component*/ = 0) const override
717  * {
718  * ConvectionVelocity<dim> convection_velocity;
719  * Tensor<1, dim> convection = convection_velocity.value(p);
720  * double sum = 0;
721  * for (unsigned int i = 0; i < this->n_source_centers; ++i)
722  * {
723  * const Tensor<1, dim> x_minus_xi = p - this->source_centers[i];
724  *
725  * sum +=
726  * ((2 * dim - 2 * convection * x_minus_xi -
727  * 4 * x_minus_xi.norm_square() / (this->width * this->width)) /
728  * (this->width * this->width) *
729  * std::exp(-x_minus_xi.norm_square() / (this->width * this->width)));
730  * }
731  *
732  * return sum /
733  * std::pow(2. * numbers::PI * this->width * this->width, dim / 2.);
734  * }
735  * };
736  *
737  *
738  *
739  * @endcode
740  *
741  *
742  * <a name="TheHDGsolverclass"></a>
743  * <h3>The HDG solver class</h3>
744  *
745 
746  *
747  * The HDG solution procedure follows closely that of @ref step_7 "step-7". The major
748  * difference is the use of three different sets of DoFHandler and FE
749  * objects, along with the ChunkSparseMatrix and the corresponding solutions
750  * vectors. We also use WorkStream to enable a multithreaded local solution
751  * process which exploits the embarrassingly parallel nature of the local
752  * solver. For WorkStream, we define the local operations on a cell and a
753  * copy function into the global matrix and vector. We do this both for the
754  * assembly (which is run twice, once when we generate the system matrix and
755  * once when we compute the element-interior solutions from the skeleton
756  * values) and for the postprocessing where we extract a solution that
757  * converges at higher order.
758  *
759  * @code
760  * template <int dim>
761  * class HDG
762  * {
763  * public:
764  * enum RefinementMode
765  * {
766  * global_refinement,
767  * adaptive_refinement
768  * };
769  *
770  * HDG(const unsigned int degree, const RefinementMode refinement_mode);
771  * void run();
772  *
773  * private:
774  * void setup_system();
775  * void assemble_system(const bool reconstruct_trace = false);
776  * void solve();
777  * void postprocess();
778  * void refine_grid(const unsigned int cycle);
779  * void output_results(const unsigned int cycle);
780  *
781  * @endcode
782  *
783  * Data for the assembly and solution of the primal variables.
784  *
785  * @code
786  * struct PerTaskData;
787  * struct ScratchData;
788  *
789  * @endcode
790  *
791  * Post-processing the solution to obtain @f$u^*@f$ is an element-by-element
792  * procedure; as such, we do not need to assemble any global data and do
793  * not declare any 'task data' for WorkStream to use.
794  *
795  * @code
796  * struct PostProcessScratchData;
797  *
798  * @endcode
799  *
800  * The following three functions are used by WorkStream to do the actual
801  * work of the program.
802  *
803  * @code
804  * void assemble_system_one_cell(
805  * const typename DoFHandler<dim>::active_cell_iterator &cell,
806  * ScratchData & scratch,
807  * PerTaskData & task_data);
808  *
809  * void copy_local_to_global(const PerTaskData &data);
810  *
811  * void postprocess_one_cell(
812  * const typename DoFHandler<dim>::active_cell_iterator &cell,
813  * PostProcessScratchData & scratch,
814  * unsigned int & empty_data);
815  *
816  *
817  * Triangulation<dim> triangulation;
818  *
819  * @endcode
820  *
821  * The 'local' solutions are interior to each element. These
822  * represent the primal solution field @f$u@f$ as well as the auxiliary
823  * field @f$\mathbf{q}@f$.
824  *
825  * @code
826  * FESystem<dim> fe_local;
827  * DoFHandler<dim> dof_handler_local;
828  * Vector<double> solution_local;
829  *
830  * @endcode
831  *
832  * The new finite element type and corresponding <code>DoFHandler</code> are
833  * used for the global skeleton solution that couples the element-level
834  * local solutions.
835  *
836  * @code
837  * FE_FaceQ<dim> fe;
838  * DoFHandler<dim> dof_handler;
839  * Vector<double> solution;
840  * Vector<double> system_rhs;
841  *
842  * @endcode
843  *
844  * As stated in the introduction, HDG solutions can be post-processed to
845  * attain superconvergence rates of @f$\mathcal{O}(h^{p+2})@f$. The
846  * post-processed solution is a discontinuous finite element solution
847  * representing the primal variable on the interior of each cell. We define
848  * a FE type of degree @f$p+1@f$ to represent this post-processed solution,
849  * which we only use for output after constructing it.
850  *
851  * @code
852  * FE_DGQ<dim> fe_u_post;
853  * DoFHandler<dim> dof_handler_u_post;
854  * Vector<double> solution_u_post;
855  *
856  * @endcode
857  *
858  * The degrees of freedom corresponding to the skeleton strongly enforce
859  * Dirichlet boundary conditions, just as in a continuous Galerkin finite
860  * element method. We can enforce the boundary conditions in an analogous
861  * manner via an AffineConstraints object. In addition, hanging nodes are
862  * handled in the same way as for continuous finite elements: For the face
863  * elements which only define degrees of freedom on the face, this process
864  * sets the solution on the refined side to coincide with the
865  * representation on the coarse side.
866  *
867 
868  *
869  * Note that for HDG, the elimination of hanging nodes is not the only
870  * possibility &mdash; in terms of the HDG theory, one could also use the
871  * unknowns from the refined side and express the local solution on the
872  * coarse side through the trace values on the refined side. However, such
873  * a setup is not as easily implemented in terms of deal.II loops and not
874  * further analyzed.
875  *
876  * @code
877  * AffineConstraints<double> constraints;
878  *
879  * @endcode
880  *
881  * The usage of the ChunkSparseMatrix class is similar to the usual sparse
882  * matrices: You need a sparsity pattern of type ChunkSparsityPattern and
883  * the actual matrix object. When creating the sparsity pattern, we just
884  * have to additionally pass the size of local blocks.
885  *
886  * @code
887  * ChunkSparsityPattern sparsity_pattern;
888  * ChunkSparseMatrix<double> system_matrix;
889  *
890  * @endcode
891  *
892  * Same as @ref step_7 "step-7":
893  *
894  * @code
895  * const RefinementMode refinement_mode;
896  * ConvergenceTable convergence_table;
897  * };
898  *
899  * @endcode
900  *
901  *
902  * <a name="TheHDGclassimplementation"></a>
903  * <h3>The HDG class implementation</h3>
904  *
905 
906  *
907  *
908  * <a name="Constructor"></a>
909  * <h4>Constructor</h4>
910  * The constructor is similar to those in other examples, with the exception
911  * of handling multiple DoFHandler and FiniteElement objects. Note that we
912  * create a system of finite elements for the local DG part, including the
913  * gradient/flux part and the scalar part.
914  *
915  * @code
916  * template <int dim>
917  * HDG<dim>::HDG(const unsigned int degree, const RefinementMode refinement_mode)
918  * : fe_local(FE_DGQ<dim>(degree), dim, FE_DGQ<dim>(degree), 1)
919  * , dof_handler_local(triangulation)
920  * , fe(degree)
921  * , dof_handler(triangulation)
922  * , fe_u_post(degree + 1)
923  * , dof_handler_u_post(triangulation)
924  * , refinement_mode(refinement_mode)
925  * {}
926  *
927  *
928  *
929  * @endcode
930  *
931  *
932  * <a name="HDGsetup_system"></a>
933  * <h4>HDG::setup_system</h4>
934  * The system for an HDG solution is setup in an analogous manner to most
935  * of the other tutorial programs. We are careful to distribute dofs with
936  * all of our DoFHandler objects. The @p solution and @p system_matrix
937  * objects go with the global skeleton solution.
938  *
939  * @code
940  * template <int dim>
941  * void HDG<dim>::setup_system()
942  * {
943  * dof_handler_local.distribute_dofs(fe_local);
944  * dof_handler.distribute_dofs(fe);
945  * dof_handler_u_post.distribute_dofs(fe_u_post);
946  *
947  * std::cout << " Number of degrees of freedom: " << dof_handler.n_dofs()
948  * << std::endl;
949  *
950  * solution.reinit(dof_handler.n_dofs());
951  * system_rhs.reinit(dof_handler.n_dofs());
952  *
953  * solution_local.reinit(dof_handler_local.n_dofs());
954  * solution_u_post.reinit(dof_handler_u_post.n_dofs());
955  *
956  * constraints.clear();
957  * DoFTools::make_hanging_node_constraints(dof_handler, constraints);
958  * std::map<types::boundary_id, const Function<dim> *> boundary_functions;
959  * Solution<dim> solution_function;
960  * boundary_functions[0] = &solution_function;
961  * VectorTools::project_boundary_values(dof_handler,
962  * boundary_functions,
963  * QGauss<dim - 1>(fe.degree + 1),
964  * constraints);
965  * constraints.close();
966  *
967  * @endcode
968  *
969  * When creating the chunk sparsity pattern, we first create the usual
970  * dynamic sparsity pattern and then set the chunk size, which is equal
971  * to the number of dofs on a face, when copying this into the final
972  * sparsity pattern.
973  *
974  * @code
975  * {
976  * DynamicSparsityPattern dsp(dof_handler.n_dofs());
977  * DoFTools::make_sparsity_pattern(dof_handler, dsp, constraints, false);
978  * sparsity_pattern.copy_from(dsp, fe.n_dofs_per_face());
979  * }
980  * system_matrix.reinit(sparsity_pattern);
981  * }
982  *
983  *
984  *
985  * @endcode
986  *
987  *
988  * <a name="HDGPerTaskData"></a>
989  * <h4>HDG::PerTaskData</h4>
990  * Next comes the definition of the local data structures for the parallel
991  * assembly. The first structure @p PerTaskData contains the local vector
992  * and matrix that are written into the global matrix, whereas the
993  * ScratchData contains all data that we need for the local assembly. There
994  * is one variable worth noting here, namely the boolean variable @p
995  * trace_reconstruct. As mentioned in the introduction, we solve the HDG
996  * system in two steps. First, we create a linear system for the skeleton
997  * system where we condense the local part into it via the Schur complement
998  * @f$D-CA^{-1}B@f$. Then, we solve for the local part using the skeleton
999  * solution. For these two steps, we need the same matrices on the elements
1000  * twice, which we want to compute by two assembly steps. Since most of the
1001  * code is similar, we do this with the same function but only switch
1002  * between the two based on a flag that we set when starting the
1003  * assembly. Since we need to pass this information on to the local worker
1004  * routines, we store it once in the task data.
1005  *
1006  * @code
1007  * template <int dim>
1008  * struct HDG<dim>::PerTaskData
1009  * {
1010  * FullMatrix<double> cell_matrix;
1011  * Vector<double> cell_vector;
1012  * std::vector<types::global_dof_index> dof_indices;
1013  *
1014  * bool trace_reconstruct;
1015  *
1016  * PerTaskData(const unsigned int n_dofs, const bool trace_reconstruct)
1017  * : cell_matrix(n_dofs, n_dofs)
1018  * , cell_vector(n_dofs)
1019  * , dof_indices(n_dofs)
1020  * , trace_reconstruct(trace_reconstruct)
1021  * {}
1022  * };
1023  *
1024  *
1025  *
1026  * @endcode
1027  *
1028  *
1029  * <a name="HDGScratchData"></a>
1030  * <h4>HDG::ScratchData</h4>
1031  * @p ScratchData contains persistent data for each
1032  * thread within WorkStream. The FEValues, matrix,
1033  * and vector objects should be familiar by now. There are two objects that
1034  * need to be discussed: `std::vector<std::vector<unsigned int> >
1035  * fe_local_support_on_face` and `std::vector<std::vector<unsigned int> >
1036  * fe_support_on_face`. These are used to indicate whether or not the finite
1037  * elements chosen have support (non-zero values) on a given face of the
1038  * reference cell for the local part associated to @p fe_local and the
1039  * skeleton part @p fe. We extract this information in the
1040  * constructor and store it once for all cells that we work on. Had we not
1041  * stored this information, we would be forced to assemble a large number of
1042  * zero terms on each cell, which would significantly slow the program.
1043  *
1044  * @code
1045  * template <int dim>
1046  * struct HDG<dim>::ScratchData
1047  * {
1048  * FEValues<dim> fe_values_local;
1049  * FEFaceValues<dim> fe_face_values_local;
1050  * FEFaceValues<dim> fe_face_values;
1051  *
1052  * FullMatrix<double> ll_matrix;
1053  * FullMatrix<double> lf_matrix;
1054  * FullMatrix<double> fl_matrix;
1055  * FullMatrix<double> tmp_matrix;
1056  * Vector<double> l_rhs;
1057  * Vector<double> tmp_rhs;
1058  *
1059  * std::vector<Tensor<1, dim>> q_phi;
1060  * std::vector<double> q_phi_div;
1061  * std::vector<double> u_phi;
1062  * std::vector<Tensor<1, dim>> u_phi_grad;
1063  * std::vector<double> tr_phi;
1064  * std::vector<double> trace_values;
1065  *
1066  * std::vector<std::vector<unsigned int>> fe_local_support_on_face;
1067  * std::vector<std::vector<unsigned int>> fe_support_on_face;
1068  *
1069  * ConvectionVelocity<dim> convection_velocity;
1070  * RightHandSide<dim> right_hand_side;
1071  * const Solution<dim> exact_solution;
1072  *
1073  * ScratchData(const FiniteElement<dim> &fe,
1074  * const FiniteElement<dim> &fe_local,
1075  * const QGauss<dim> & quadrature_formula,
1076  * const QGauss<dim - 1> & face_quadrature_formula,
1077  * const UpdateFlags local_flags,
1078  * const UpdateFlags local_face_flags,
1079  * const UpdateFlags flags)
1080  * : fe_values_local(fe_local, quadrature_formula, local_flags)
1081  * , fe_face_values_local(fe_local,
1082  * face_quadrature_formula,
1083  * local_face_flags)
1084  * , fe_face_values(fe, face_quadrature_formula, flags)
1085  * , ll_matrix(fe_local.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
1086  * , lf_matrix(fe_local.n_dofs_per_cell(), fe.n_dofs_per_cell())
1087  * , fl_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
1088  * , tmp_matrix(fe.n_dofs_per_cell(), fe_local.n_dofs_per_cell())
1089  * , l_rhs(fe_local.n_dofs_per_cell())
1090  * , tmp_rhs(fe_local.n_dofs_per_cell())
1091  * , q_phi(fe_local.n_dofs_per_cell())
1092  * , q_phi_div(fe_local.n_dofs_per_cell())
1093  * , u_phi(fe_local.n_dofs_per_cell())
1094  * , u_phi_grad(fe_local.n_dofs_per_cell())
1095  * , tr_phi(fe.n_dofs_per_cell())
1096  * , trace_values(face_quadrature_formula.size())
1097  * , fe_local_support_on_face(GeometryInfo<dim>::faces_per_cell)
1098  * , fe_support_on_face(GeometryInfo<dim>::faces_per_cell)
1099  * , exact_solution()
1100  * {
1101  * for (unsigned int face_no : GeometryInfo<dim>::face_indices())
1102  * for (unsigned int i = 0; i < fe_local.n_dofs_per_cell(); ++i)
1103  * {
1104  * if (fe_local.has_support_on_face(i, face_no))
1105  * fe_local_support_on_face[face_no].push_back(i);
1106  * }
1107  *
1108  * for (unsigned int face_no : GeometryInfo<dim>::face_indices())
1109  * for (unsigned int i = 0; i < fe.n_dofs_per_cell(); ++i)
1110  * {
1111  * if (fe.has_support_on_face(i, face_no))
1112  * fe_support_on_face[face_no].push_back(i);
1113  * }
1114  * }
1115  *
1116  * ScratchData(const ScratchData &sd)
1117  * : fe_values_local(sd.fe_values_local.get_fe(),
1118  * sd.fe_values_local.get_quadrature(),
1119  * sd.fe_values_local.get_update_flags())
1120  * , fe_face_values_local(sd.fe_face_values_local.get_fe(),
1121  * sd.fe_face_values_local.get_quadrature(),
1122  * sd.fe_face_values_local.get_update_flags())
1123  * , fe_face_values(sd.fe_face_values.get_fe(),
1124  * sd.fe_face_values.get_quadrature(),
1125  * sd.fe_face_values.get_update_flags())
1126  * , ll_matrix(sd.ll_matrix)
1127  * , lf_matrix(sd.lf_matrix)
1128  * , fl_matrix(sd.fl_matrix)
1129  * , tmp_matrix(sd.tmp_matrix)
1130  * , l_rhs(sd.l_rhs)
1131  * , tmp_rhs(sd.tmp_rhs)
1132  * , q_phi(sd.q_phi)
1133  * , q_phi_div(sd.q_phi_div)
1134  * , u_phi(sd.u_phi)
1135  * , u_phi_grad(sd.u_phi_grad)
1136  * , tr_phi(sd.tr_phi)
1137  * , trace_values(sd.trace_values)
1138  * , fe_local_support_on_face(sd.fe_local_support_on_face)
1139  * , fe_support_on_face(sd.fe_support_on_face)
1140  * , exact_solution()
1141  * {}
1142  * };
1143  *
1144  *
1145  *
1146  * @endcode
1147  *
1148  *
1149  * <a name="HDGPostProcessScratchData"></a>
1150  * <h4>HDG::PostProcessScratchData</h4>
1151  * @p PostProcessScratchData contains the data used by WorkStream
1152  * when post-processing the local solution @f$u^*@f$. It is similar, but much
1153  * simpler, than @p ScratchData.
1154  *
1155  * @code
1156  * template <int dim>
1157  * struct HDG<dim>::PostProcessScratchData
1158  * {
1159  * FEValues<dim> fe_values_local;
1160  * FEValues<dim> fe_values;
1161  *
1162  * std::vector<double> u_values;
1163  * std::vector<Tensor<1, dim>> u_gradients;
1164  * FullMatrix<double> cell_matrix;
1165  *
1166  * Vector<double> cell_rhs;
1167  * Vector<double> cell_sol;
1168  *
1169  * PostProcessScratchData(const FiniteElement<dim> &fe,
1170  * const FiniteElement<dim> &fe_local,
1171  * const QGauss<dim> & quadrature_formula,
1172  * const UpdateFlags local_flags,
1173  * const UpdateFlags flags)
1174  * : fe_values_local(fe_local, quadrature_formula, local_flags)
1175  * , fe_values(fe, quadrature_formula, flags)
1176  * , u_values(quadrature_formula.size())
1177  * , u_gradients(quadrature_formula.size())
1178  * , cell_matrix(fe.n_dofs_per_cell(), fe.n_dofs_per_cell())
1179  * , cell_rhs(fe.n_dofs_per_cell())
1180  * , cell_sol(fe.n_dofs_per_cell())
1181  * {}
1182  *
1183  * PostProcessScratchData(const PostProcessScratchData &sd)
1184  * : fe_values_local(sd.fe_values_local.get_fe(),
1185  * sd.fe_values_local.get_quadrature(),
1186  * sd.fe_values_local.get_update_flags())
1187  * , fe_values(sd.fe_values.get_fe(),
1188  * sd.fe_values.get_quadrature(),
1189  * sd.fe_values.get_update_flags())
1190  * , u_values(sd.u_values)
1191  * , u_gradients(sd.u_gradients)
1192  * , cell_matrix(sd.cell_matrix)
1193  * , cell_rhs(sd.cell_rhs)
1194  * , cell_sol(sd.cell_sol)
1195  * {}
1196  * };
1197  *
1198  *
1199  *
1200  * @endcode
1201  *
1202  *
1203  * <a name="HDGassemble_system"></a>
1204  * <h4>HDG::assemble_system</h4>
1205  * The @p assemble_system function is similar to the one on @ref step_32 "step-32", where
1206  * the quadrature formula and the update flags are set up, and then
1207  * <code>WorkStream</code> is used to do the work in a multi-threaded
1208  * manner. The @p trace_reconstruct input parameter is used to decide
1209  * whether we are solving for the global skeleton solution (false) or the
1210  * local solution (true).
1211  *
1212 
1213  *
1214  * One thing worth noting for the multi-threaded execution of assembly is
1215  * the fact that the local computations in `assemble_system_one_cell()` call
1216  * into BLAS and LAPACK functions if those are available in deal.II. Thus,
1217  * the underlying BLAS/LAPACK library must support calls from multiple
1218  * threads at the same time. Most implementations do support this, but some
1219  * libraries need to be built in a specific way to avoid problems. For
1220  * example, OpenBLAS compiled without multithreading inside the BLAS/LAPACK
1221  * calls needs to be built with a flag called `USE_LOCKING` set to true.
1222  *
1223  * @code
1224  * template <int dim>
1225  * void HDG<dim>::assemble_system(const bool trace_reconstruct)
1226  * {
1227  * const QGauss<dim> quadrature_formula(fe.degree + 1);
1228  * const QGauss<dim - 1> face_quadrature_formula(fe.degree + 1);
1229  *
1230  * const UpdateFlags local_flags(update_values | update_gradients |
1231  * update_JxW_values | update_quadrature_points);
1232  *
1233  * const UpdateFlags local_face_flags(update_values);
1234  *
1235  * const UpdateFlags flags(update_values | update_normal_vectors |
1236  * update_quadrature_points | update_JxW_values);
1237  *
1238  * PerTaskData task_data(fe.n_dofs_per_cell(), trace_reconstruct);
1239  * ScratchData scratch(fe,
1240  * fe_local,
1241  * quadrature_formula,
1242  * face_quadrature_formula,
1243  * local_flags,
1244  * local_face_flags,
1245  * flags);
1246  *
1247  * WorkStream::run(dof_handler.begin_active(),
1248  * dof_handler.end(),
1249  * *this,
1250  * &HDG<dim>::assemble_system_one_cell,
1251  * &HDG<dim>::copy_local_to_global,
1252  * scratch,
1253  * task_data);
1254  * }
1255  *
1256  *
1257  *
1258  * @endcode
1259  *
1260  *
1261  * <a name="HDGassemble_system_one_cell"></a>
1262  * <h4>HDG::assemble_system_one_cell</h4>
1263  * The real work of the HDG program is done by @p assemble_system_one_cell.
1264  * Assembling the local matrices @f$A, B, C@f$ is done here, along with the
1265  * local contributions of the global matrix @f$D@f$.
1266  *
1267  * @code
1268  * template <int dim>
1269  * void HDG<dim>::assemble_system_one_cell(
1270  * const typename DoFHandler<dim>::active_cell_iterator &cell,
1271  * ScratchData & scratch,
1272  * PerTaskData & task_data)
1273  * {
1274  * @endcode
1275  *
1276  * Construct iterator for dof_handler_local for FEValues reinit function.
1277  *
1278  * @code
1279  * const typename DoFHandler<dim>::active_cell_iterator loc_cell(&triangulation,
1280  * cell->level(),
1281  * cell->index(),
1282  * &dof_handler_local);
1283  *
1284  * const unsigned int n_q_points =
1285  * scratch.fe_values_local.get_quadrature().size();
1286  * const unsigned int n_face_q_points =
1287  * scratch.fe_face_values_local.get_quadrature().size();
1288  *
1289  * const unsigned int loc_dofs_per_cell =
1290  * scratch.fe_values_local.get_fe().n_dofs_per_cell();
1291  *
1292  * const FEValuesExtractors::Vector fluxes(0);
1293  * const FEValuesExtractors::Scalar scalar(dim);
1294  *
1295  * scratch.ll_matrix = 0;
1296  * scratch.l_rhs = 0;
1297  * if (!task_data.trace_reconstruct)
1298  * {
1299  * scratch.lf_matrix = 0;
1300  * scratch.fl_matrix = 0;
1301  * task_data.cell_matrix = 0;
1302  * task_data.cell_vector = 0;
1303  * }
1304  * scratch.fe_values_local.reinit(loc_cell);
1305  *
1306  * @endcode
1307  *
1308  * We first compute the cell-interior contribution to @p ll_matrix matrix
1309  * (referred to as matrix @f$A@f$ in the introduction) corresponding to
1310  * local-local coupling, as well as the local right-hand-side vector. We
1311  * store the values at each quadrature point for the basis functions, the
1312  * right-hand-side value, and the convection velocity, in order to have
1313  * quick access to these fields.
1314  *
1315  * @code
1316  * for (unsigned int q = 0; q < n_q_points; ++q)
1317  * {
1318  * const double rhs_value = scratch.right_hand_side.value(
1319  * scratch.fe_values_local.quadrature_point(q));
1320  * const Tensor<1, dim> convection = scratch.convection_velocity.value(
1321  * scratch.fe_values_local.quadrature_point(q));
1322  * const double JxW = scratch.fe_values_local.JxW(q);
1323  * for (unsigned int k = 0; k < loc_dofs_per_cell; ++k)
1324  * {
1325  * scratch.q_phi[k] = scratch.fe_values_local[fluxes].value(k, q);
1326  * scratch.q_phi_div[k] =
1327  * scratch.fe_values_local[fluxes].divergence(k, q);
1328  * scratch.u_phi[k] = scratch.fe_values_local[scalar].value(k, q);
1329  * scratch.u_phi_grad[k] =
1330  * scratch.fe_values_local[scalar].gradient(k, q);
1331  * }
1332  * for (unsigned int i = 0; i < loc_dofs_per_cell; ++i)
1333  * {
1334  * for (unsigned int j = 0; j < loc_dofs_per_cell; ++j)
1335  * scratch.ll_matrix(i, j) +=
1336  * (scratch.q_phi[i] * scratch.q_phi[j] -
1337  * scratch.q_phi_div[i] * scratch.u_phi[j] +
1338  * scratch.u_phi[i] * scratch.q_phi_div[j] -
1339  * (scratch.u_phi_grad[i] * convection) * scratch.u_phi[j]) *
1340  * JxW;
1341  * scratch.l_rhs(i) += scratch.u_phi[i] * rhs_value * JxW;
1342  * }
1343  * }
1344  *
1345  * @endcode
1346  *
1347  * Face terms are assembled on all faces of all elements. This is in
1348  * contrast to more traditional DG methods, where each face is only visited
1349  * once in the assembly procedure.
1350  *
1351  * @code
1352  * for (const auto face_no : cell->face_indices())
1353  * {
1354  * scratch.fe_face_values_local.reinit(loc_cell, face_no);
1355  * scratch.fe_face_values.reinit(cell, face_no);
1356  *
1357  * @endcode
1358  *
1359  * The already obtained @f$\hat{u}@f$ values are needed when solving for the
1360  * local variables.
1361  *
1362  * @code
1363  * if (task_data.trace_reconstruct)
1364  * scratch.fe_face_values.get_function_values(solution,
1365  * scratch.trace_values);
1366  *
1367  * for (unsigned int q = 0; q < n_face_q_points; ++q)
1368  * {
1369  * const double JxW = scratch.fe_face_values.JxW(q);
1370  * const Point<dim> quadrature_point =
1371  * scratch.fe_face_values.quadrature_point(q);
1372  * const Tensor<1, dim> normal =
1373  * scratch.fe_face_values.normal_vector(q);
1374  * const Tensor<1, dim> convection =
1375  * scratch.convection_velocity.value(quadrature_point);
1376  *
1377  * @endcode
1378  *
1379  * Here we compute the stabilization parameter discussed in the
1380  * introduction: since the diffusion is one and the diffusion
1381  * length scale is set to 1/5, it simply results in a contribution
1382  * of 5 for the diffusion part and the magnitude of convection
1383  * through the element boundary in a centered scheme for the
1384  * convection part.
1385  *
1386  * @code
1387  * const double tau_stab = (5. + std::abs(convection * normal));
1388  *
1389  * @endcode
1390  *
1391  * We store the non-zero flux and scalar values, making use of the
1392  * support_on_face information we created in @p ScratchData.
1393  *
1394  * @code
1395  * for (unsigned int k = 0;
1396  * k < scratch.fe_local_support_on_face[face_no].size();
1397  * ++k)
1398  * {
1399  * const unsigned int kk =
1400  * scratch.fe_local_support_on_face[face_no][k];
1401  * scratch.q_phi[k] =
1402  * scratch.fe_face_values_local[fluxes].value(kk, q);
1403  * scratch.u_phi[k] =
1404  * scratch.fe_face_values_local[scalar].value(kk, q);
1405  * }
1406  *
1407  * @endcode
1408  *
1409  * When @p trace_reconstruct=false, we are preparing to assemble the
1410  * system for the skeleton variable @f$\hat{u}@f$. If this is the case,
1411  * we must assemble all local matrices associated with the problem:
1412  * local-local, local-face, face-local, and face-face. The
1413  * face-face matrix is stored as @p TaskData::cell_matrix, so that
1414  * it can be assembled into the global system by @p
1415  * copy_local_to_global.
1416  *
1417  * @code
1418  * if (!task_data.trace_reconstruct)
1419  * {
1420  * for (unsigned int k = 0;
1421  * k < scratch.fe_support_on_face[face_no].size();
1422  * ++k)
1423  * scratch.tr_phi[k] = scratch.fe_face_values.shape_value(
1424  * scratch.fe_support_on_face[face_no][k], q);
1425  * for (unsigned int i = 0;
1426  * i < scratch.fe_local_support_on_face[face_no].size();
1427  * ++i)
1428  * for (unsigned int j = 0;
1429  * j < scratch.fe_support_on_face[face_no].size();
1430  * ++j)
1431  * {
1432  * const unsigned int ii =
1433  * scratch.fe_local_support_on_face[face_no][i];
1434  * const unsigned int jj =
1435  * scratch.fe_support_on_face[face_no][j];
1436  * scratch.lf_matrix(ii, jj) +=
1437  * ((scratch.q_phi[i] * normal +
1438  * (convection * normal - tau_stab) * scratch.u_phi[i]) *
1439  * scratch.tr_phi[j]) *
1440  * JxW;
1441  *
1442  * @endcode
1443  *
1444  * Note the sign of the face_no-local matrix. We negate
1445  * the sign during assembly here so that we can use the
1446  * FullMatrix::mmult with addition when computing the
1447  * Schur complement.
1448  *
1449  * @code
1450  * scratch.fl_matrix(jj, ii) -=
1451  * ((scratch.q_phi[i] * normal +
1452  * tau_stab * scratch.u_phi[i]) *
1453  * scratch.tr_phi[j]) *
1454  * JxW;
1455  * }
1456  *
1457  * for (unsigned int i = 0;
1458  * i < scratch.fe_support_on_face[face_no].size();
1459  * ++i)
1460  * for (unsigned int j = 0;
1461  * j < scratch.fe_support_on_face[face_no].size();
1462  * ++j)
1463  * {
1464  * const unsigned int ii =
1465  * scratch.fe_support_on_face[face_no][i];
1466  * const unsigned int jj =
1467  * scratch.fe_support_on_face[face_no][j];
1468  * task_data.cell_matrix(ii, jj) +=
1469  * ((convection * normal - tau_stab) * scratch.tr_phi[i] *
1470  * scratch.tr_phi[j]) *
1471  * JxW;
1472  * }
1473  *
1474  * if (cell->face(face_no)->at_boundary() &&
1475  * (cell->face(face_no)->boundary_id() == 1))
1476  * {
1477  * const double neumann_value =
1478  * -scratch.exact_solution.gradient(quadrature_point) *
1479  * normal +
1480  * convection * normal *
1481  * scratch.exact_solution.value(quadrature_point);
1482  * for (unsigned int i = 0;
1483  * i < scratch.fe_support_on_face[face_no].size();
1484  * ++i)
1485  * {
1486  * const unsigned int ii =
1487  * scratch.fe_support_on_face[face_no][i];
1488  * task_data.cell_vector(ii) +=
1489  * scratch.tr_phi[i] * neumann_value * JxW;
1490  * }
1491  * }
1492  * }
1493  *
1494  * @endcode
1495  *
1496  * This last term adds the contribution of the term @f$\left<w,\tau
1497  * u_h\right>_{\partial \mathcal T}@f$ to the local matrix. As opposed
1498  * to the face matrices above, we need it in both assembly stages.
1499  *
1500  * @code
1501  * for (unsigned int i = 0;
1502  * i < scratch.fe_local_support_on_face[face_no].size();
1503  * ++i)
1504  * for (unsigned int j = 0;
1505  * j < scratch.fe_local_support_on_face[face_no].size();
1506  * ++j)
1507  * {
1508  * const unsigned int ii =
1509  * scratch.fe_local_support_on_face[face_no][i];
1510  * const unsigned int jj =
1511  * scratch.fe_local_support_on_face[face_no][j];
1512  * scratch.ll_matrix(ii, jj) +=
1513  * tau_stab * scratch.u_phi[i] * scratch.u_phi[j] * JxW;
1514  * }
1515  *
1516  * @endcode
1517  *
1518  * When @p trace_reconstruct=true, we are solving for the local
1519  * solutions on an element by element basis. The local
1520  * right-hand-side is calculated by replacing the basis functions @p
1521  * tr_phi in the @p lf_matrix computation by the computed values @p
1522  * trace_values. Of course, the sign of the matrix is now minus
1523  * since we have moved everything to the other side of the equation.
1524  *
1525  * @code
1526  * if (task_data.trace_reconstruct)
1527  * for (unsigned int i = 0;
1528  * i < scratch.fe_local_support_on_face[face_no].size();
1529  * ++i)
1530  * {
1531  * const unsigned int ii =
1532  * scratch.fe_local_support_on_face[face_no][i];
1533  * scratch.l_rhs(ii) -=
1534  * (scratch.q_phi[i] * normal +
1535  * scratch.u_phi[i] * (convection * normal - tau_stab)) *
1536  * scratch.trace_values[q] * JxW;
1537  * }
1538  * }
1539  * }
1540  *
1541  * @endcode
1542  *
1543  * Once assembly of all of the local contributions is complete, we must
1544  * either: (1) assemble the global system, or (2) compute the local solution
1545  * values and save them. In either case, the first step is to invert the
1546  * local-local matrix.
1547  *
1548  * @code
1549  * scratch.ll_matrix.gauss_jordan();
1550  *
1551  * @endcode
1552  *
1553  * For (1), we compute the Schur complement and add it to the @p
1554  * cell_matrix, matrix @f$D@f$ in the introduction.
1555  *
1556  * @code
1557  * if (task_data.trace_reconstruct == false)
1558  * {
1559  * scratch.fl_matrix.mmult(scratch.tmp_matrix, scratch.ll_matrix);
1560  * scratch.tmp_matrix.vmult_add(task_data.cell_vector, scratch.l_rhs);
1561  * scratch.tmp_matrix.mmult(task_data.cell_matrix,
1562  * scratch.lf_matrix,
1563  * true);
1564  * cell->get_dof_indices(task_data.dof_indices);
1565  * }
1566  * @endcode
1567  *
1568  * For (2), we are simply solving (ll_matrix).(solution_local) = (l_rhs).
1569  * Hence, we multiply @p l_rhs by our already inverted local-local matrix
1570  * and store the result using the <code>set_dof_values</code> function.
1571  *
1572  * @code
1573  * else
1574  * {
1575  * scratch.ll_matrix.vmult(scratch.tmp_rhs, scratch.l_rhs);
1576  * loc_cell->set_dof_values(scratch.tmp_rhs, solution_local);
1577  * }
1578  * }
1579  *
1580  *
1581  *
1582  * @endcode
1583  *
1584  *
1585  * <a name="HDGcopy_local_to_global"></a>
1586  * <h4>HDG::copy_local_to_global</h4>
1587  * If we are in the first step of the solution, i.e. @p trace_reconstruct=false,
1588  * then we assemble the local matrices into the global system.
1589  *
1590  * @code
1591  * template <int dim>
1592  * void HDG<dim>::copy_local_to_global(const PerTaskData &data)
1593  * {
1594  * if (data.trace_reconstruct == false)
1595  * constraints.distribute_local_to_global(data.cell_matrix,
1596  * data.cell_vector,
1597  * data.dof_indices,
1598  * system_matrix,
1599  * system_rhs);
1600  * }
1601  *
1602  *
1603  *
1604  * @endcode
1605  *
1606  *
1607  * <a name="HDGsolve"></a>
1608  * <h4>HDG::solve</h4>
1609  * The skeleton solution is solved for by using a BiCGStab solver with
1610  * identity preconditioner.
1611  *
1612  * @code
1613  * template <int dim>
1614  * void HDG<dim>::solve()
1615  * {
1616  * SolverControl solver_control(system_matrix.m() * 10,
1617  * 1e-11 * system_rhs.l2_norm());
1618  * SolverBicgstab<Vector<double>> solver(solver_control);
1619  * solver.solve(system_matrix, solution, system_rhs, PreconditionIdentity());
1620  *
1621  * std::cout << " Number of BiCGStab iterations: "
1622  * << solver_control.last_step() << std::endl;
1623  *
1624  * system_matrix.clear();
1625  * sparsity_pattern.reinit(0, 0, 0, 1);
1626  *
1627  * constraints.distribute(solution);
1628  *
1629  * @endcode
1630  *
1631  * Once we have solved for the skeleton solution,
1632  * we can solve for the local solutions in an element-by-element
1633  * fashion. We do this by re-using the same @p assemble_system function
1634  * but switching @p trace_reconstruct to true.
1635  *
1636  * @code
1637  * assemble_system(true);
1638  * }
1639  *
1640  *
1641  *
1642  * @endcode
1643  *
1644  *
1645  * <a name="HDGpostprocess"></a>
1646  * <h4>HDG::postprocess</h4>
1647  *
1648 
1649  *
1650  * The postprocess method serves two purposes. First, we want to construct a
 1651  * post-processed scalar variable in the element space of degree @f$p+1@f$ that
1652  * we hope will converge at order @f$p+2@f$. This is again an element-by-element
1653  * process and only involves the scalar solution as well as the gradient on
1654  * the local cell. To do this, we introduce the already defined scratch data
1655  * together with some update flags and run the work stream to do this in
1656  * parallel.
1657  *
1658 
1659  *
1660  * Secondly, we want to compute discretization errors just as we did in
1661  * @ref step_7 "step-7". The overall procedure is similar with calls to
1662  * VectorTools::integrate_difference. The difference is in how we compute
1663  * the errors for the scalar variable and the gradient variable. In @ref step_7 "step-7",
1664  * we did this by computing @p L2_norm or @p H1_seminorm
1665  * contributions. Here, we have a DoFHandler with these two contributions
1666  * computed and sorted by their vector component, <code>[0, dim)</code> for
1667  * the
1668  * gradient and @p dim for the scalar. To compute their value, we hence use
1669  * a ComponentSelectFunction with either of them, together with the @p
1670  * SolutionAndGradient class introduced above that contains the analytic
1671  * parts of either of them. Eventually, we also compute the L2-error of the
1672  * post-processed solution and add the results into the convergence table.
1673  *
1674  * @code
1675  * template <int dim>
1676  * void HDG<dim>::postprocess()
1677  * {
1678  * {
1679  * const QGauss<dim> quadrature_formula(fe_u_post.degree + 1);
1680  * const UpdateFlags local_flags(update_values);
1681  * const UpdateFlags flags(update_values | update_gradients |
1682  * update_JxW_values);
1683  *
1684  * PostProcessScratchData scratch(
1685  * fe_u_post, fe_local, quadrature_formula, local_flags, flags);
1686  *
1687  * WorkStream::run(
1688  * dof_handler_u_post.begin_active(),
1689  * dof_handler_u_post.end(),
1690  * [this](const typename DoFHandler<dim>::active_cell_iterator &cell,
1691  * PostProcessScratchData & scratch,
1692  * unsigned int & data) {
1693  * this->postprocess_one_cell(cell, scratch, data);
1694  * },
1695  * std::function<void(const unsigned int &)>(),
1696  * scratch,
1697  * 0U);
1698  * }
1699  *
1700  * Vector<float> difference_per_cell(triangulation.n_active_cells());
1701  *
1702  * ComponentSelectFunction<dim> value_select(dim, dim + 1);
1703  * VectorTools::integrate_difference(dof_handler_local,
1704  * solution_local,
1705  * SolutionAndGradient<dim>(),
1706  * difference_per_cell,
 1707  * QGauss<dim>(fe.degree + 2),
 1708  * VectorTools::L2_norm,
 1709  * &value_select);
 1710  * const double L2_error =
 1711  * VectorTools::compute_global_error(triangulation,
 1712  * difference_per_cell,
 1713  * VectorTools::L2_norm);
 1714  *
1715  * ComponentSelectFunction<dim> gradient_select(
1716  * std::pair<unsigned int, unsigned int>(0, dim), dim + 1);
1717  * VectorTools::integrate_difference(dof_handler_local,
1718  * solution_local,
1719  * SolutionAndGradient<dim>(),
1720  * difference_per_cell,
 1721  * QGauss<dim>(fe.degree + 2),
 1722  * VectorTools::L2_norm,
 1723  * &gradient_select);
 1724  * const double grad_error =
 1725  * VectorTools::compute_global_error(triangulation,
 1726  * difference_per_cell,
 1727  * VectorTools::L2_norm);
 1728  *
1729  * VectorTools::integrate_difference(dof_handler_u_post,
1730  * solution_u_post,
1731  * Solution<dim>(),
1732  * difference_per_cell,
 1733  * QGauss<dim>(fe.degree + 3),
 1734  * VectorTools::L2_norm);
 1735  * const double post_error =
 1736  * VectorTools::compute_global_error(triangulation,
 1737  * difference_per_cell,
 1738  * VectorTools::L2_norm);
 1739  *
1740  * convergence_table.add_value("cells", triangulation.n_active_cells());
1741  * convergence_table.add_value("dofs", dof_handler.n_dofs());
1742  *
1743  * convergence_table.add_value("val L2", L2_error);
1744  * convergence_table.set_scientific("val L2", true);
1745  * convergence_table.set_precision("val L2", 3);
1746  *
1747  * convergence_table.add_value("grad L2", grad_error);
1748  * convergence_table.set_scientific("grad L2", true);
1749  * convergence_table.set_precision("grad L2", 3);
1750  *
1751  * convergence_table.add_value("val L2-post", post_error);
1752  * convergence_table.set_scientific("val L2-post", true);
1753  * convergence_table.set_precision("val L2-post", 3);
1754  * }
1755  *
1756  *
1757  *
1758  * @endcode
1759  *
1760  *
1761  * <a name="HDGpostprocess_one_cell"></a>
1762  * <h4>HDG::postprocess_one_cell</h4>
1763  *
1764 
1765  *
1766  * This is the actual work done for the postprocessing. According to the
1767  * discussion in the introduction, we need to set up a system that projects
1768  * the gradient part of the DG solution onto the gradient of the
1769  * post-processed variable. Moreover, we need to set the average of the new
1770  * post-processed variable to equal the average of the scalar DG solution
1771  * on the cell.
1772  *
1773 
1774  *
1775  * More technically speaking, the projection of the gradient is a system
 1776  * that would potentially fill our @p dofs_per_cell times @p dofs_per_cell
1777  * matrix but is singular (the sum of all rows would be zero because the
1778  * constant function has zero gradient). Therefore, we take one row away and
1779  * use it for imposing the average of the scalar value. We pick the first
1780  * row for the scalar part, even though we could pick any row for @f$\mathcal
1781  * Q_{-p}@f$ elements. However, had we used FE_DGP elements instead, the first
1782  * row would correspond to the constant part already and deleting e.g. the
1783  * last row would give us a singular system. This way, our program can also
1784  * be used for those elements.
1785  *
1786  * @code
1787  * template <int dim>
1788  * void HDG<dim>::postprocess_one_cell(
1789  * const typename DoFHandler<dim>::active_cell_iterator &cell,
1790  * PostProcessScratchData & scratch,
1791  * unsigned int &)
 1792  * {
 1793  * typename DoFHandler<dim>::active_cell_iterator loc_cell(&triangulation,
 1794  * cell->level(),
1795  * cell->index(),
1796  * &dof_handler_local);
1797  *
1798  * scratch.fe_values_local.reinit(loc_cell);
1799  * scratch.fe_values.reinit(cell);
1800  *
1801  * FEValuesExtractors::Vector fluxes(0);
1802  * FEValuesExtractors::Scalar scalar(dim);
1803  *
1804  * const unsigned int n_q_points = scratch.fe_values.get_quadrature().size();
1805  * const unsigned int dofs_per_cell = scratch.fe_values.dofs_per_cell;
1806  *
1807  * scratch.fe_values_local[scalar].get_function_values(solution_local,
1808  * scratch.u_values);
1809  * scratch.fe_values_local[fluxes].get_function_values(solution_local,
1810  * scratch.u_gradients);
1811  *
1812  * double sum = 0;
1813  * for (unsigned int i = 1; i < dofs_per_cell; ++i)
1814  * {
1815  * for (unsigned int j = 0; j < dofs_per_cell; ++j)
1816  * {
1817  * sum = 0;
1818  * for (unsigned int q = 0; q < n_q_points; ++q)
1819  * sum += (scratch.fe_values.shape_grad(i, q) *
1820  * scratch.fe_values.shape_grad(j, q)) *
1821  * scratch.fe_values.JxW(q);
1822  * scratch.cell_matrix(i, j) = sum;
1823  * }
1824  *
1825  * sum = 0;
1826  * for (unsigned int q = 0; q < n_q_points; ++q)
1827  * sum -= (scratch.fe_values.shape_grad(i, q) * scratch.u_gradients[q]) *
1828  * scratch.fe_values.JxW(q);
1829  * scratch.cell_rhs(i) = sum;
1830  * }
1831  * for (unsigned int j = 0; j < dofs_per_cell; ++j)
1832  * {
1833  * sum = 0;
1834  * for (unsigned int q = 0; q < n_q_points; ++q)
1835  * sum += scratch.fe_values.shape_value(j, q) * scratch.fe_values.JxW(q);
1836  * scratch.cell_matrix(0, j) = sum;
1837  * }
1838  * {
1839  * sum = 0;
1840  * for (unsigned int q = 0; q < n_q_points; ++q)
1841  * sum += scratch.u_values[q] * scratch.fe_values.JxW(q);
1842  * scratch.cell_rhs(0) = sum;
1843  * }
1844  *
1845  * @endcode
1846  *
1847  * Having assembled all terms, we can again go on and solve the linear
1848  * system. We invert the matrix and then multiply the inverse by the
1849  * right hand side. An alternative (and more numerically stable) method
1850  * would have been to only factorize the matrix and apply the factorization.
1851  *
1852  * @code
1853  * scratch.cell_matrix.gauss_jordan();
1854  * scratch.cell_matrix.vmult(scratch.cell_sol, scratch.cell_rhs);
1855  * cell->distribute_local_to_global(scratch.cell_sol, solution_u_post);
1856  * }
1857  *
1858  *
1859  *
1860  * @endcode
1861  *
1862  *
1863  * <a name="HDGoutput_results"></a>
1864  * <h4>HDG::output_results</h4>
1865  * We have 3 sets of results that we would like to output: the local
1866  * solution, the post-processed local solution, and the skeleton solution. The
1867  * former 2 both 'live' on element volumes, whereas the latter lives on
1868  * codimension-1 surfaces
1869  * of the triangulation. Our @p output_results function writes all local solutions
1870  * to the same vtk file, even though they correspond to different
1871  * DoFHandler objects. The graphical output for the skeleton
1872  * variable is done through use of the DataOutFaces class.
1873  *
1874  * @code
1875  * template <int dim>
1876  * void HDG<dim>::output_results(const unsigned int cycle)
1877  * {
1878  * std::string filename;
1879  * switch (refinement_mode)
1880  * {
1881  * case global_refinement:
1882  * filename = "solution-global";
1883  * break;
1884  * case adaptive_refinement:
1885  * filename = "solution-adaptive";
1886  * break;
1887  * default:
1888  * Assert(false, ExcNotImplemented());
1889  * }
1890  *
1891  * std::string face_out(filename);
1892  * face_out += "-face";
1893  *
1894  * filename += "-q" + Utilities::int_to_string(fe.degree, 1);
1895  * filename += "-" + Utilities::int_to_string(cycle, 2);
1896  * filename += ".vtk";
1897  * std::ofstream output(filename);
1898  *
1899  * DataOut<dim> data_out;
1900  *
1901  * @endcode
1902  *
1903  * We first define the names and types of the local solution,
1904  * and add the data to @p data_out.
1905  *
1906  * @code
1907  * std::vector<std::string> names(dim, "gradient");
1908  * names.emplace_back("solution");
1909  * std::vector<DataComponentInterpretation::DataComponentInterpretation>
 1910  * component_interpretation(
 1911  * dim + 1, DataComponentInterpretation::component_is_part_of_vector);
 1912  * component_interpretation[dim] =
 1913  * DataComponentInterpretation::component_is_scalar;
 1914  * data_out.add_data_vector(dof_handler_local,
1915  * solution_local,
1916  * names,
1917  * component_interpretation);
1918  *
1919  * @endcode
1920  *
1921  * The second data item we add is the post-processed solution.
1922  * In this case, it is a single scalar variable belonging to
1923  * a different DoFHandler.
1924  *
1925  * @code
1926  * std::vector<std::string> post_name(1, "u_post");
 1927  * std::vector<DataComponentInterpretation::DataComponentInterpretation>
 1928  * post_comp_type(1, DataComponentInterpretation::component_is_scalar);
 1929  * data_out.add_data_vector(dof_handler_u_post,
1930  * solution_u_post,
1931  * post_name,
1932  * post_comp_type);
1933  *
1934  * data_out.build_patches(fe.degree);
1935  * data_out.write_vtk(output);
1936  *
1937  * face_out += "-q" + Utilities::int_to_string(fe.degree, 1);
1938  * face_out += "-" + Utilities::int_to_string(cycle, 2);
1939  * face_out += ".vtk";
1940  * std::ofstream face_output(face_out);
1941  *
1942  * @endcode
1943  *
1944  * The <code>DataOutFaces</code> class works analogously to the
1945  * <code>DataOut</code> class when we have a <code>DoFHandler</code> that
1946  * defines the solution on the skeleton of the triangulation. We treat it
1947  * as such here, and the code is similar to that above.
1948  *
1949  * @code
1950  * DataOutFaces<dim> data_out_face(false);
1951  * std::vector<std::string> face_name(1, "u_hat");
1952  * std::vector<DataComponentInterpretation::DataComponentInterpretation>
1953  * face_component_type(1, DataComponentInterpretation::component_is_scalar);
1954  *
1955  * data_out_face.add_data_vector(dof_handler,
1956  * solution,
1957  * face_name,
1958  * face_component_type);
1959  *
1960  * data_out_face.build_patches(fe.degree);
1961  * data_out_face.write_vtk(face_output);
1962  * }
1963  *
1964  * @endcode
1965  *
1966  *
1967  * <a name="HDGrefine_grid"></a>
1968  * <h4>HDG::refine_grid</h4>
1969  *
1970 
1971  *
1972  * We implement two different refinement cases for HDG, just as in
1973  * <code>@ref step_7 "step-7"</code>: adaptive_refinement and global_refinement. The
1974  * global_refinement option recreates the entire triangulation every
1975  * time. This is because we want to use a finer sequence of meshes than what
1976  * we would get with one refinement step, namely 2, 3, 4, 6, 8, 12, 16, ...
1977  * elements per direction.
1978  *
1979 
1980  *
1981  * The adaptive_refinement mode uses the <code>KellyErrorEstimator</code> to
1982  * give a decent indication of the non-regular regions in the scalar local
1983  * solutions.
1984  *
1985  * @code
1986  * template <int dim>
1987  * void HDG<dim>::refine_grid(const unsigned int cycle)
1988  * {
1989  * if (cycle == 0)
 1990  * {
 1991  * GridGenerator::subdivided_hyper_cube(triangulation, 2, -1, 1);
 1992  * triangulation.refine_global(3 - dim);
1993  * }
1994  * else
1995  * switch (refinement_mode)
1996  * {
1997  * case global_refinement:
1998  * {
 1999  * triangulation.clear();
 2000  * GridGenerator::subdivided_hyper_cube(triangulation,
 2001  * 2 + (cycle % 2),
2002  * -1,
2003  * 1);
2004  * triangulation.refine_global(3 - dim + cycle / 2);
2005  * break;
2006  * }
2007  *
2008  * case adaptive_refinement:
2009  * {
2010  * Vector<float> estimated_error_per_cell(
2011  * triangulation.n_active_cells());
2012  *
2013  * FEValuesExtractors::Scalar scalar(dim);
2014  * std::map<types::boundary_id, const Function<dim> *>
2015  * neumann_boundary;
2016  * KellyErrorEstimator<dim>::estimate(dof_handler_local,
2017  * QGauss<dim - 1>(fe.degree + 1),
2018  * neumann_boundary,
2019  * solution_local,
2020  * estimated_error_per_cell,
2021  * fe_local.component_mask(
2022  * scalar));
 2023  *
 2024  * GridRefinement::refine_and_coarsen_fixed_number(
 2025  * triangulation, estimated_error_per_cell, 0.3, 0.);
2026  *
2027  * triangulation.execute_coarsening_and_refinement();
2028  *
2029  * break;
2030  * }
2031  *
2032  * default:
2033  * {
2034  * Assert(false, ExcNotImplemented());
2035  * }
2036  * }
2037  *
2038  * @endcode
2039  *
2040  * Just as in @ref step_7 "step-7", we set the boundary indicator of two of the faces to 1
2041  * where we want to specify Neumann boundary conditions instead of Dirichlet
2042  * conditions. Since we re-create the triangulation every time for global
2043  * refinement, the flags are set in every refinement step, not just at the
2044  * beginning.
2045  *
2046  * @code
2047  * for (const auto &cell : triangulation.cell_iterators())
2048  * for (const auto &face : cell->face_iterators())
2049  * if (face->at_boundary())
2050  * if ((std::fabs(face->center()(0) - (-1)) < 1e-12) ||
2051  * (std::fabs(face->center()(1) - (-1)) < 1e-12))
2052  * face->set_boundary_id(1);
2053  * }
2054  *
2055  * @endcode
2056  *
2057  *
2058  * <a name="HDGrun"></a>
2059  * <h4>HDG::run</h4>
2060  * The functionality here is basically the same as <code>@ref step_7 "step-7"</code>.
2061  * We loop over 10 cycles, refining the grid on each one. At the end,
2062  * convergence tables are created.
2063  *
2064  * @code
2065  * template <int dim>
2066  * void HDG<dim>::run()
2067  * {
2068  * for (unsigned int cycle = 0; cycle < 10; ++cycle)
2069  * {
2070  * std::cout << "Cycle " << cycle << ':' << std::endl;
2071  *
2072  * refine_grid(cycle);
2073  * setup_system();
2074  * assemble_system(false);
2075  * solve();
2076  * postprocess();
2077  * output_results(cycle);
2078  * }
2079  *
2080  * @endcode
2081  *
2082  * There is one minor change for the convergence table compared to @ref step_7 "step-7":
2083  * Since we did not refine our mesh by a factor two in each cycle (but
2084  * rather used the sequence 2, 3, 4, 6, 8, 12, ...), we need to tell the
2085  * convergence rate evaluation about this. We do this by setting the
2086  * number of cells as a reference column and additionally specifying the
2087  * dimension of the problem, which gives the necessary information for the
2088  * relation between number of cells and mesh size.
2089  *
2090  * @code
2091  * if (refinement_mode == global_refinement)
2092  * {
2093  * convergence_table.evaluate_convergence_rates(
2094  * "val L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
2095  * convergence_table.evaluate_convergence_rates(
2096  * "grad L2", "cells", ConvergenceTable::reduction_rate_log2, dim);
2097  * convergence_table.evaluate_convergence_rates(
2098  * "val L2-post", "cells", ConvergenceTable::reduction_rate_log2, dim);
2099  * }
2100  * convergence_table.write_text(std::cout);
2101  * }
2102  *
2103  * } // end of namespace Step51
2104  *
2105  *
2106  *
2107  * int main()
2108  * {
2109  * const unsigned int dim = 2;
2110  *
2111  * try
2112  * {
2113  * @endcode
2114  *
2115  * Now for the three calls to the main class in complete analogy to
2116  * @ref step_7 "step-7".
2117  *
2118  * @code
2119  * {
2120  * std::cout << "Solving with Q1 elements, adaptive refinement"
2121  * << std::endl
2122  * << "============================================="
2123  * << std::endl
2124  * << std::endl;
2125  *
2126  * Step51::HDG<dim> hdg_problem(1, Step51::HDG<dim>::adaptive_refinement);
2127  * hdg_problem.run();
2128  *
2129  * std::cout << std::endl;
2130  * }
2131  *
2132  * {
2133  * std::cout << "Solving with Q1 elements, global refinement" << std::endl
2134  * << "===========================================" << std::endl
2135  * << std::endl;
2136  *
2137  * Step51::HDG<dim> hdg_problem(1, Step51::HDG<dim>::global_refinement);
2138  * hdg_problem.run();
2139  *
2140  * std::cout << std::endl;
2141  * }
2142  *
2143  * {
2144  * std::cout << "Solving with Q3 elements, global refinement" << std::endl
2145  * << "===========================================" << std::endl
2146  * << std::endl;
2147  *
2148  * Step51::HDG<dim> hdg_problem(3, Step51::HDG<dim>::global_refinement);
2149  * hdg_problem.run();
2150  *
2151  * std::cout << std::endl;
2152  * }
2153  * }
2154  * catch (std::exception &exc)
2155  * {
2156  * std::cerr << std::endl
2157  * << std::endl
2158  * << "----------------------------------------------------"
2159  * << std::endl;
2160  * std::cerr << "Exception on processing: " << std::endl
2161  * << exc.what() << std::endl
2162  * << "Aborting!" << std::endl
2163  * << "----------------------------------------------------"
2164  * << std::endl;
2165  * return 1;
2166  * }
2167  * catch (...)
2168  * {
2169  * std::cerr << std::endl
2170  * << std::endl
2171  * << "----------------------------------------------------"
2172  * << std::endl;
2173  * std::cerr << "Unknown exception!" << std::endl
2174  * << "Aborting!" << std::endl
2175  * << "----------------------------------------------------"
2176  * << std::endl;
2177  * return 1;
2178  * }
2179  *
2180  * return 0;
2181  * }
2182  * @endcode
2183 <a name="Results"></a><h1>Results</h1>
2184 
2185 
2186 <a name="Programoutput"></a><h3>Program output</h3>
2187 
2188 
2189 We first have a look at the output generated by the program when run in 2D. In
2190 the four images below, we show the solution for polynomial degree @f$p=1@f$
2191 and cycles 2, 3, 4, and 8 of the program. In the plots, we overlay the data
2192 generated from the internal data (DG part) with the skeleton part (@f$\hat{u}@f$)
2193 into the same plot. We had to generate two different data sets because cells
2194 and faces represent different geometric entities, the combination of which (in
2195 the same file) is not supported in the VTK output of deal.II.
2196 
2197 The images show the distinctive features of HDG: The cell solution (colored
2198 surfaces) is discontinuous between the cells. The solution on the skeleton
2199 variable sits on the faces and ties together the local parts. The skeleton
2200 solution is not continuous on the vertices where the faces meet, even though
2201 its values are quite close along lines in the same coordinate direction. The
2202 skeleton solution can be interpreted as a rubber spring between the two sides
2203 that balances the jumps in the solution (or rather, the flux @f$\kappa \nabla u
2204 + \mathbf{c} u@f$). From the picture at the top left, it is clear that
2205 the bulk solution frequently over- and undershoots and that the
2206 skeleton variable is indeed a better approximation to the exact
2207 solution; this explains why we can get a better solution using a
2208 postprocessing step.
2209 
2210 As the mesh is refined, the jumps between the cells get
2211 small (we represent a smooth solution), and the skeleton solution approaches
2212 the interior parts. For cycle 8, there is no visible difference in the two
2213 variables. We also see how boundary conditions are implemented weakly and that
2214 the interior variables do not exactly satisfy boundary conditions. On the
2215 lower and left boundaries, we set Neumann boundary conditions, whereas we set
2216 Dirichlet conditions on the right and top boundaries.
2217 
2218 <table align="center">
2219  <tr>
2220  <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_2.png" alt=""></td>
2221  <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_3.png" alt=""></td>
2222  </tr>
2223  <tr>
2224  <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_4.png" alt=""></td>
2225  <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_8.png" alt=""></td>
2226  </tr>
2227 </table>
2228 
2229 Next, we have a look at the post-processed solution, again at cycles 2, 3, 4,
2230 and 8. This is a discontinuous solution that is locally described by second
2231 order polynomials. While the solution does not look very good on the mesh of
2232 cycle two, it looks much better for cycles three and four. As shown by the
2233 convergence table below, we find that it also converges more quickly to the
2234 analytical solution.
2235 
2236 <table align="center">
2237  <tr>
2238  <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_2.png" alt=""></td>
2239  <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_3.png" alt=""></td>
2240  </tr>
2241  <tr>
2242  <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_4.png" alt=""></td>
2243  <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_8.png" alt=""></td>
2244  </tr>
2245 </table>
2246 
2247 Finally, we look at the solution for @f$p=3@f$ at cycle 2. Despite the coarse
2248 mesh with only 64 cells, the post-processed solution is similar in quality
2249 to the linear solution (not post-processed) at cycle 8 with 4,096
2250 cells. This clearly shows the superiority of high order methods for smooth
2251 solutions.
2252 
2253 <table align="center">
2254  <tr>
2255  <td><img src="https://www.dealii.org/images/steps/developer/step-51.sol_q3_2.png" alt=""></td>
2256  <td><img src="https://www.dealii.org/images/steps/developer/step-51.post_q3_2.png" alt=""></td>
2257  </tr>
2258 </table>
2259 
2260 <a name="Convergencetables"></a><h4>Convergence tables</h4>
2261 
2262 
2263 When the program is run, it also outputs information about the respective
2264 steps and convergence tables with errors in the various components in the
2265 end. In 2D, the convergence tables look the following:
2266 
2267 @code
2268 Q1 elements, adaptive refinement:
2269 cells dofs val L2 grad L2 val L2-post
2270  16 80 1.804e+01 2.207e+01 1.798e+01
2271  31 170 9.874e+00 1.322e+01 9.798e+00
2272  61 314 7.452e-01 3.793e+00 4.891e-01
2273  121 634 3.240e-01 1.511e+00 2.616e-01
2274  238 1198 8.585e-02 8.212e-01 1.808e-02
2275  454 2290 4.802e-02 5.178e-01 2.195e-02
2276  898 4378 2.561e-02 2.947e-01 4.318e-03
2277  1720 7864 1.306e-02 1.664e-01 2.978e-03
2278  3271 14638 7.025e-03 9.815e-02 1.075e-03
2279  6217 27214 4.119e-03 6.407e-02 9.975e-04
2280 
2281 Q1 elements, global refinement:
2282 cells dofs val L2 grad L2 val L2-post
2283  16 80 1.804e+01 - 2.207e+01 - 1.798e+01 -
2284  36 168 6.125e+00 2.66 9.472e+00 2.09 6.084e+00 2.67
2285  64 288 9.785e-01 6.38 4.260e+00 2.78 7.102e-01 7.47
2286  144 624 2.730e-01 3.15 1.866e+00 2.04 6.115e-02 6.05
2287  256 1088 1.493e-01 2.10 1.046e+00 2.01 2.880e-02 2.62
2288  576 2400 6.965e-02 1.88 4.846e-01 1.90 9.204e-03 2.81
2289  1024 4224 4.018e-02 1.91 2.784e-01 1.93 4.027e-03 2.87
2290  2304 9408 1.831e-02 1.94 1.264e-01 1.95 1.236e-03 2.91
2291  4096 16640 1.043e-02 1.96 7.185e-02 1.96 5.306e-04 2.94
2292  9216 37248 4.690e-03 1.97 3.228e-02 1.97 1.599e-04 2.96
2293 
2294 Q3 elements, global refinement:
2295 cells dofs val L2 grad L2 val L2-post
2296  16 160 3.613e-01 - 1.891e+00 - 3.020e-01 -
2297  36 336 6.411e-02 4.26 5.081e-01 3.24 3.238e-02 5.51
2298  64 576 3.480e-02 2.12 2.533e-01 2.42 5.277e-03 6.31
2299  144 1248 8.297e-03 3.54 5.924e-02 3.58 6.330e-04 5.23
2300  256 2176 2.254e-03 4.53 1.636e-02 4.47 1.403e-04 5.24
2301  576 4800 4.558e-04 3.94 3.277e-03 3.96 1.844e-05 5.01
2302  1024 8448 1.471e-04 3.93 1.052e-03 3.95 4.378e-06 5.00
2303  2304 18816 2.956e-05 3.96 2.104e-04 3.97 5.750e-07 5.01
2304  4096 33280 9.428e-06 3.97 6.697e-05 3.98 1.362e-07 5.01
2305  9216 74496 1.876e-06 3.98 1.330e-05 3.99 1.788e-08 5.01
2306 @endcode
2307 
2308 
2309 One can see the error reduction upon grid refinement, and for the cases where
2310 global refinement was performed, also the convergence rates. The quadratic
2311 convergence rates of Q1 elements in the @f$L_2@f$ norm for both the scalar
2312 variable and the gradient variable are apparent, as is the cubic rate for the
2313 postprocessed scalar variable in the @f$L_2@f$ norm. Note this distinctive
2314 feature of an HDG solution. In typical continuous finite elements, the
2315 gradient of the solution of order @f$p@f$ converges at rate @f$p@f$ only, as
2316 opposed to @f$p+1@f$ for the actual solution. Even though superconvergence
2317 results for finite elements are also available (e.g. superconvergent patch
2318 recovery first introduced by Zienkiewicz and Zhu), these are typically limited
2319 to structured meshes and other special cases. For Q3 HDG variables, the scalar
2320 variable and gradient converge at fourth order and the postprocessed scalar
2321 variable at fifth order.
2322 
2323 The same convergence rates are observed in 3d.
2324 @code
2325 Q1 elements, adaptive refinement:
2326 cells dofs val L2 grad L2 val L2-post
2327  8 144 7.122e+00 1.941e+01 6.102e+00
2328  29 500 3.309e+00 1.023e+01 2.145e+00
2329  113 1792 2.204e+00 1.023e+01 1.912e+00
2330  379 5732 6.085e-01 5.008e+00 2.233e-01
2331  1317 19412 1.543e-01 1.464e+00 4.196e-02
2332  4579 64768 5.058e-02 5.611e-01 9.521e-03
2333  14596 199552 2.129e-02 3.122e-01 4.569e-03
2334  46180 611400 1.033e-02 1.622e-01 1.684e-03
2335 144859 1864212 5.007e-03 8.371e-02 7.364e-04
2336 451060 5684508 2.518e-03 4.562e-02 3.070e-04
2337 
2338 Q1 elements, global refinement:
2339 cells dofs val L2 grad L2 val L2-post
2340  8 144 7.122e+00 - 1.941e+01 - 6.102e+00 -
2341  27 432 5.491e+00 0.64 2.184e+01 -0.29 4.448e+00 0.78
2342  64 960 3.646e+00 1.42 1.299e+01 1.81 3.306e+00 1.03
2343  216 3024 1.595e+00 2.04 8.550e+00 1.03 1.441e+00 2.05
2344  512 6912 6.922e-01 2.90 5.306e+00 1.66 2.511e-01 6.07
2345  1728 22464 2.915e-01 2.13 2.490e+00 1.87 8.588e-02 2.65
2346  4096 52224 1.684e-01 1.91 1.453e+00 1.87 4.055e-02 2.61
2347  13824 172800 7.972e-02 1.84 6.861e-01 1.85 1.335e-02 2.74
2348  32768 405504 4.637e-02 1.88 3.984e-01 1.89 5.932e-03 2.82
2349 110592 1354752 2.133e-02 1.92 1.830e-01 1.92 1.851e-03 2.87
2350 
2351 Q3 elements, global refinement:
2352 cells dofs val L2 grad L2 val L2-post
2353  8 576 5.670e+00 - 1.868e+01 - 5.462e+00 -
2354  27 1728 1.048e+00 4.16 6.988e+00 2.42 8.011e-01 4.73
2355  64 3840 2.831e-01 4.55 2.710e+00 3.29 1.363e-01 6.16
2356  216 12096 7.883e-02 3.15 7.721e-01 3.10 2.158e-02 4.55
2357  512 27648 3.642e-02 2.68 3.305e-01 2.95 5.231e-03 4.93
2358  1728 89856 8.546e-03 3.58 7.581e-02 3.63 7.640e-04 4.74
2359  4096 208896 2.598e-03 4.14 2.313e-02 4.13 1.783e-04 5.06
2360  13824 691200 5.314e-04 3.91 4.697e-03 3.93 2.355e-05 4.99
2361  32768 1622016 1.723e-04 3.91 1.517e-03 3.93 5.602e-06 4.99
2362 110592 5419008 3.482e-05 3.94 3.055e-04 3.95 7.374e-07 5.00
2363 @endcode
2364 
2365 <a name="Comparisonwithcontinuousfiniteelements"></a><h3>Comparison with continuous finite elements</h3>
2366 
2367 
2368 <a name="Resultsfor2D"></a><h4>Results for 2D</h4>
2369 
2370 
2371 The convergence tables verify the expected convergence rates stated in the
2372 introduction. Now, we want to show a quick comparison of the computational
2373 efficiency of the HDG method compared to a usual finite element (continuous
2374 Galerkin) method on the problem of this tutorial. Of course, stability aspects
2375 of the HDG method compared to continuous finite elements for
2376 transport-dominated problems are also important in practice, which is an
2377 aspect not seen on a problem with smooth analytic solution. In the picture
2378 below, we compare the @f$L_2@f$ error as a function of the number of degrees of
2379 freedom (left) and of the computing time spent in the linear solver (right)
2380 for two space dimensions of continuous finite elements (CG) and the hybridized
2381 discontinuous Galerkin method presented in this tutorial. As opposed to the
2382 tutorial where we only use unpreconditioned BiCGStab, the times shown in the
2383 figures below use the Trilinos algebraic multigrid preconditioner in
2384 TrilinosWrappers::PreconditionAMG. For the HDG part, a wrapper around
2385 ChunkSparseMatrix for the trace variable has been used in order to utilize the
2386 block structure in the matrix on the finest level.
2387 
2388 <table align="center">
2389  <tr>
2390  <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_plain.png" width="400" alt=""></td>
2391  <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_plain.png" width="400" alt=""></td>
2392  </tr>
2393 </table>
2394 
2395 The results in the graphs show that the HDG method is slower than continuous
2396 finite elements at @f$p=1@f$, about equally fast for cubic elements and
2397 faster for sixth order elements. However, we have seen above that the HDG
2398 method actually produces solutions which are more accurate than what is
2399 represented in the original variables. Therefore, in the next two plots below
2400 we instead display the error of the post-processed solution for HDG (denoted
2401 by @f$p=1^*@f$ for example). We now see a clear advantage of HDG for the same
2402 amount of work for both @f$p=3@f$ and @f$p=6@f$, and about the same quality
2403 for @f$p=1@f$.
2404 
2405 <table align="center">
2406  <tr>
2407  <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_post.png" width="400" alt=""></td>
2408  <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_post.png" width="400" alt=""></td>
2409  </tr>
2410 </table>
2411 
2412 Since the HDG method actually produces results converging as
2413 @f$h^{p+2}@f$, we should compare it to a continuous Galerkin
2414 solution with the same asymptotic convergence behavior, i.e., FE_Q with degree
2415 @f$p+1@f$. If we do this, we get the convergence curves below. We see that
2416 CG with second order polynomials is again clearly better than HDG with
2417 linears. However, the advantage of HDG for higher orders remains.
2418 
2419 <table align="center">
2420  <tr>
2421  <td><img src="https://www.dealii.org/images/steps/developer/step-51.2d_postb.png" width="400" alt=""></td>
2422  <td><img src="https://www.dealii.org/images/steps/developer/step-51.2dt_postb.png" width="400" alt=""></td>
2423  </tr>
2424 </table>
2425 
2426 The results are in line with properties of DG methods in general: Best
2427 performance is typically not achieved for linear elements, but rather at
2428 somewhat higher order, usually around @f$p=3@f$. This is because of a
2429 volume-to-surface effect for discontinuous solutions with too much of the
2430 solution living on the surfaces and hence duplicating work when the elements
2431 are linear. Put in other words, DG methods are often most efficient when used
2432 at relatively high order, despite their focus on a discontinuous (and hence,
2433 seemingly less accurate) representation of solutions.
2434 
2435 <a name="Resultsfor3D"></a><h4>Results for 3D</h4>
2436 
2437 
2438 We now show the same figures in 3D: The first row shows the number of degrees
2439 of freedom and computing time versus the @f$L_2@f$ error in the scalar variable
2440 @f$u@f$ for CG and HDG at order @f$p@f$, the second row shows the
2441 post-processed HDG solution instead of the original one, and the third row
2442 compares the post-processed HDG solution with CG at order @f$p+1@f$. In 3D,
2443 the volume-to-surface effect makes the cost of HDG somewhat higher and the CG
2444 solution is clearly better than HDG for linears by any metric. For cubics, HDG
2445 and CG are of similar quality, whereas HDG is again more efficient for sixth
2446 order polynomials. One can alternatively also use the combination of FE_DGP
2447 and FE_FaceP instead of (FE_DGQ, FE_FaceQ), which do not use tensor product
2448 polynomials of degree @f$p@f$ but Legendre polynomials of <i>complete</i>
2449 degree @f$p@f$. There are fewer degrees of freedom on the skeleton variable
2450 for FE_FaceP for a given mesh size, but the solution quality (error vs. number
2451 of DoFs) is very similar to the results for FE_FaceQ.
2452 
2453 <table align="center">
2454  <tr>
2455  <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_plain.png" width="400" alt=""></td>
2456  <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_plain.png" width="400" alt=""></td>
2457  </tr>
2458  <tr>
2459  <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_post.png" width="400" alt=""></td>
2460  <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_post.png" width="400" alt=""></td>
2461  </tr>
2462  <tr>
2463  <td><img src="https://www.dealii.org/images/steps/developer/step-51.3d_postb.png" width="400" alt=""></td>
2464  <td><img src="https://www.dealii.org/images/steps/developer/step-51.3dt_postb.png" width="400" alt=""></td>
2465  </tr>
2466 </table>
2467 
2468 One final note on the efficiency comparison: We tried to use general-purpose
2469 sparse matrix structures and similar solvers (optimal AMG preconditioners for
2470 both without particular tuning of the AMG parameters on any of them) to give a
2471 fair picture of the cost versus accuracy of two methods, on a toy example. It
2472 should be noted however that geometric multigrid (GMG) for continuous finite
2473 elements is about a factor of four to five faster for @f$p=3@f$ and @f$p=6@f$. As of
2474 2019, optimal-complexity iterative solvers for HDG are still under development
2475 in the research community. Also, there are other implementation aspects for CG
2476 available such as fast matrix-free approaches as shown in @ref step_37 "step-37" that make
2477 higher order continuous elements more competitive. Again, it is not clear to
2478 the authors of the tutorial whether similar improvements could be made for
2479 HDG. We refer to <a href="https://dx.doi.org/10.1137/16M110455X">Kronbichler
2480 and Wall (2018)</a> for a recent efficiency evaluation.
2481 
2482 
2483 <a name="Possibilitiesforimprovements"></a><h3>Possibilities for improvements</h3>
2484 
2485 
2486 As already mentioned in the introduction, one possibility is to implement
2487 another post-processing technique as discussed in the literature.
2488 
2489 A second item that is not done optimally relates to the performance of this
2490 program, which is of course an issue in practical applications (weighing in
2491 also the better solution quality of (H)DG methods for transport-dominated
2492 problems). Let us look at
2493 the computing time of the tutorial program and the share of the individual
2494 components:
2495 
2496 <table align="center" class="doxtable">
2497  <tr>
2498  <th>&nbsp;</th>
2499  <th>&nbsp;</th>
2500  <th>Setup</th>
2501  <th>Assemble</th>
2502  <th>Solve</th>
2503  <th>Trace reconstruct</th>
2504  <th>Post-processing</th>
2505  <th>Output</th>
2506  </tr>
2507  <tr>
2508  <th>&nbsp;</th>
2509  <th>Total time</th>
2510  <th colspan="6">Relative share</th>
2511  </tr>
2512  <tr>
2513  <td align="left">2D, Q1, cycle 9, 37,248 dofs</td>
2514  <td align="center">5.34s</td>
2515  <td align="center">0.7%</td>
2516  <td align="center">1.2%</td>
2517  <td align="center">89.5%</td>
2518  <td align="center">0.9%</td>
2519  <td align="center">2.3%</td>
2520  <td align="center">5.4%</td>
2521  </tr>
2522  <tr>
2523  <td align="left">2D, Q3, cycle 9, 74,496 dofs</td>
2524  <td align="center">22.2s</td>
2525  <td align="center">0.4%</td>
2526  <td align="center">4.3%</td>
2527  <td align="center">84.1%</td>
2528  <td align="center">4.1%</td>
2529  <td align="center">3.5%</td>
2530  <td align="center">3.6%</td>
2531  </tr>
2532  <tr>
2533  <td align="left">3D, Q1, cycle 7, 172,800 dofs</td>
2534  <td align="center">9.06s</td>
2535  <td align="center">3.1%</td>
2536  <td align="center">8.9%</td>
2537  <td align="center">42.7%</td>
2538  <td align="center">7.0%</td>
2539  <td align="center">20.6%</td>
2540  <td align="center">17.7%</td>
2541  </tr>
2542  <tr>
2543  <td align="left">3D, Q3, cycle 7, 691,200 dofs</td>
2544  <td align="center">516s</td>
2545  <td align="center">0.6%</td>
2546  <td align="center">34.5%</td>
2547  <td align="center">13.4%</td>
2548  <td align="center">32.8%</td>
2549  <td align="center">17.1%</td>
2550  <td align="center">1.5%</td>
2551  </tr>
2552 </table>
2553 
2554 As can be seen from the table, the solver and assembly calls dominate the
2555 runtime of the program. This also gives a clear indication of where
2556 improvements would make the most sense:
2557 
2558 <ol>
2559  <li> Better linear solvers: We use a BiCGStab iterative solver without
2560  preconditioner, where the number of iterations increases with increasing
2561  problem size (the number of iterations for Q1 elements and global
2562  refinements starts at 35 for the small sizes but increases up to 701 for the
2563  largest size). To do better, one could for example use an algebraic
2564  multigrid preconditioner from Trilinos, or some more advanced variants as
2565  the one discussed in <a
2566  href="https://dx.doi.org/10.1137/16M110455X">Kronbichler and Wall
2567  (2018)</a>. For diffusion-dominated problems such as the problem at hand
2568  with finer meshes, such a solver can be designed that uses the matrix-vector
2569  products from the more efficient ChunkSparseMatrix on the finest level, as
2570  long as we are not working in parallel with MPI. For MPI-parallelized
2571  computations, a standard TrilinosWrappers::SparseMatrix can be used.
2572 
2573  <li> Speed up assembly by pre-assembling parts that do not change from one
2574  cell to another (those that contain neither variable coefficients nor
2575  mapping-dependent terms).
2576 </ol>
2577  *
2578  *
2579 <a name="PlainProg"></a>
2580 <h1> The plain program</h1>
2581 @include "step-51.cc"
2582 */
void add_data_vector(const VectorType &data, const std::vector< std::string > &names, const DataVectorType type=type_automatic, const std::vector< DataComponentInterpretation::DataComponentInterpretation > &data_component_interpretation=std::vector< DataComponentInterpretation::DataComponentInterpretation >())
virtual void build_patches(const unsigned int n_subdivisions=0)
Definition: data_out.cc:1085
Definition: fe_dgp.h:310
Definition: fe_dgq.h:111
Definition: fe_q.h:549
void mmult(FullMatrix< number2 > &C, const FullMatrix< number2 > &B, const bool adding=false) const
virtual RangeNumberType value(const Point< dim > &p, const unsigned int component=0) const
virtual void vector_value(const Point< dim > &p, Vector< RangeNumberType > &values) const
static void estimate(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const Quadrature< dim - 1 > &quadrature, const std::map< types::boundary_id, const Function< spacedim, typename InputVector::value_type > * > &neumann_bc, const InputVector &solution, Vector< float > &error, const ComponentMask &component_mask=ComponentMask(), const Function< spacedim > *coefficients=nullptr, const unsigned int n_threads=numbers::invalid_unsigned_int, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id, const types::material_id material_id=numbers::invalid_material_id, const Strategy strategy=cell_diameter_over_24)
Definition: point.h:111
constexpr SymmetricTensor< 2, dim, Number > invert(const SymmetricTensor< 2, dim, Number > &t)
SymmetricTensor< rank, dim, Number > sum(const SymmetricTensor< rank, dim, Number > &local, const MPI_Comm &mpi_communicator)
constexpr Number trace(const SymmetricTensor< 2, dim, Number > &d)
virtual value_type value(const Point< dim > &p) const
constexpr void clear()
constexpr numbers::NumberTraits< Number >::real_type norm_square() const
Point< 3 > vertices[4]
UpdateFlags
@ update_values
Shape function values.
@ update_normal_vectors
Normal vectors.
@ update_JxW_values
Transformed quadrature weights.
@ update_gradients
Shape function gradients.
@ update_quadrature_points
Transformed quadrature points.
Point< 2 > second
Definition: grid_out.cc:4588
Point< 2 > first
Definition: grid_out.cc:4587
unsigned int level
Definition: grid_out.cc:4590
__global__ void reduction(Number *result, const Number *v, const size_type N)
__global__ void set(Number *val, const Number s, const size_type N)
#define Assert(cond, exc)
Definition: exceptions.h:1465
static ::ExceptionBase & ExcNotImplemented()
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1622
void write_vtk(std::ostream &out) const
typename ActiveSelector::active_cell_iterator active_cell_iterator
Definition: dof_handler.h:438
void loop(ITERATOR begin, typename identity< ITERATOR >::type end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
Definition: loop.h:439
void make_hanging_node_constraints(const DoFHandler< dim, spacedim > &dof_handler, AffineConstraints< number > &constraints)
void make_sparsity_pattern(const DoFHandler< dim, spacedim > &dof_handler, SparsityPatternType &sparsity_pattern, const AffineConstraints< number > &constraints=AffineConstraints< number >(), const bool keep_constrained_dofs=true, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id)
Expression fabs(const Expression &x)
Expression sign(const Expression &x)
void subdivided_hyper_cube(Triangulation< dim, spacedim > &tria, const unsigned int repetitions, const double left=0., const double right=1., const bool colorize=false)
void refine(Triangulation< dim, spacedim > &tria, const Vector< Number > &criteria, const double threshold, const unsigned int max_to_mark=numbers::invalid_unsigned_int)
void refine_and_coarsen_fixed_number(Triangulation< dim, spacedim > &triangulation, const Vector< Number > &criteria, const double top_fraction_of_cells, const double bottom_fraction_of_cells, const unsigned int max_n_cells=std::numeric_limits< unsigned int >::max())
void scale(const double scaling_factor, Triangulation< dim, spacedim > &triangulation)
Definition: grid_tools.cc:2042
double volume(const Triangulation< dim, spacedim > &tria, const Mapping< dim, spacedim > &mapping=(ReferenceCells::get_hypercube< dim >() .template get_default_linear_mapping< dim, spacedim >()))
Definition: grid_tools.cc:137
static const types::blas_int zero
@ matrix
Contents is actually a matrix.
static const char U
@ general
No special properties.
static const char T
static const types::blas_int one
static const char O
void cell_matrix(FullMatrix< double > &M, const FEValuesBase< dim > &fe, const FEValuesBase< dim > &fetest, const ArrayView< const std::vector< double >> &velocity, const double factor=1.)
Definition: advection.h:75
double norm(const FEValuesBase< dim > &fe, const ArrayView< const std::vector< Tensor< 1, dim >>> &Du)
Definition: divergence.h:472
void L2(Vector< number > &result, const FEValuesBase< dim > &fe, const std::vector< double > &input, const double factor=1.)
Definition: l2.h:160
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
Definition: utilities.cc:188
SymmetricTensor< 2, dim, Number > C(const Tensor< 2, dim, Number > &F)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
Tensor< 2, dim, Number > w(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
constexpr ReturnType< rank, T >::value_type & extract(T &t, const ArrayType &indices)
void call(const std::function< RT()> &function, internal::return_value< RT > &ret_val)
VectorType::value_type * end(VectorType &V)
void free(T *&pointer)
Definition: cuda.h:97
T sum(const T &t, const MPI_Comm &mpi_communicator)
std::string int_to_string(const unsigned int value, const unsigned int digits=numbers::invalid_unsigned_int)
Definition: utilities.cc:473
double compute_global_error(const Triangulation< dim, spacedim > &tria, const InVector &cellwise_error, const NormType &norm, const double exponent=2.)
void integrate_difference(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const InVector &fe_function, const Function< spacedim, typename InVector::value_type > &exact_solution, OutVector &difference, const Quadrature< dim > &q, const NormType &norm, const Function< spacedim, double > *weight=nullptr, const double exponent=2.)
void project_boundary_values(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const std::map< types::boundary_id, const Function< spacedim, number > * > &boundary_functions, const Quadrature< dim - 1 > &q, std::map< types::global_dof_index, number > &boundary_values, std::vector< unsigned int > component_mapping={})
void run(const Iterator &begin, const typename identity< Iterator >::type &end, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length, const unsigned int chunk_size)
Definition: work_stream.h:691
void run(const std::vector< std::vector< Iterator >> &colored_iterators, Worker worker, Copier copier, const ScratchData &sample_scratch_data, const CopyData &sample_copy_data, const unsigned int queue_length=2 *MultithreadInfo::n_threads(), const unsigned int chunk_size=8)
Definition: work_stream.h:1337
void copy(const T *begin, const T *end, U *dest)
int(&) functions(const void *v1, const void *v2)
void assemble(const MeshWorker::DoFInfoBox< dim, DOFINFO > &dinfo, A *assembler)
Definition: loop.h:71
void reinit(MatrixBlock< MatrixType > &v, const BlockSparsityPattern &p)
Definition: matrix_block.h:618
static constexpr double PI
Definition: numbers.h:231
Definition: types.h:32
::VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > exp(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > pow(const ::VectorizedArray< Number, width > &, const Number p)
const ::parallel::distributed::Triangulation< dim, spacedim > * triangulation