Reference documentation for deal.II version 9.3.2
\(\newcommand{\dealvcentcolon}{\mathrel{\mathop{:}}}\) \(\newcommand{\dealcoloneq}{\dealvcentcolon\mathrel{\mkern-1.2mu}=}\) \(\newcommand{\jump}[1]{\left[\!\left[ #1 \right]\!\right]}\) \(\newcommand{\average}[1]{\left\{\!\left\{ #1 \right\}\!\right\}}\)
step-22.h
Go to the documentation of this file.
1 ,
1327  * const unsigned int /*component*/) const
1328  * {
1329  * return 0;
1330  * }
1331  *
1332  *
1333  * template <int dim>
1334  * void RightHandSide<dim>::vector_value(const Point<dim> &p,
1335  * Vector<double> & values) const
1336  * {
1337  * for (unsigned int c = 0; c < this->n_components; ++c)
1338  * values(c) = RightHandSide<dim>::value(p, c);
1339  * }
1340  *
1341  *
1342  * @endcode
1343  *
1344  *
1345  * <a name="Linearsolversandpreconditioners"></a>
1346  * <h3>Linear solvers and preconditioners</h3>
1347  *
1348 
1349  *
1350  * The linear solvers and preconditioners are discussed extensively in the
1351  * introduction. Here, we create the respective objects that will be used.
1352  *
1353 
1354  *
1355  *
1356  * <a name="ThecodeInverseMatrixcodeclasstemplate"></a>
1357  * <h4>The <code>InverseMatrix</code> class template</h4>
1358  * The <code>InverseMatrix</code> class represents the data structure for an
1359  * inverse matrix. Unlike @ref step_20 "step-20", we implement this with a class instead of
1360  * the helper function inverse_linear_operator() we will apply this class to
1361  * different kinds of matrices that will require different preconditioners
1362  * (in @ref step_20 "step-20" we only used a non-identity preconditioner for the mass
1363  * matrix). The types of matrix and preconditioner are passed to this class
1364  * via template parameters, and matrix and preconditioner objects of these
1365  * types will then be passed to the constructor when an
1366  * <code>InverseMatrix</code> object is created. The member function
1367  * <code>vmult</code> is obtained by solving a linear system:
1368  *
1369  * @code
1370  * template <class MatrixType, class PreconditionerType>
1371  * class InverseMatrix : public Subscriptor
1372  * {
1373  * public:
1374  * InverseMatrix(const MatrixType & m,
1375  * const PreconditionerType &preconditioner);
1376  *
1377  * void vmult(Vector<double> &dst, const Vector<double> &src) const;
1378  *
1379  * private:
1381  * const SmartPointer<const PreconditionerType> preconditioner;
1382  * };
1383  *
1384  *
1385  * template <class MatrixType, class PreconditionerType>
1386  * InverseMatrix<MatrixType, PreconditionerType>::InverseMatrix(
1387  * const MatrixType & m,
1388  * const PreconditionerType &preconditioner)
1389  * : matrix(&m)
1390  * , preconditioner(&preconditioner)
1391  * {}
1392  *
1393  *
1394  * @endcode
1395  *
1396  * This is the implementation of the <code>vmult</code> function.
1397  *
1398 
1399  *
1400  * In this class we use a rather large tolerance for the solver control. The
1401  * reason for this is that the function is used very frequently, and hence,
1402  * any additional effort to make the residual in the CG solve smaller makes
1403  * the solution more expensive. Note that we do not only use this class as a
1404  * preconditioner for the Schur complement, but also when forming the
1405  * inverse of the Laplace matrix &ndash; which is hence directly responsible
1406  * for the accuracy of the solution itself, so we can't choose a too large
1407  * tolerance, either.
1408  *
1409  * @code
1410  * template <class MatrixType, class PreconditionerType>
1411  * void InverseMatrix<MatrixType, PreconditionerType>::vmult(
1412  * Vector<double> & dst,
1413  * const Vector<double> &src) const
1414  * {
1415  * SolverControl solver_control(src.size(), 1e-6 * src.l2_norm());
1416  * SolverCG<Vector<double>> cg(solver_control);
1417  *
1418  * dst = 0;
1419  *
1420  * cg.solve(*matrix, dst, src, *preconditioner);
1421  * }
1422  *
1423  *
1424  * @endcode
1425  *
1426  *
1427  * <a name="ThecodeSchurComplementcodeclasstemplate"></a>
1428  * <h4>The <code>SchurComplement</code> class template</h4>
1429  *
1430 
1431  *
1432  * This class implements the Schur complement discussed in the introduction.
1433  * It is in analogy to @ref step_20 "step-20". Though, we now call it with a template
1434  * parameter <code>PreconditionerType</code> in order to access that when
1435  * specifying the respective type of the inverse matrix class. As a
1436  * consequence of the definition above, the declaration
1437  * <code>InverseMatrix</code> now contains the second template parameter for
1438  * a preconditioner class as above, which affects the
1439  * <code>SmartPointer</code> object <code>m_inverse</code> as well.
1440  *
1441  * @code
1442  * template <class PreconditionerType>
1443  * class SchurComplement : public Subscriptor
1444  * {
1445  * public:
1446  * SchurComplement(
1447  * const BlockSparseMatrix<double> &system_matrix,
1448  * const InverseMatrix<SparseMatrix<double>, PreconditionerType> &A_inverse);
1449  *
1450  * void vmult(Vector<double> &dst, const Vector<double> &src) const;
1451  *
1452  * private:
1453  * const SmartPointer<const BlockSparseMatrix<double>> system_matrix;
1454  * const SmartPointer<
1455  * const InverseMatrix<SparseMatrix<double>, PreconditionerType>>
1456  * A_inverse;
1457  *
1458  * mutable Vector<double> tmp1, tmp2;
1459  * };
1460  *
1461  *
1462  *
1463  * template <class PreconditionerType>
1464  * SchurComplement<PreconditionerType>::SchurComplement(
1465  * const BlockSparseMatrix<double> &system_matrix,
1466  * const InverseMatrix<SparseMatrix<double>, PreconditionerType> &A_inverse)
1467  * : system_matrix(&system_matrix)
1468  * , A_inverse(&A_inverse)
1469  * , tmp1(system_matrix.block(0, 0).m())
1470  * , tmp2(system_matrix.block(0, 0).m())
1471  * {}
1472  *
1473  *
1474  * template <class PreconditionerType>
1475  * void
1476  * SchurComplement<PreconditionerType>::vmult(Vector<double> & dst,
1477  * const Vector<double> &src) const
1478  * {
1479  * system_matrix->block(0, 1).vmult(tmp1, src);
1480  * A_inverse->vmult(tmp2, tmp1);
1481  * system_matrix->block(1, 0).vmult(dst, tmp2);
1482  * }
1483  *
1484  *
1485  * @endcode
1486  *
1487  *
1488  * <a name="StokesProblemclassimplementation"></a>
1489  * <h3>StokesProblem class implementation</h3>
1490  *
1491 
1492  *
1493  *
1494  * <a name="StokesProblemStokesProblem"></a>
1495  * <h4>StokesProblem::StokesProblem</h4>
1496  *
1497 
1498  *
1499  * The constructor of this class looks very similar to the one of
1500  * @ref step_20 "step-20". The constructor initializes the variables for the polynomial
1501  * degree, triangulation, finite element system and the dof handler. The
1502  * underlying polynomial functions are of order <code>degree+1</code> for
1503  * the vector-valued velocity components and of order <code>degree</code>
1504  * for the pressure. This gives the LBB-stable element pair
1505  * @f$Q_{degree+1}^d\times Q_{degree}@f$, often referred to as the Taylor-Hood
1506  * element.
1507  *
1508 
1509  *
1510  * Note that we initialize the triangulation with a MeshSmoothing argument,
1511  * which ensures that the refinement of cells is done in a way that the
1512  * approximation of the PDE solution remains well-behaved (problems arise if
1513  * grids are too unstructured), see the documentation of
1514  * <code>Triangulation::MeshSmoothing</code> for details.
1515  *
1516  * @code
1517  * template <int dim>
1518  * StokesProblem<dim>::StokesProblem(const unsigned int degree)
1519  * : degree(degree)
1520  * , triangulation(Triangulation<dim>::maximum_smoothing)
1521  * , fe(FE_Q<dim>(degree + 1), dim, FE_Q<dim>(degree), 1)
1522  * , dof_handler(triangulation)
1523  * {}
1524  *
1525  *
1526  * @endcode
1527  *
1528  *
1529  * <a name="StokesProblemsetup_dofs"></a>
1530  * <h4>StokesProblem::setup_dofs</h4>
1531  *
1532 
1533  *
1534  * Given a mesh, this function associates the degrees of freedom with it and
1535  * creates the corresponding matrices and vectors. At the beginning it also
1536  * releases the pointer to the preconditioner object (if the shared pointer
1537  * pointed at anything at all at this point) since it will definitely not be
1538  * needed any more after this point and will have to be re-computed after
1539  * assembling the matrix, and unties the sparse matrices from their sparsity
1540  * pattern objects.
1541  *
1542 
1543  *
1544  * We then proceed with distributing degrees of freedom and renumbering
1545  * them: In order to make the ILU preconditioner (in 3D) work efficiently,
1546  * it is important to enumerate the degrees of freedom in such a way that it
1547  * reduces the bandwidth of the matrix, or maybe more importantly: in such a
1548  * way that the ILU is as close as possible to a real LU decomposition. On
1549  * the other hand, we need to preserve the block structure of velocity and
1550  * pressure already seen in @ref step_20 "step-20" and @ref step_21 "step-21". This is done in two
1551  * steps: First, all dofs are renumbered to improve the ILU and then we
1552  * renumber once again by components. Since
1553  * <code>DoFRenumbering::component_wise</code> does not touch the
1554  * renumbering within the individual blocks, the basic renumbering from the
1555  * first step remains. As for how the renumber degrees of freedom to improve
1556  * the ILU: deal.II has a number of algorithms that attempt to find
1557  * orderings to improve ILUs, or reduce the bandwidth of matrices, or
1558  * optimize some other aspect. The DoFRenumbering namespace shows a
1559  * comparison of the results we obtain with several of these algorithms
1560  * based on the testcase discussed here in this tutorial program. Here, we
1561  * will use the traditional Cuthill-McKee algorithm already used in some of
1562  * the previous tutorial programs. In the <a href="#improved-ilu">section
1563  * on improved ILU</a> we're going to discuss this issue in more detail.
1564  *
1565 
1566  *
1567  * There is one more change compared to previous tutorial programs: There is
1568  * no reason in sorting the <code>dim</code> velocity components
1569  * individually. In fact, rather than first enumerating all @f$x@f$-velocities,
1570  * then all @f$y@f$-velocities, etc, we would like to keep all velocities at the
1571  * same location together and only separate between velocities (all
1572  * components) and pressures. By default, this is not what the
1573  * DoFRenumbering::component_wise function does: it treats each vector
1574  * component separately; what we have to do is group several components into
1575  * "blocks" and pass this block structure to that function. Consequently, we
1576  * allocate a vector <code>block_component</code> with as many elements as
1577  * there are components and describe all velocity components to correspond
1578  * to block 0, while the pressure component will form block 1:
1579  *
1580  * @code
1581  * template <int dim>
1582  * void StokesProblem<dim>::setup_dofs()
1583  * {
1584  * A_preconditioner.reset();
1585  * system_matrix.clear();
1586  * preconditioner_matrix.clear();
1587  *
1588  * dof_handler.distribute_dofs(fe);
1589  * DoFRenumbering::Cuthill_McKee(dof_handler);
1590  *
1591  * std::vector<unsigned int> block_component(dim + 1, 0);
1592  * block_component[dim] = 1;
1593  * DoFRenumbering::component_wise(dof_handler, block_component);
1594  *
1595  * @endcode
1596  *
1597  * Now comes the implementation of Dirichlet boundary conditions, which
1598  * should be evident after the discussion in the introduction. All that
1599  * changed is that the function already appears in the setup functions,
1600  * whereas we were used to see it in some assembly routine. Further down
1601  * below where we set up the mesh, we will associate the top boundary
1602  * where we impose Dirichlet boundary conditions with boundary indicator
1603  * 1. We will have to pass this boundary indicator as second argument to
1604  * the function below interpolating boundary values. There is one more
1605  * thing, though. The function describing the Dirichlet conditions was
1606  * defined for all components, both velocity and pressure. However, the
1607  * Dirichlet conditions are to be set for the velocity only. To this end,
1608  * we use a ComponentMask that only selects the velocity components. The
1609  * component mask is obtained from the finite element by specifying the
1610  * particular components we want. Since we use adaptively refined grids,
1611  * the affine constraints object needs to be first filled with hanging node
1612  * constraints generated from the DoF handler. Note the order of the two
1613  * functions &mdash; we first compute the hanging node constraints, and
1614  * then insert the boundary values into the constraints object. This makes
1615  * sure that we respect H<sup>1</sup> conformity on boundaries with
1616  * hanging nodes (in three space dimensions), where the hanging node needs
1617  * to dominate the Dirichlet boundary values.
1618  *
1619  * @code
1620  * {
1621  * constraints.clear();
1622  *
1623  * FEValuesExtractors::Vector velocities(0);
1624  * DoFTools::make_hanging_node_constraints(dof_handler, constraints);
1626  * 1,
1627  * BoundaryValues<dim>(),
1628  * constraints,
1629  * fe.component_mask(velocities));
1630  * }
1631  *
1632  * constraints.close();
1633  *
1634  * @endcode
1635  *
1636  * In analogy to @ref step_20 "step-20", we count the dofs in the individual components.
1637  * We could do this in the same way as there, but we want to operate on
1638  * the block structure we used already for the renumbering: The function
1639  * <code>DoFTools::count_dofs_per_fe_block</code> does the same as
1640  * <code>DoFTools::count_dofs_per_fe_component</code>, but now grouped as
1641  * velocity and pressure block via <code>block_component</code>.
1642  *
1643  * @code
1644  * const std::vector<types::global_dof_index> dofs_per_block =
1645  * DoFTools::count_dofs_per_fe_block(dof_handler, block_component);
1646  * const unsigned int n_u = dofs_per_block[0];
1647  * const unsigned int n_p = dofs_per_block[1];
1648  *
1649  * std::cout << " Number of active cells: " << triangulation.n_active_cells()
1650  * << std::endl
1651  * << " Number of degrees of freedom: " << dof_handler.n_dofs()
1652  * << " (" << n_u << '+' << n_p << ')' << std::endl;
1653  *
1654  * @endcode
1655  *
1656  * The next task is to allocate a sparsity pattern for the system matrix we
1657  * will create and one for the preconditioner matrix. We could do this in
1658  * the same way as in @ref step_20 "step-20", i.e. directly build an object of type
1659  * SparsityPattern through DoFTools::make_sparsity_pattern. However, there
1660  * is a major reason not to do so:
1661  * In 3D, the function DoFTools::max_couplings_between_dofs yields a
1662  * conservative but rather large number for the coupling between the
1663  * individual dofs, so that the memory initially provided for the creation
1664  * of the sparsity pattern of the matrix is far too much -- so much actually
1665  * that the initial sparsity pattern won't even fit into the physical memory
1666  * of most systems already for moderately-sized 3D problems, see also the
1667  * discussion in @ref step_18 "step-18". Instead, we first build temporary objects that use
1668  * a different data structure that doesn't require allocating more memory
1669  * than necessary but isn't suitable for use as a basis of SparseMatrix or
1670  * BlockSparseMatrix objects; in a second step we then copy these objects
1671  * into objects of type BlockSparsityPattern. This is entirely analogous to
1672  * what we already did in @ref step_11 "step-11" and @ref step_18 "step-18". In particular, we make use of
1673  * the fact that we will never write into the @f$(1,1)@f$ block of the system
1674  * matrix and that this is the only block to be filled for the
1675  * preconditioner matrix.
1676  *
1677 
1678  *
1679  * All this is done inside new scopes, which means that the memory of
1680  * <code>dsp</code> will be released once the information has been copied to
1681  * <code>sparsity_pattern</code>.
1682  *
1683  * @code
1684  * {
1685  * BlockDynamicSparsityPattern dsp(2, 2);
1686  *
1687  * dsp.block(0, 0).reinit(n_u, n_u);
1688  * dsp.block(1, 0).reinit(n_p, n_u);
1689  * dsp.block(0, 1).reinit(n_u, n_p);
1690  * dsp.block(1, 1).reinit(n_p, n_p);
1691  *
1692  * dsp.collect_sizes();
1693  *
1694  * Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1);
1695  *
1696  * for (unsigned int c = 0; c < dim + 1; ++c)
1697  * for (unsigned int d = 0; d < dim + 1; ++d)
1698  * if (!((c == dim) && (d == dim)))
1699  * coupling[c][d] = DoFTools::always;
1700  * else
1701  * coupling[c][d] = DoFTools::none;
1702  *
1703  * DoFTools::make_sparsity_pattern(
1704  * dof_handler, coupling, dsp, constraints, false);
1705  *
1706  * sparsity_pattern.copy_from(dsp);
1707  * }
1708  *
1709  * {
1710  * BlockDynamicSparsityPattern preconditioner_dsp(2, 2);
1711  *
1712  * preconditioner_dsp.block(0, 0).reinit(n_u, n_u);
1713  * preconditioner_dsp.block(1, 0).reinit(n_p, n_u);
1714  * preconditioner_dsp.block(0, 1).reinit(n_u, n_p);
1715  * preconditioner_dsp.block(1, 1).reinit(n_p, n_p);
1716  *
1717  * preconditioner_dsp.collect_sizes();
1718  *
1719  * Table<2, DoFTools::Coupling> preconditioner_coupling(dim + 1, dim + 1);
1720  *
1721  * for (unsigned int c = 0; c < dim + 1; ++c)
1722  * for (unsigned int d = 0; d < dim + 1; ++d)
1723  * if (((c == dim) && (d == dim)))
1724  * preconditioner_coupling[c][d] = DoFTools::always;
1725  * else
1726  * preconditioner_coupling[c][d] = DoFTools::none;
1727  *
1728  * DoFTools::make_sparsity_pattern(dof_handler,
1729  * preconditioner_coupling,
1730  * preconditioner_dsp,
1731  * constraints,
1732  * false);
1733  *
1734  * preconditioner_sparsity_pattern.copy_from(preconditioner_dsp);
1735  * }
1736  *
1737  * @endcode
1738  *
1739  * Finally, the system matrix, the preconsitioner matrix, the solution and
1740  * the right hand side vector are created from the block structure similar
1741  * to the approach in @ref step_20 "step-20":
1742  *
1743  * @code
1744  * system_matrix.reinit(sparsity_pattern);
1745  * preconditioner_matrix.reinit(preconditioner_sparsity_pattern);
1746  *
1747  * solution.reinit(2);
1748  * solution.block(0).reinit(n_u);
1749  * solution.block(1).reinit(n_p);
1750  * solution.collect_sizes();
1751  *
1752  * system_rhs.reinit(2);
1753  * system_rhs.block(0).reinit(n_u);
1754  * system_rhs.block(1).reinit(n_p);
1755  * system_rhs.collect_sizes();
1756  * }
1757  *
1758  *
1759  * @endcode
1760  *
1761  *
1762  * <a name="StokesProblemassemble_system"></a>
1763  * <h4>StokesProblem::assemble_system</h4>
1764  *
1765 
1766  *
1767  * The assembly process follows the discussion in @ref step_20 "step-20" and in the
1768  * introduction. We use the well-known abbreviations for the data structures
1769  * that hold the local matrices, right hand side, and global numbering of the
1770  * degrees of freedom for the present cell.
1771  *
1772  * @code
1773  * template <int dim>
1774  * void StokesProblem<dim>::assemble_system()
1775  * {
1776  * system_matrix = 0;
1777  * system_rhs = 0;
1778  * preconditioner_matrix = 0;
1779  *
1780  * QGauss<dim> quadrature_formula(degree + 2);
1781  *
1782  * FEValues<dim> fe_values(fe,
1783  * quadrature_formula,
1784  * update_values | update_quadrature_points |
1785  * update_JxW_values | update_gradients);
1786  *
1787  * const unsigned int dofs_per_cell = fe.n_dofs_per_cell();
1788  *
1789  * const unsigned int n_q_points = quadrature_formula.size();
1790  *
1791  * FullMatrix<double> local_matrix(dofs_per_cell, dofs_per_cell);
1792  * FullMatrix<double> local_preconditioner_matrix(dofs_per_cell,
1793  * dofs_per_cell);
1794  * Vector<double> local_rhs(dofs_per_cell);
1795  *
1796  * std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell);
1797  *
1798  * const RightHandSide<dim> right_hand_side;
1799  * std::vector<Vector<double>> rhs_values(n_q_points, Vector<double>(dim + 1));
1800  *
1801  * @endcode
1802  *
1803  * Next, we need two objects that work as extractors for the FEValues
1804  * object. Their use is explained in detail in the report on @ref
1805  * vector_valued :
1806  *
1807  * @code
1808  * const FEValuesExtractors::Vector velocities(0);
1809  * const FEValuesExtractors::Scalar pressure(dim);
1810  *
1811  * @endcode
1812  *
1813  * As an extension over @ref step_20 "step-20" and @ref step_21 "step-21", we include a few optimizations
1814  * that make assembly much faster for this particular problem. The
1815  * improvements are based on the observation that we do a few calculations
1816  * too many times when we do as in @ref step_20 "step-20": The symmetric gradient actually
1817  * has <code>dofs_per_cell</code> different values per quadrature point, but
1818  * we extract it <code>dofs_per_cell*dofs_per_cell</code> times from the
1819  * FEValues object - for both the loop over <code>i</code> and the inner
1820  * loop over <code>j</code>. In 3d, that means evaluating it @f$89^2=7921@f$
1821  * instead of @f$89@f$ times, a not insignificant difference.
1822  *
1823 
1824  *
1825  * So what we're going to do here is to avoid such repeated calculations
1826  * by getting a vector of rank-2 tensors (and similarly for the divergence
1827  * and the basis function value on pressure) at the quadrature point prior
1828  * to starting the loop over the dofs on the cell. First, we create the
1829  * respective objects that will hold these values. Then, we start the loop
1830  * over all cells and the loop over the quadrature points, where we first
1831  * extract these values. There is one more optimization we implement here:
1832  * the local matrix (as well as the global one) is going to be symmetric,
1833  * since all the operations involved are symmetric with respect to @f$i@f$ and
1834  * @f$j@f$. This is implemented by simply running the inner loop not to
1835  * <code>dofs_per_cell</code>, but only up to <code>i</code>, the index of
1836  * the outer loop.
1837  *
1838  * @code
1839  * std::vector<SymmetricTensor<2, dim>> symgrad_phi_u(dofs_per_cell);
1840  * std::vector<double> div_phi_u(dofs_per_cell);
1841  * std::vector<double> phi_p(dofs_per_cell);
1842  *
1843  * for (const auto &cell : dof_handler.active_cell_iterators())
1844  * {
1845  * fe_values.reinit(cell);
1846  * local_matrix = 0;
1847  * local_preconditioner_matrix = 0;
1848  * local_rhs = 0;
1849  *
1850  * right_hand_side.vector_value_list(fe_values.get_quadrature_points(),
1851  * rhs_values);
1852  *
1853  * for (unsigned int q = 0; q < n_q_points; ++q)
1854  * {
1855  * for (unsigned int k = 0; k < dofs_per_cell; ++k)
1856  * {
1857  * symgrad_phi_u[k] =
1858  * fe_values[velocities].symmetric_gradient(k, q);
1859  * div_phi_u[k] = fe_values[velocities].divergence(k, q);
1860  * phi_p[k] = fe_values[pressure].value(k, q);
1861  * }
1862  *
1863  * @endcode
1864  *
1865  * Now finally for the bilinear forms of both the system matrix and
1866  * the matrix we use for the preconditioner. Recall that the
1867  * formulas for these two are
1868  * @f{align*}{
1869  * A_{ij} &= a(\varphi_i,\varphi_j)
1870  * \\ &= \underbrace{2(\varepsilon(\varphi_{i,\textbf{u}}),
1871  * \varepsilon(\varphi_{j,\textbf{u}}))_{\Omega}}
1872  * _{(1)}
1873  * \;
1874  * \underbrace{- (\textrm{div}\; \varphi_{i,\textbf{u}},
1875  * \varphi_{j,p})_{\Omega}}
1876  * _{(2)}
1877  * \;
1878  * \underbrace{- (\varphi_{i,p},
1879  * \textrm{div}\;
1880  * \varphi_{j,\textbf{u}})_{\Omega}}
1881  * _{(3)}
1882  * @f}
1883  * and
1884  * @f{align*}{
1885  * M_{ij} &= \underbrace{(\varphi_{i,p},
1886  * \varphi_{j,p})_{\Omega}}
1887  * _{(4)},
1888  * @f}
1889  * respectively, where @f$\varphi_{i,\textbf{u}}@f$ and @f$\varphi_{i,p}@f$
1890  * are the velocity and pressure components of the @f$i@f$th shape
1891  * function. The various terms above are then easily recognized in
1892  * the following implementation:
1893  *
1894  * @code
1895  * for (unsigned int i = 0; i < dofs_per_cell; ++i)
1896  * {
1897  * for (unsigned int j = 0; j <= i; ++j)
1898  * {
1899  * local_matrix(i, j) +=
1900  * (2 * (symgrad_phi_u[i] * symgrad_phi_u[j]) // (1)
1901  * - div_phi_u[i] * phi_p[j] // (2)
1902  * - phi_p[i] * div_phi_u[j]) // (3)
1903  * * fe_values.JxW(q); // * dx
1904  *
1905  * local_preconditioner_matrix(i, j) +=
1906  * (phi_p[i] * phi_p[j]) // (4)
1907  * * fe_values.JxW(q); // * dx
1908  * }
1909  * @endcode
1910  *
1911  * Note that in the implementation of (1) above, `operator*`
1912  * is overloaded for symmetric tensors, yielding the scalar
1913  * product between the two tensors.
1914  *
1915 
1916  *
1917  * For the right-hand side we use the fact that the shape
1918  * functions are only non-zero in one component (because our
1919  * elements are primitive). Instead of multiplying the tensor
1920  * representing the dim+1 values of shape function i with the
1921  * whole right-hand side vector, we only look at the only
1922  * non-zero component. The function
1923  * FiniteElement::system_to_component_index will return
1924  * which component this shape function lives in (0=x velocity,
1925  * 1=y velocity, 2=pressure in 2d), which we use to pick out
1926  * the correct component of the right-hand side vector to
1927  * multiply with.
1928  *
1929  * @code
1930  * const unsigned int component_i =
1931  * fe.system_to_component_index(i).first;
1932  * local_rhs(i) += (fe_values.shape_value(i, q) // (phi_u_i(x_q)
1933  * * rhs_values[q](component_i)) // * f(x_q))
1934  * * fe_values.JxW(q); // * dx
1935  * }
1936  * }
1937  *
1938  * @endcode
1939  *
1940  * Before we can write the local data into the global matrix (and
1941  * simultaneously use the AffineConstraints object to apply
1942  * Dirichlet boundary conditions and eliminate hanging node constraints,
1943  * as we discussed in the introduction), we have to be careful about one
1944  * thing, though. We have only built half of the local matrices
1945  * because of symmetry, but we're going to save the full matrices
1946  * in order to use the standard functions for solving. This is done
1947  * by flipping the indices in case we are pointing into the empty part
1948  * of the local matrices.
1949  *
1950  * @code
1951  * for (unsigned int i = 0; i < dofs_per_cell; ++i)
1952  * for (unsigned int j = i + 1; j < dofs_per_cell; ++j)
1953  * {
1954  * local_matrix(i, j) = local_matrix(j, i);
1955  * local_preconditioner_matrix(i, j) =
1956  * local_preconditioner_matrix(j, i);
1957  * }
1958  *
1959  * cell->get_dof_indices(local_dof_indices);
1960  * constraints.distribute_local_to_global(local_matrix,
1961  * local_rhs,
1962  * local_dof_indices,
1963  * system_matrix,
1964  * system_rhs);
1965  * constraints.distribute_local_to_global(local_preconditioner_matrix,
1966  * local_dof_indices,
1967  * preconditioner_matrix);
1968  * }
1969  *
1970  * @endcode
1971  *
1972  * Before we're going to solve this linear system, we generate a
1973  * preconditioner for the velocity-velocity matrix, i.e.,
1974  * <code>block(0,0)</code> in the system matrix. As mentioned above, this
1975  * depends on the spatial dimension. Since the two classes described by
1976  * the <code>InnerPreconditioner::type</code> alias have the same
1977  * interface, we do not have to do anything different whether we want to
1978  * use a sparse direct solver or an ILU:
1979  *
1980  * @code
1981  * std::cout << " Computing preconditioner..." << std::endl << std::flush;
1982  *
1983  * A_preconditioner =
1984  * std::make_shared<typename InnerPreconditioner<dim>::type>();
1985  * A_preconditioner->initialize(
1986  * system_matrix.block(0, 0),
1987  * typename InnerPreconditioner<dim>::type::AdditionalData());
1988  * }
1989  *
1990  *
1991  *
1992  * @endcode
1993  *
1994  *
1995  * <a name="StokesProblemsolve"></a>
1996  * <h4>StokesProblem::solve</h4>
1997  *
1998 
1999  *
2000  * After the discussion in the introduction and the definition of the
2001  * respective classes above, the implementation of the <code>solve</code>
2002  * function is rather straight-forward and done in a similar way as in
2003  * @ref step_20 "step-20". To start with, we need an object of the
2004  * <code>InverseMatrix</code> class that represents the inverse of the
2005  * matrix A. As described in the introduction, the inverse is generated with
2006  * the help of an inner preconditioner of type
2007  * <code>InnerPreconditioner::type</code>.
2008  *
2009  * @code
2010  * template <int dim>
2011  * void StokesProblem<dim>::solve()
2012  * {
2013  * const InverseMatrix<SparseMatrix<double>,
2014  * typename InnerPreconditioner<dim>::type>
2015  * A_inverse(system_matrix.block(0, 0), *A_preconditioner);
2016  * Vector<double> tmp(solution.block(0).size());
2017  *
2018  * @endcode
2019  *
2020  * This is as in @ref step_20 "step-20". We generate the right hand side @f$B A^{-1} F - G@f$
2021  * for the Schur complement and an object that represents the respective
2022  * linear operation @f$B A^{-1} B^T@f$, now with a template parameter
2023  * indicating the preconditioner - in accordance with the definition of
2024  * the class.
2025  *
2026  * @code
2027  * {
2028  * Vector<double> schur_rhs(solution.block(1).size());
2029  * A_inverse.vmult(tmp, system_rhs.block(0));
2030  * system_matrix.block(1, 0).vmult(schur_rhs, tmp);
2031  * schur_rhs -= system_rhs.block(1);
2032  *
2033  * SchurComplement<typename InnerPreconditioner<dim>::type> schur_complement(
2034  * system_matrix, A_inverse);
2035  *
2036  * @endcode
2037  *
2038  * The usual control structures for the solver call are created...
2039  *
2040  * @code
2041  * SolverControl solver_control(solution.block(1).size(),
2042  * 1e-6 * schur_rhs.l2_norm());
2043  * SolverCG<Vector<double>> cg(solver_control);
2044  *
2045  * @endcode
2046  *
2047  * Now to the preconditioner to the Schur complement. As explained in
2048  * the introduction, the preconditioning is done by a mass matrix in the
2049  * pressure variable.
2050  *
2051 
2052  *
2053  * Actually, the solver needs to have the preconditioner in the form
2054  * @f$P^{-1}@f$, so we need to create an inverse operation. Once again, we
2055  * use an object of the class <code>InverseMatrix</code>, which
2056  * implements the <code>vmult</code> operation that is needed by the
2057  * solver. In this case, we have to invert the pressure mass matrix. As
2058  * it already turned out in earlier tutorial programs, the inversion of
2059  * a mass matrix is a rather cheap and straight-forward operation
2060  * (compared to, e.g., a Laplace matrix). The CG method with ILU
2061  * preconditioning converges in 5-10 steps, independently on the mesh
2062  * size. This is precisely what we do here: We choose another ILU
2063  * preconditioner and take it along to the InverseMatrix object via the
2064  * corresponding template parameter. A CG solver is then called within
2065  * the vmult operation of the inverse matrix.
2066  *
2067 
2068  *
2069  * An alternative that is cheaper to build, but needs more iterations
2070  * afterwards, would be to choose a SSOR preconditioner with factor
2071  * 1.2. It needs about twice the number of iterations, but the costs for
2072  * its generation are almost negligible.
2073  *
2074  * @code
2075  * SparseILU<double> preconditioner;
2076  * preconditioner.initialize(preconditioner_matrix.block(1, 1),
2077  * SparseILU<double>::AdditionalData());
2078  *
2079  * InverseMatrix<SparseMatrix<double>, SparseILU<double>> m_inverse(
2080  * preconditioner_matrix.block(1, 1), preconditioner);
2081  *
2082  * @endcode
2083  *
2084  * With the Schur complement and an efficient preconditioner at hand, we
2085  * can solve the respective equation for the pressure (i.e. block 0 in
2086  * the solution vector) in the usual way:
2087  *
2088  * @code
2089  * cg.solve(schur_complement, solution.block(1), schur_rhs, m_inverse);
2090  *
2091  * @endcode
2092  *
2093  * After this first solution step, the hanging node constraints have to
2094  * be distributed to the solution in order to achieve a consistent
2095  * pressure field.
2096  *
2097  * @code
2098  * constraints.distribute(solution);
2099  *
2100  * std::cout << " " << solver_control.last_step()
2101  * << " outer CG Schur complement iterations for pressure"
2102  * << std::endl;
2103  * }
2104  *
2105  * @endcode
2106  *
2107  * As in @ref step_20 "step-20", we finally need to solve for the velocity equation where
2108  * we plug in the solution to the pressure equation. This involves only
2109  * objects we already know - so we simply multiply @f$p@f$ by @f$B^T@f$, subtract
2110  * the right hand side and multiply by the inverse of @f$A@f$. At the end, we
2111  * need to distribute the constraints from hanging nodes in order to
2112  * obtain a consistent flow field:
2113  *
2114  * @code
2115  * {
2116  * system_matrix.block(0, 1).vmult(tmp, solution.block(1));
2117  * tmp *= -1;
2118  * tmp += system_rhs.block(0);
2119  *
2120  * A_inverse.vmult(solution.block(0), tmp);
2121  *
2122  * constraints.distribute(solution);
2123  * }
2124  * }
2125  *
2126  *
2127  * @endcode
2128  *
2129  *
2130  * <a name="StokesProblemoutput_results"></a>
2131  * <h4>StokesProblem::output_results</h4>
2132  *
2133 
2134  *
2135  * The next function generates graphical output. In this example, we are
2136  * going to use the VTK file format. We attach names to the individual
2137  * variables in the problem: <code>velocity</code> to the <code>dim</code>
2138  * components of velocity and <code>pressure</code> to the pressure.
2139  *
2140 
2141  *
2142  * Not all visualization programs have the ability to group individual
2143  * vector components into a vector to provide vector plots; in particular,
2144  * this holds for some VTK-based visualization programs. In this case, the
2145  * logical grouping of components into vectors should already be described
2146  * in the file containing the data. In other words, what we need to do is
2147  * provide our output writers with a way to know which of the components of
2148  * the finite element logically form a vector (with @f$d@f$ components in @f$d@f$
2149  * space dimensions) rather than letting them assume that we simply have a
2150  * bunch of scalar fields. This is achieved using the members of the
2151  * <code>DataComponentInterpretation</code> namespace: as with the filename,
2152  * we create a vector in which the first <code>dim</code> components refer
2153  * to the velocities and are given the tag
2154  * DataComponentInterpretation::component_is_part_of_vector; we
2155  * finally push one tag
2156  * DataComponentInterpretation::component_is_scalar to describe
2157  * the grouping of the pressure variable.
2158  *
2159 
2160  *
2161  * The rest of the function is then the same as in @ref step_20 "step-20".
2162  *
2163  * @code
2164  * template <int dim>
2165  * void
2166  * StokesProblem<dim>::output_results(const unsigned int refinement_cycle) const
2167  * {
2168  * std::vector<std::string> solution_names(dim, "velocity");
2169  * solution_names.emplace_back("pressure");
2170  *
2171  * std::vector<DataComponentInterpretation::DataComponentInterpretation>
2172  * data_component_interpretation(
2173  * dim, DataComponentInterpretation::component_is_part_of_vector);
2174  * data_component_interpretation.push_back(
2175  * DataComponentInterpretation::component_is_scalar);
2176  *
2177  * DataOut<dim> data_out;
2178  * data_out.attach_dof_handler(dof_handler);
2179  * data_out.add_data_vector(solution,
2180  * solution_names,
2181  * DataOut<dim>::type_dof_data,
2182  * data_component_interpretation);
2183  * data_out.build_patches();
2184  *
2185  * std::ofstream output(
2186  * "solution-" + Utilities::int_to_string(refinement_cycle, 2) + ".vtk");
2187  * data_out.write_vtk(output);
2188  * }
2189  *
2190  *
2191  * @endcode
2192  *
2193  *
2194  * <a name="StokesProblemrefine_mesh"></a>
2195  * <h4>StokesProblem::refine_mesh</h4>
2196  *
2197 
2198  *
2199  * This is the last interesting function of the <code>StokesProblem</code>
2200  * class. As indicated by its name, it takes the solution to the problem
2201  * and refines the mesh where this is needed. The procedure is the same as
2202  * in the respective step in @ref step_6 "step-6", with the exception that we base the
2203  * refinement only on the change in pressure, i.e., we call the Kelly error
2204  * estimator with a mask object of type ComponentMask that selects the
2205  * single scalar component for the pressure that we are interested in (we
2206  * get such a mask from the finite element class by specifying the component
2207  * we want). Additionally, we do not coarsen the grid again:
2208  *
2209  * @code
2210  * template <int dim>
2211  * void StokesProblem<dim>::refine_mesh()
2212  * {
2213  * Vector<float> estimated_error_per_cell(triangulation.n_active_cells());
2214  *
2215  * FEValuesExtractors::Scalar pressure(dim);
2216  * KellyErrorEstimator<dim>::estimate(
2217  * dof_handler,
2218  * QGauss<dim - 1>(degree + 1),
2219  * std::map<types::boundary_id, const Function<dim> *>(),
2220  * solution,
2221  * estimated_error_per_cell,
2222  * fe.component_mask(pressure));
2223  *
2224  * GridRefinement::refine_and_coarsen_fixed_number(triangulation,
2225  * estimated_error_per_cell,
2226  * 0.3,
2227  * 0.0);
2228  * triangulation.execute_coarsening_and_refinement();
2229  * }
2230  *
2231  *
2232  * @endcode
2233  *
2234  *
2235  * <a name="StokesProblemrun"></a>
2236  * <h4>StokesProblem::run</h4>
2237  *
2238 
2239  *
2240  * The last step in the Stokes class is, as usual, the function that
2241  * generates the initial grid and calls the other functions in the
2242  * respective order.
2243  *
2244 
2245  *
2246  * We start off with a rectangle of size @f$4 \times 1@f$ (in 2d) or @f$4 \times 1
2247  * \times 1@f$ (in 3d), placed in @f$R^2/R^3@f$ as @f$(-2,2)\times(-1,0)@f$ or
2248  * @f$(-2,2)\times(0,1)\times(-1,0)@f$, respectively. It is natural to start
2249  * with equal mesh size in each direction, so we subdivide the initial
2250  * rectangle four times in the first coordinate direction. To limit the
2251  * scope of the variables involved in the creation of the mesh to the range
2252  * where we actually need them, we put the entire block between a pair of
2253  * braces:
2254  *
2255  * @code
2256  * template <int dim>
2257  * void StokesProblem<dim>::run()
2258  * {
2259  * {
2260  * std::vector<unsigned int> subdivisions(dim, 1);
2261  * subdivisions[0] = 4;
2262  *
2263  * const Point<dim> bottom_left = (dim == 2 ?
2264  * Point<dim>(-2, -1) : // 2d case
2265  * Point<dim>(-2, 0, -1)); // 3d case
2266  *
2267  * const Point<dim> top_right = (dim == 2 ?
2268  * Point<dim>(2, 0) : // 2d case
2269  * Point<dim>(2, 1, 0)); // 3d case
2270  *
2271  * GridGenerator::subdivided_hyper_rectangle(triangulation,
2272  * subdivisions,
2273  * bottom_left,
2274  * top_right);
2275  * }
2276  *
2277  * @endcode
2278  *
2279  * A boundary indicator of 1 is set to all boundaries that are subject to
2280  * Dirichlet boundary conditions, i.e. to faces that are located at 0 in
2281  * the last coordinate direction. See the example description above for
2282  * details.
2283  *
2284  * @code
2285  * for (const auto &cell : triangulation.active_cell_iterators())
2286  * for (const auto &face : cell->face_iterators())
2287  * if (face->center()[dim - 1] == 0)
2288  * face->set_all_boundary_ids(1);
2289  *
2290  *
2291  * @endcode
2292  *
2293  * We then apply an initial refinement before solving for the first
2294  * time. In 3D, there are going to be more degrees of freedom, so we
2295  * refine less there:
2296  *
2297  * @code
2298  * triangulation.refine_global(4 - dim);
2299  *
2300  * @endcode
2301  *
2302  * As first seen in @ref step_6 "step-6", we cycle over the different refinement levels
2303  * and refine (except for the first cycle), setup the degrees of freedom
2304  * and matrices, assemble, solve and create output:
2305  *
2306  * @code
2307  * for (unsigned int refinement_cycle = 0; refinement_cycle < 6;
2308  * ++refinement_cycle)
2309  * {
2310  * std::cout << "Refinement cycle " << refinement_cycle << std::endl;
2311  *
2312  * if (refinement_cycle > 0)
2313  * refine_mesh();
2314  *
2315  * setup_dofs();
2316  *
2317  * std::cout << " Assembling..." << std::endl << std::flush;
2318  * assemble_system();
2319  *
2320  * std::cout << " Solving..." << std::flush;
2321  * solve();
2322  *
2323  * output_results(refinement_cycle);
2324  *
2325  * std::cout << std::endl;
2326  * }
2327  * }
2328  * } // namespace Step22
2329  *
2330  *
2331  * @endcode
2332  *
2333  *
2334  * <a name="Thecodemaincodefunction"></a>
2335  * <h3>The <code>main</code> function</h3>
2336  *
2337 
2338  *
2339  * The main function is the same as in @ref step_20 "step-20". We pass the element degree as
2340  * a parameter and choose the space dimension at the well-known template slot.
2341  *
2342  * @code
2343  * int main()
2344  * {
2345  * try
2346  * {
2347  * using namespace Step22;
2348  *
2349  * StokesProblem<2> flow_problem(1);
2350  * flow_problem.run();
2351  * }
2352  * catch (std::exception &exc)
2353  * {
2354  * std::cerr << std::endl
2355  * << std::endl
2356  * << "----------------------------------------------------"
2357  * << std::endl;
2358  * std::cerr << "Exception on processing: " << std::endl
2359  * << exc.what() << std::endl
2360  * << "Aborting!" << std::endl
2361  * << "----------------------------------------------------"
2362  * << std::endl;
2363  *
2364  * return 1;
2365  * }
2366  * catch (...)
2367  * {
2368  * std::cerr << std::endl
2369  * << std::endl
2370  * << "----------------------------------------------------"
2371  * << std::endl;
2372  * std::cerr << "Unknown exception!" << std::endl
2373  * << "Aborting!" << std::endl
2374  * << "----------------------------------------------------"
2375  * << std::endl;
2376  * return 1;
2377  * }
2378  *
2379  * return 0;
2380  * }
2381  * @endcode
2382 <a name="Results"></a>
2383 <a name="Results"></a><h1>Results</h1>
2384 
2385 
2386 <a name="Outputoftheprogramandgraphicalvisualization"></a><h3>Output of the program and graphical visualization</h3>
2387 
2388 
2389 <a name="2Dcalculations"></a><h4>2D calculations</h4>
2390 
2391 
2392 Running the program with the space dimension set to 2 in the <code>main</code>
2393 function yields the following output (in "release mode",
2394 See also <a href="http://www.math.colostate.edu/~bangerth/videos.676.18.html">video lecture 18</a>.):
2395 @code
2396 examples/step-22> make run
2397 Refinement cycle 0
2398  Number of active cells: 64
2399  Number of degrees of freedom: 679 (594+85)
2400  Assembling...
2401  Computing preconditioner...
2402  Solving... 11 outer CG Schur complement iterations for pressure
2403 
2404 Refinement cycle 1
2405  Number of active cells: 160
2406  Number of degrees of freedom: 1683 (1482+201)
2407  Assembling...
2408  Computing preconditioner...
2409  Solving... 11 outer CG Schur complement iterations for pressure
2410 
2411 Refinement cycle 2
2412  Number of active cells: 376
2413  Number of degrees of freedom: 3813 (3370+443)
2414  Assembling...
2415  Computing preconditioner...
2416  Solving... 11 outer CG Schur complement iterations for pressure
2417 
2418 Refinement cycle 3
2419  Number of active cells: 880
2420  Number of degrees of freedom: 8723 (7722+1001)
2421  Assembling...
2422  Computing preconditioner...
2423  Solving... 11 outer CG Schur complement iterations for pressure
2424 
2425 Refinement cycle 4
2426  Number of active cells: 2008
2427  Number of degrees of freedom: 19383 (17186+2197)
2428  Assembling...
2429  Computing preconditioner...
2430  Solving... 11 outer CG Schur complement iterations for pressure
2431 
2432 Refinement cycle 5
2433  Number of active cells: 4288
2434  Number of degrees of freedom: 40855 (36250+4605)
2435  Assembling...
2436  Computing preconditioner...
2437  Solving... 11 outer CG Schur complement iterations for pressure
2438 @endcode
2439 
2440 The entire computation above takes about 2 seconds on a reasonably
2441 quick (for 2015 standards) machine.
2442 
2443 What we see immediately from this is that the number of (outer)
2444 iterations does not increase as we refine the mesh. This confirms the
2445 statement in the introduction that preconditioning the Schur
2446 complement with the mass matrix indeed yields a matrix spectrally
2447 equivalent to the identity matrix (i.e. with eigenvalues bounded above
2448 and below independently of the mesh size or the relative sizes of
2449 cells). In other words, the mass matrix and the Schur complement are
2450 spectrally equivalent.
2451 
2452 In the images below, we show the grids for the first six refinement
2453 steps in the program. Observe how the grid is refined in regions
2454 where the solution rapidly changes: On the upper boundary, we have
2455 Dirichlet boundary conditions that are -1 in the left half of the line
2456 and 1 in the right one, so there is an abrupt change at @f$x=0@f$. Likewise,
2457 there are changes from Dirichlet to Neumann data in the two upper
2458 corners, so there is need for refinement there as well:
2459 
2460 <table width="60%" align="center">
2461  <tr>
2462  <td align="center">
2463  <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-0.png" alt="">
2464  </td>
2465  <td align="center">
2466  <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-1.png" alt="">
2467  </td>
2468  </tr>
2469  <tr>
2470  <td align="center">
2471  <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-2.png" alt="">
2472  </td>
2473  <td align="center">
2474  <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-3.png" alt="">
2475  </td>
2476  </tr>
2477  <tr>
2478  <td align="center">
2479  <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-4.png" alt="">
2480  </td>
2481  <td align="center">
2482  <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-5.png" alt="">
2483  </td>
2484  </tr>
2485 </table>
2486 
2487 Finally, following is a plot of the flow field. It shows fluid
2488 transported along with the moving upper boundary and being replaced by
2489 material coming from below:
2490 
2491 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.solution.png" alt="">
2492 
2493 This plot uses the capability of VTK-based visualization programs (in
2494 this case of VisIt) to show vector data; this is the result of us
2495 declaring the velocity components of the finite element in use to be a
2496 set of vector components, rather than independent scalar components in
2497 the <code>StokesProblem@<dim@>::%output_results</code> function of this
2498 tutorial program.
2499 
2500 
2501 
2502 <a name="3Dcalculations"></a><h4>3D calculations</h4>
2503 
2504 
2505 In 3d, the screen output of the program looks like this:
2506 
2507 @code
2508 Refinement cycle 0
2509  Number of active cells: 32
2510  Number of degrees of freedom: 1356 (1275+81)
2511  Assembling...
2512  Computing preconditioner...
2513  Solving... 13 outer CG Schur complement iterations for pressure.
2514 
2515 Refinement cycle 1
2516  Number of active cells: 144
2517  Number of degrees of freedom: 5088 (4827+261)
2518  Assembling...
2519  Computing preconditioner...
2520  Solving... 14 outer CG Schur complement iterations for pressure.
2521 
2522 Refinement cycle 2
2523  Number of active cells: 704
2524  Number of degrees of freedom: 22406 (21351+1055)
2525  Assembling...
2526  Computing preconditioner...
2527  Solving... 14 outer CG Schur complement iterations for pressure.
2528 
2529 Refinement cycle 3
2530  Number of active cells: 3168
2531  Number of degrees of freedom: 93176 (89043+4133)
2532  Assembling...
2533  Computing preconditioner...
2534  Solving... 15 outer CG Schur complement iterations for pressure.
2535 
2536 Refinement cycle 4
2537  Number of active cells: 11456
2538  Number of degrees of freedom: 327808 (313659+14149)
2539  Assembling...
2540  Computing preconditioner...
2541  Solving... 15 outer CG Schur complement iterations for pressure.
2542 
2543 Refinement cycle 5
2544  Number of active cells: 45056
2545  Number of degrees of freedom: 1254464 (1201371+53093)
2546  Assembling...
2547  Computing preconditioner...
2548  Solving... 14 outer CG Schur complement iterations for pressure.
2549 @endcode
2550 
2551 Again, we see that the number of outer iterations does not increase as
2552 we refine the mesh. Nevertheless, the compute time increases
2553 significantly: for each of the iterations above separately, it takes about
2554 0.14 seconds, 0.63 seconds, 4.8 seconds, 35 seconds, 2 minutes and 33 seconds,
2555 and 13 minutes and 12 seconds. This overall superlinear (in the number of
2556 unknowns) increase in runtime is due to the fact that our inner solver is not
2557 @f${\cal O}(N)@f$: a simple experiment shows that as we keep refining the mesh, the
2558 average number of ILU-preconditioned CG iterations to invert the
2559 velocity-velocity block @f$A@f$ increases.
2560 
2561 We will address the question of how possibly to improve our solver <a
2562 href="#improved-solver">below</a>.
2563 
2564 As for the graphical output, the grids generated during the solution
2565 look as follow:
2566 
2567 <table width="60%" align="center">
2568  <tr>
2569  <td align="center">
2570  <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-0.png" alt="">
2571  </td>
2572  <td align="center">
2573  <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-1.png" alt="">
2574  </td>
2575  </tr>
2576  <tr>
2577  <td align="center">
2578  <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-2.png" alt="">
2579  </td>
2580  <td align="center">
2581  <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-3.png" alt="">
2582  </td>
2583  </tr>
2584  <tr>
2585  <td align="center">
2586  <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-4.png" alt="">
2587  </td>
2588  <td align="center">
2589  <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-5.png" alt="">
2590  </td>
2591  </tr>
2592 </table>
2593 
2594 Again, they show essentially the location of singularities introduced
2595 by boundary conditions. The vector field computed makes for an
2596 interesting graph:
2597 
2598 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.solution.png" alt="">
2599 
2600 The isocontours shown here as well are those of the pressure
2601 variable, showing the singularity at the point of discontinuous
2602 velocity boundary conditions.
2603 
2604 
2605 
2606 <a name="Sparsitypattern"></a><h3>Sparsity pattern</h3>
2607 
2608 
2609 As explained during the generation of the sparsity pattern, it is
2610 important to have the numbering of degrees of freedom in mind when
2611 using preconditioners like incomplete LU decompositions. This is most
2612 conveniently visualized using the distribution of nonzero elements in
2613 the stiffness matrix.
2614 
2615 If we don't do anything special to renumber degrees of freedom (i.e.,
2616 without using DoFRenumbering::Cuthill_McKee, but with using
2617 DoFRenumbering::component_wise to ensure that degrees of freedom are
2618 appropriately sorted into their corresponding blocks of the matrix and
2619 vector), then we get the following image after the first adaptive
2620 refinement in two dimensions:
2621 
2622 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.sparsity-nor.png" alt="">
2623 
2624 In order to generate such a graph, you have to insert a piece of
2625 code like the following to the end of the setup step.
2626 @code
2627  {
2628  std::ofstream out ("sparsity_pattern.gpl");
2629  sparsity_pattern.print_gnuplot(out);
2630  }
2631 @endcode
2632 
2633 It is clearly visible that the nonzero entries are spread over almost the
2634 whole matrix. This makes preconditioning by ILU inefficient: ILU generates a
2635 Gaussian elimination (LU decomposition) without fill-in elements, which means
2636 that more tentative fill-ins left out will result in a worse approximation of
2637 the complete decomposition.
2638 
2639 In this program, we have thus chosen a more advanced renumbering of
2640 components. The renumbering with DoFRenumbering::Cuthill_McKee and grouping
2641 the components into velocity and pressure yields the following output:
2642 
2643 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.sparsity-ren.png" alt="">
2644 
2645 It is apparent that the situation has improved a lot. Most of the elements are
2646 now concentrated around the diagonal in the (0,0) block in the matrix. Similar
2647 effects are also visible for the other blocks. In this case, the ILU
2648 decomposition will be much closer to the full LU decomposition, which improves
2649 the quality of the preconditioner. (It may be interesting to note that the
2650 sparse direct solver UMFPACK does some %internal renumbering of the equations
2651 before actually generating a sparse LU decomposition; that procedure leads to
2652 a very similar pattern to the one we got from the Cuthill-McKee algorithm.)
2653 
2654 Finally, we want to have a closer
2655 look at a sparsity pattern in 3D. We show only the (0,0) block of the
2656 matrix, again after one adaptive refinement. Apart from the fact that the matrix
2657 size has increased, it is also visible that there are many more entries
2658 in the matrix. Moreover, even for the optimized renumbering, there will be a
2659 considerable amount of tentative fill-in elements. This illustrates why UMFPACK
2660 is not a good choice in 3D - a full decomposition needs many new entries that
2661  eventually won't fit into the physical memory (RAM):
2662 
2663 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.sparsity_uu-ren.png" alt="">
2664 
2665 
2666 
2667 <a name="Possibilitiesforextensions"></a><h3>Possibilities for extensions</h3>
2668 
2669 
2670 <a name="improved-solver">
2671 <a name="Improvedlinearsolverin3D"></a><h4>Improved linear solver in 3D</h4>
2672 
2673 </a>
2674 
2675 We have seen in the section of computational results that the number of outer
2676 iterations does not depend on the mesh size, which is optimal in a sense of
2677 scalability. This does, however, not apply to the solver as a whole, as
2678 mentioned above:
2679 We did not look at the number of inner iterations when generating the inverse of
2680 the matrix @f$A@f$ and the mass matrix @f$M_p@f$. Of course, this is unproblematic in
2681 the 2D case where we precondition @f$A@f$ with a direct solver and the
2682 <code>vmult</code> operation of the inverse matrix structure will converge in
2683 one single CG step, but this changes in 3D where we only use an ILU
2684 preconditioner. There, the number of required preconditioned CG steps to
2685 invert @f$A@f$ increases as the mesh is refined, and each <code>vmult</code>
2686 operation involves on average approximately 14, 23, 36, 59, 75 and 101 inner
2687 CG iterations in the refinement steps shown above. (On the other hand,
2688 the number of iterations for applying the inverse pressure mass matrix is
2689 always around five, both in two and three dimensions.) To summarize, most work
2690 is spent on solving linear systems with the same matrix @f$A@f$ over and over again.
2691 What makes this look even worse is the fact that we
2692 actually invert a matrix that is about 95 percent the size of the total system
2693 matrix and stands for 85 percent of the non-zero entries in the sparsity
2694 pattern. Hence, the natural question is whether it is reasonable to solve a
2695 linear system with matrix @f$A@f$ for about 15 times when calculating the solution
2696 to the block system.
2697 
2698 The answer is, of course, that we can do that in a few other (most of the time
2699 better) ways.
2700 Nevertheless, it has to be remarked that an indefinite system as the one
2701 at hand puts indeed much higher
2702 demands on the linear algebra than standard elliptic problems as we have seen
2703 in the early tutorial programs. The improvements are still rather
2704 unsatisfactory, if one compares with an elliptic problem of similar
2705 size. Either way, we will introduce below a number of improvements to the
2706 linear solver, a discussion that we will re-consider again with additional
2707 options in the @ref step_31 "step-31" program.
2708 
2709 <a name="improved-ilu">
2710 <a name="BetterILUdecompositionbysmartreordering"></a><h5>Better ILU decomposition by smart reordering</h5>
2711 
2712 </a>
2713 A first attempt to improve the speed of the linear solution process is to choose
2714 a dof reordering that makes the ILU being closer to a full LU decomposition, as
2715 already mentioned in the in-code comments. The DoFRenumbering namespace compares
2716 several choices for the renumbering of dofs for the Stokes equations. The best
2717 result regarding the computing time was found for the King ordering, which is
2718 accessed through the call DoFRenumbering::boost::king_ordering. With that
2719 program, the inner solver needs considerably less operations, e.g. about 62
2720 inner CG iterations for the inversion of @f$A@f$ at cycle 4 compared to about 75
2721 iterations with the standard Cuthill-McKee-algorithm. Also, the computing time
2722 at cycle 4 decreased from about 17 to 11 minutes for the <code>solve()</code>
2723 call. However, the King ordering (and the orderings provided by the
2724 DoFRenumbering::boost namespace in general) has a serious drawback - it uses
2725 much more memory than the in-build deal versions, since it acts on abstract
2726 graphs rather than the geometry provided by the triangulation. In the present
2727 case, the renumbering takes about 5 times as much memory, which yields an
2728 infeasible algorithm for the last cycle in 3D with 1.2 million
2729 unknowns.
2730 
2731 <a name="BetterpreconditionerfortheinnerCGsolver"></a><h5>Better preconditioner for the inner CG solver</h5>
2732 
2733 Another idea to improve the situation even more would be to choose a
2734 preconditioner that makes CG for the (0,0) matrix @f$A@f$ converge in a
2735 mesh-independent number of iterations, say 10 to 30. We have seen such a
2736 candidate in @ref step_16 "step-16": multigrid.
2737 
2738 <a name="BlockSchurcomplementpreconditioner"></a><h5>Block Schur complement preconditioner</h5>
2739 
2740 <a name="block-schur"></a>
2741 Even with a good preconditioner for @f$A@f$, we still
2742 need to solve of the same linear system repeatedly (with different
2743 right hand sides, though) in order to make the Schur complement solve
2744 converge. The approach we are going to discuss here is how inner iteration
2745 and outer iteration can be combined. If we persist in calculating the Schur
2746 complement, there is no other possibility.
2747 
2748 The alternative is to attack the block system at once and use an approximate
2749 Schur complement as efficient preconditioner. The idea is as
2750 follows: If we find a block preconditioner @f$P@f$ such that the matrix
2751 @f{eqnarray*}
2752  P^{-1}\left(\begin{array}{cc}
2753  A & B^T \\ B & 0
2754  \end{array}\right)
2755 @f}
2756 is simple, then an iterative solver with that preconditioner will converge in a
2757 few iterations. Using the Schur complement @f$S = B A^{-1} B^T@f$, one finds that
2758 @f{eqnarray*}
2759  P^{-1}
2760  =
2761  \left(\begin{array}{cc}
2762  A^{-1} & 0 \\ S^{-1} B A^{-1} & -S^{-1}
2763  \end{array}\right)
2764 @f}
2765 would appear to be a good choice since
2766 @f{eqnarray*}
2767  P^{-1}\left(\begin{array}{cc}
2768  A & B^T \\ B & 0
2769  \end{array}\right)
2770  =
2771  \left(\begin{array}{cc}
2772  A^{-1} & 0 \\ S^{-1} B A^{-1} & -S^{-1}
2773  \end{array}\right)\cdot \left(\begin{array}{cc}
2774  A & B^T \\ B & 0
2775  \end{array}\right)
2776  =
2777  \left(\begin{array}{cc}
2778  I & A^{-1} B^T \\ 0 & I
2779  \end{array}\right).
2780 @f}
2781 This is the approach taken by the paper by Silvester and Wathen referenced
2782 to in the introduction (with the exception that Silvester and Wathen use
2783 right preconditioning). In this case, a Krylov-based iterative method would
2784 converge in one step only if exact inverses of @f$A@f$ and @f$S@f$ were applied,
2785 since all the eigenvalues are one (and the number of iterations in such a
2786 method is bounded by the number of distinct eigenvalues). Below, we will
2787 discuss the choice of an adequate solver for this problem. First, we are
2788 going to have a closer look at the implementation of the preconditioner.
2789 
2790 Since @f$P@f$ is aimed to be a preconditioner only, we shall use approximations to
2791 the inverse of the Schur complement @f$S@f$ and the matrix @f$A@f$. Hence, the Schur
2792 complement will be approximated by the pressure mass matrix @f$M_p@f$, and we use
2793 a preconditioner to @f$A@f$ (without an InverseMatrix class around it) for
2794 approximating @f$A^{-1}@f$.
2795 
2796 Here comes the class that implements the block Schur
2797 complement preconditioner. The <code>vmult</code> operation for block vectors
2798 according to the derivation above can be specified by three successive
2799 operations:
2800 @code
2801 template <class PreconditionerA, class PreconditionerMp>
2802 class BlockSchurPreconditioner : public Subscriptor
2803 {
2804  public:
2805  BlockSchurPreconditioner (const BlockSparseMatrix<double> &S,
2806  const InverseMatrix<SparseMatrix<double>,PreconditionerMp> &Mpinv,
2807  const PreconditionerA &Apreconditioner);
2808 
2809  void vmult (BlockVector<double> &dst,
2810  const BlockVector<double> &src) const;
2811 
2812  private:
2813  const SmartPointer<const BlockSparseMatrix<double> > system_matrix;
2815  PreconditionerMp > > m_inverse;
2816  const PreconditionerA &a_preconditioner;
2817 
2818  mutable Vector<double> tmp;
2819 
2820 };
2821 
2822 template <class PreconditionerA, class PreconditionerMp>
2823 BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::BlockSchurPreconditioner(
2824  const BlockSparseMatrix<double> &S,
2825  const InverseMatrix<SparseMatrix<double>,PreconditionerMp> &Mpinv,
2826  const PreconditionerA &Apreconditioner
2827  )
2828  :
2829  system_matrix (&S),
2830  m_inverse (&Mpinv),
2831  a_preconditioner (Apreconditioner),
2832  tmp (S.block(1,1).m())
2833 {}
2834 
2835  // Now the interesting function, the multiplication of
2836  // the preconditioner with a BlockVector.
2837 template <class PreconditionerA, class PreconditionerMp>
2838 void BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::vmult (
2839  BlockVector<double> &dst,
2840  const BlockVector<double> &src) const
2841 {
2842  // Form u_new = A^{-1} u
2843  a_preconditioner.vmult (dst.block(0), src.block(0));
2844  // Form tmp = - B u_new + p
2845  // (<code>SparseMatrix::residual</code>
2846  // does precisely this)
2847  system_matrix->block(1,0).residual(tmp, dst.block(0), src.block(1));
2848  // Change sign in tmp
2849  tmp *= -1;
2850  // Multiply by approximate Schur complement
2851  // (i.e. a pressure mass matrix)
2852  m_inverse->vmult (dst.block(1), tmp);
2853 }
2854 @endcode
2855 
2856 Since we act on the whole block system now, we have to live with one
2857 disadvantage: we need to perform the solver iterations on
2858 the full block system instead of the smaller pressure space.
2859 
2860 Now we turn to the question which solver we should use for the block
2861 system. The first observation is that the resulting preconditioned matrix cannot
2862 be solved with CG since it is neither positive definite nor symmetric.
2863 
2864 The deal.II libraries implement several solvers that are appropriate for the
2865 problem at hand. One choice is the solver @ref SolverBicgstab "BiCGStab", which
2866 was used for the solution of the unsymmetric advection problem in @ref step_9 "step-9". The
2867 second option, the one we are going to choose, is @ref SolverGMRES "GMRES"
2868 (generalized minimum residual). Both methods have their pros and cons - there
2869 are problems where one of the two candidates clearly outperforms the other, and
2870 vice versa.
2871 <a href="http://en.wikipedia.org/wiki/GMRES#Comparison_with_other_solvers">Wikipedia</a>'s
2872 article on the GMRES method gives a comparative presentation.
2873 A more comprehensive and well-founded comparison can be read e.g. in the book by
2874 J.W. Demmel (Applied Numerical Linear Algebra, SIAM, 1997, section 6.6.6).
2875 
2876 For our specific problem with the ILU preconditioner for @f$A@f$, we certainly need
2877 to perform hundreds of iterations on the block system for large problem sizes
2878 (we won't beat CG!). Actually, this disfavors GMRES: During the GMRES
2879 iterations, a basis of Krylov vectors is successively built up and some
2880 operations are performed on these vectors. The more vectors are in this basis,
2881 the more operations and memory will be needed. The number of operations scales
2882 as @f${\cal O}(n + k^2)@f$ and memory as @f${\cal O}(kn)@f$, where @f$k@f$ is the number of
2883 vectors in the Krylov basis and @f$n@f$ the size of the (block) matrix.
2884 To not let these demands grow excessively, deal.II limits the size @f$k@f$ of the
2885 basis to 30 vectors by default.
2886 Then, the basis is rebuilt. This implementation of the GMRES method is called
2887 GMRES(k), with default @f$k=30@f$. What we have gained by this restriction,
2888 namely a bound on operations and memory requirements, will be compensated by
2889 the fact that we use an incomplete basis - this will increase the number of
2890 required iterations.
2891 
2892 BiCGStab, on the other hand, won't get slower when many iterations are needed
2893 (one iteration uses only results from one preceding step and
2894 not all the steps as GMRES). Besides the fact the BiCGStab is more expensive per
2895 step since two matrix-vector products are needed (compared to one for
2896 CG or GMRES), there is one main reason which makes BiCGStab not appropriate for
2897 this problem: The preconditioner applies the inverse of the pressure
2898 mass matrix by using the InverseMatrix class. Since the application of the
2899 inverse matrix to a vector is done only in approximative way (an exact inverse
2900 is too expensive), this will also affect the solver. In the case of BiCGStab,
2901 the Krylov vectors will not be orthogonal due to that perturbation. While
2902 this is uncritical for a small number of steps (up to about 50), it ruins the
2903 performance of the solver when these perturbations have grown to a significant
2904 magnitude in the coarse of iterations.
2905 
2906 We did some experiments with BiCGStab and found it to
2907 be faster than GMRES up to refinement cycle 3 (in 3D), but it became very slow
2908 for cycles 4 and 5 (even slower than the original Schur complement), so the
2909 solver is useless in this situation. Choosing a sharper tolerance for the
2910 inverse matrix class (<code>1e-10*src.l2_norm()</code> instead of
2911 <code>1e-6*src.l2_norm()</code>) made BiCGStab perform well also for cycle 4,
2912 but did not change the failure on the very large problems.
2913 
2914 GMRES is of course also effected by the approximate inverses, but it is not as
2915 sensitive to orthogonality and retains a relatively good performance also for
2916 large sizes, see the results below.
2917 
2918 With this said, we turn to the realization of the solver call with GMRES with
2919 @f$k=100@f$ temporary vectors:
2920 
2921 @code
2922  const SparseMatrix<double> &pressure_mass_matrix
2923  = preconditioner_matrix.block(1,1);
2924  SparseILU<double> pmass_preconditioner;
2925  pmass_preconditioner.initialize (pressure_mass_matrix,
2926  SparseILU<double>::AdditionalData());
2927 
2928  InverseMatrix<SparseMatrix<double>,SparseILU<double> >
2929  m_inverse (pressure_mass_matrix, pmass_preconditioner);
2930 
2931  BlockSchurPreconditioner<typename InnerPreconditioner<dim>::type,
2932  SparseILU<double> >
2933  preconditioner (system_matrix, m_inverse, *A_preconditioner);
2934 
2935  SolverControl solver_control (system_matrix.m(),
2936  1e-6*system_rhs.l2_norm());
2937  GrowingVectorMemory<BlockVector<double> > vector_memory;
2938  SolverGMRES<BlockVector<double> >::AdditionalData gmres_data;
2939  gmres_data.max_n_tmp_vectors = 100;
2940 
2941  SolverGMRES<BlockVector<double> > gmres(solver_control, vector_memory,
2942  gmres_data);
2943 
2944  gmres.solve(system_matrix, solution, system_rhs,
2945  preconditioner);
2946 
2947  constraints.distribute (solution);
2948 
2949  std::cout << " "
2950  << solver_control.last_step()
2951  << " block GMRES iterations";
2952 @endcode
2953 
2954 Obviously, one needs to add the include file @ref SolverGMRES
2955 "<lac/solver_gmres.h>" in order to make this run.
2956 We call the solver with a BlockVector template in order to enable
2957 GMRES to operate on block vectors and matrices.
2958 Note also that we need to set the (1,1) block in the system
2959 matrix to zero (we saved the pressure mass matrix there which is not part of the
2960 problem) after we copied the information to another matrix.
2961 
2962 Using the Timer class, we collect some statistics that compare the runtime
2963 of the block solver with the one from the problem implementation above.
2964 Besides the solution with the two options we also check if the solutions
2965 of the two variants are close to each other (i.e. this solver gives indeed the
2966 same solution as we had before) and calculate the infinity
2967 norm of the vector difference.
2968 
2969 Let's first see the results in 2D:
2970 @code
2971 Refinement cycle 0
2972  Number of active cells: 64
2973  Number of degrees of freedom: 679 (594+85) [0.00162792 s]
2974  Assembling... [0.00108981 s]
2975  Computing preconditioner... [0.0025959 s]
2976  Solving...
2977  Schur complement: 11 outer CG iterations for p [0.00479603s ]
2978  Block Schur preconditioner: 12 GMRES iterations [0.00441718 s]
2979  l_infinity difference between solution vectors: 5.38258e-07
2980 
2981 Refinement cycle 1
2982  Number of active cells: 160
2983  Number of degrees of freedom: 1683 (1482+201) [0.00345707 s]
2984  Assembling... [0.00237417 s]
2985  Computing preconditioner... [0.00605702 s]
2986  Solving...
2987  Schur complement: 11 outer CG iterations for p [0.0123992s ]
2988  Block Schur preconditioner: 12 GMRES iterations [0.011909 s]
2989  l_infinity difference between solution vectors: 1.74658e-05
2990 
2991 Refinement cycle 2
2992  Number of active cells: 376
2993  Number of degrees of freedom: 3813 (3370+443) [0.00729299 s]
2994  Assembling... [0.00529909 s]
2995  Computing preconditioner... [0.0167508 s]
2996  Solving...
2997  Schur complement: 11 outer CG iterations for p [0.031672s ]
2998  Block Schur preconditioner: 12 GMRES iterations [0.029232 s]
2999  l_infinity difference between solution vectors: 7.81569e-06
3000 
3001 Refinement cycle 3
3002  Number of active cells: 880
3003  Number of degrees of freedom: 8723 (7722+1001) [0.017709 s]
3004  Assembling... [0.0126002 s]
3005  Computing preconditioner... [0.0435679 s]
3006  Solving...
3007  Schur complement: 11 outer CG iterations for p [0.0971651s ]
3008  Block Schur preconditioner: 12 GMRES iterations [0.0992041 s]
3009  l_infinity difference between solution vectors: 1.87249e-05
3010 
3011 Refinement cycle 4
3012  Number of active cells: 2008
3013  Number of degrees of freedom: 19383 (17186+2197) [0.039988 s]
3014  Assembling... [0.028281 s]
3015  Computing preconditioner... [0.118314 s]
3016  Solving...
3017  Schur complement: 11 outer CG iterations for p [0.252133s ]
3018  Block Schur preconditioner: 13 GMRES iterations [0.269125 s]
3019  l_infinity difference between solution vectors: 6.38657e-05
3020 
3021 Refinement cycle 5
3022  Number of active cells: 4288
3023  Number of degrees of freedom: 40855 (36250+4605) [0.0880702 s]
3024  Assembling... [0.0603511 s]
3025  Computing preconditioner... [0.278339 s]
3026  Solving...
3027  Schur complement: 11 outer CG iterations for p [0.53846s ]
3028  Block Schur preconditioner: 13 GMRES iterations [0.578667 s]
3029  l_infinity difference between solution vectors: 0.000173363
3030 @endcode
3031 
3032 We see that there is no huge difference in the solution time between the
3033 block Schur complement preconditioner solver and the Schur complement
3034 itself. The reason is simple: we used a direct solve as preconditioner for
3035 @f$A@f$ - so we cannot expect any gain by avoiding the inner iterations. We see
3036 that the number of iterations has slightly increased for GMRES, but all in
3037 all the two choices are fairly similar.
3038 
3039 The picture of course changes in 3D:
3040 
3041 @code
3042 Refinement cycle 0
3043  Number of active cells: 32
3044  Number of degrees of freedom: 1356 (1275+81) [0.00845218 s]
3045  Assembling... [0.019372 s]
3046  Computing preconditioner... [0.00712395 s]
3047  Solving...
3048  Schur complement: 13 outer CG iterations for p [0.0320101s ]
3049  Block Schur preconditioner: 22 GMRES iterations [0.0048759 s]
3050  l_infinity difference between solution vectors: 2.15942e-05
3051 
3052 Refinement cycle 1
3053  Number of active cells: 144
3054  Number of degrees of freedom: 5088 (4827+261) [0.0346942 s]
3055  Assembling... [0.0857739 s]
3056  Computing preconditioner... [0.0465031 s]
3057  Solving...
3058  Schur complement: 14 outer CG iterations for p [0.349258s ]
3059  Block Schur preconditioner: 35 GMRES iterations [0.048759 s]
3060  l_infinity difference between solution vectors: 1.77657e-05
3061 
3062 Refinement cycle 2
3063  Number of active cells: 704
3064  Number of degrees of freedom: 22406 (21351+1055) [0.175669 s]
3065  Assembling... [0.437447 s]
3066  Computing preconditioner... [0.286435 s]
3067  Solving...
3068  Schur complement: 14 outer CG iterations for p [3.65519s ]
3069  Block Schur preconditioner: 63 GMRES iterations [0.497787 s]
3070  l_infinity difference between solution vectors: 5.08078e-05
3071 
3072 Refinement cycle 3
3073  Number of active cells: 3168
3074  Number of degrees of freedom: 93176 (89043+4133) [0.790985 s]
3075  Assembling... [1.97598 s]
3076  Computing preconditioner... [1.4325 s]
3077  Solving...
3078  Schur complement: 15 outer CG iterations for p [29.9666s ]
3079  Block Schur preconditioner: 128 GMRES iterations [5.02645 s]
3080  l_infinity difference between solution vectors: 0.000119671
3081 
3082 Refinement cycle 4
3083  Number of active cells: 11456
3084  Number of degrees of freedom: 327808 (313659+14149) [3.44995 s]
3085  Assembling... [7.54772 s]
3086  Computing preconditioner... [5.46306 s]
3087  Solving...
3088  Schur complement: 15 outer CG iterations for p [139.987s ]
3089  Block Schur preconditioner: 255 GMRES iterations [38.0946 s]
3090  l_infinity difference between solution vectors: 0.00020793
3091 
3092 Refinement cycle 5
3093  Number of active cells: 45056
3094  Number of degrees of freedom: 1254464 (1201371+53093) [19.6795 s]
3095  Assembling... [28.6586 s]
3096  Computing preconditioner... [22.401 s]
3097  Solving...
3098  Schur complement: 14 outer CG iterations for p [796.767s ]
3099  Block Schur preconditioner: 524 GMRES iterations [355.597 s]
3100  l_infinity difference between solution vectors: 0.000501219
3101 @endcode
3102 
3103 Here, the block preconditioned solver is clearly superior to the Schur
3104 complement, but the advantage gets less for more mesh points. This is
3105 because GMRES(k) scales worse with the problem size than CG, as we discussed
3106 above. Nonetheless, the improvement by a factor of 3-6 for moderate problem
3107 sizes is quite impressive.
3108 
3109 
3110 <a name="Combiningtheblockpreconditionerandmultigrid"></a><h5>Combining the block preconditioner and multigrid</h5>
3111 
3112 An ultimate linear solver for this problem could be imagined as a
3113 combination of an optimal
3114 preconditioner for @f$A@f$ (e.g. multigrid) and the block preconditioner
3115 described above, which is the approach taken in the @ref step_31 "step-31"
3116 and @ref step_32 "step-32" tutorial programs (where we use an algebraic multigrid
3117 method) and @ref step_56 "step-56" (where we use a geometric multigrid method).
3118 
3119 
3120 <a name="Noblockmatricesandvectors"></a><h5>No block matrices and vectors</h5>
3121 
3122 Another possibility that can be taken into account is to not set up a block
3123 system, but rather solve the system of velocity and pressure all at once. The
3124 options are direct solve with UMFPACK (2D) or GMRES with ILU
3125 preconditioning (3D). It should be straightforward to try that.
3126 
3127 
3128 
3129 <a name="Moreinterestingtestcases"></a><h4>More interesting testcases</h4>
3130 
3131 
3132 The program can of course also serve as a basis to compute the flow in more
3133 interesting cases. The original motivation to write this program was for it to
3134 be a starting point for some geophysical flow problems, such as the
3135 movement of magma under places where continental plates drift apart (for
3136 example mid-ocean ridges). Of course, in such places, the geometry is more
3137 complicated than the examples shown above, but it is not hard to accommodate
3138 for that.
3139 
3140 For example, by using the following modification of the boundary values
3141 function
3142 @code
3143 template <int dim>
3144 double
3145 BoundaryValues<dim>::value (const Point<dim> &p,
3146  const unsigned int component) const
3147 {
3148  Assert (component < this->n_components,
3149  ExcIndexRange (component, 0, this->n_components));
3150 
3151  const double x_offset = std::atan(p[1]*4)/3;
3152 
3153  if (component == 0)
3154  return (p[0] < x_offset ? -1 : (p[0] > x_offset ? 1 : 0));
3155  return 0;
3156 }
3157 @endcode
3158 and the following way to generate the mesh as the domain
3159 @f$[-2,2]\times[-2,2]\times[-1,0]@f$
3160 @code
3161  std::vector<unsigned int> subdivisions (dim, 1);
3162  subdivisions[0] = 4;
3163  if (dim>2)
3164  subdivisions[1] = 4;
3165 
3166  const Point<dim> bottom_left = (dim == 2 ?
3167  Point<dim>(-2,-1) :
3168  Point<dim>(-2,-2,-1));
3169  const Point<dim> top_right = (dim == 2 ?
3170  Point<dim>(2,0) :
3171  Point<dim>(2,2,0));
3172 
3174  subdivisions,
3175  bottom_left,
3176  top_right);
3177 @endcode
3178 then we get images where the fault line is curved:
3179 <table width="60%" align="center">
3180  <tr>
3181  <td align="center">
3182  <img src="https://www.dealii.org/images/steps/developer/step-22.3d-extension.png" alt="">
3183  </td>
3184  <td align="center">
3185  <img src="https://www.dealii.org/images/steps/developer/step-22.3d-grid-extension.png" alt="">
3186  </td>
3187  </tr>
3188 </table>
3189  *
3190  *
3191 <a name="PlainProg"></a>
3192 <h1> The plain program</h1>
3193 @include "step-22.cc"
3194 */
Definition: point.h:111
constexpr SymmetricTensor< 2, dim, Number > invert(const SymmetricTensor< 2, dim, Number > &t)
std::array< Number, 1 > eigenvalues(const SymmetricTensor< 2, 1, Number > &T)
Point< 2 > second
Definition: grid_out.cc:4588
Point< 2 > first
Definition: grid_out.cc:4587
__global__ void set(Number *val, const Number s, const size_type N)
static ::ExceptionBase & ExcIndexRange(int arg1, int arg2, int arg3)
#define Assert(cond, exc)
Definition: exceptions.h:1465
BlockType & block(const unsigned int i)
void loop(ITERATOR begin, typename identity< ITERATOR >::type end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
Definition: loop.h:439
void make_hanging_node_constraints(const DoFHandler< dim, spacedim > &dof_handler, AffineConstraints< number > &constraints)
void make_sparsity_pattern(const DoFHandler< dim, spacedim > &dof_handler, SparsityPatternType &sparsity_pattern, const AffineConstraints< number > &constraints=AffineConstraints< number >(), const bool keep_constrained_dofs=true, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id)
const Event initial
Definition: event.cc:65
void approximate(SynchronousIterators< std::tuple< typename DoFHandler< dim, spacedim >::active_cell_iterator, Vector< float >::iterator >> const &cell, const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof_handler, const InputVector &solution, const unsigned int component)
Expression atan(const Expression &x)
void king_ordering(DoFHandler< dim, spacedim > &dof_handler, const bool reversed_numbering=false, const bool use_constraints=false)
void component_wise(DoFHandler< dim, spacedim > &dof_handler, const std::vector< unsigned int > &target_component=std::vector< unsigned int >())
void Cuthill_McKee(DoFHandler< dim, spacedim > &dof_handler, const bool reversed_numbering=false, const bool use_constraints=false, const std::vector< types::global_dof_index > &starting_indices=std::vector< types::global_dof_index >())
std::vector< types::global_dof_index > count_dofs_per_fe_block(const DoFHandler< dim, spacedim > &dof, const std::vector< unsigned int > &target_block=std::vector< unsigned int >())
Definition: dof_tools.cc:2047
std::vector< types::global_dof_index > count_dofs_per_fe_component(const DoFHandler< dim, spacedim > &dof_handler, const bool vector_valued_once=false, const std::vector< unsigned int > &target_component={})
Definition: dof_tools.cc:1943
void subdivided_hyper_rectangle(Triangulation< dim, spacedim > &tria, const std::vector< unsigned int > &repetitions, const Point< dim > &p1, const Point< dim > &p2, const bool colorize=false)
static const types::blas_int zero
@ matrix
Contents is actually a matrix.
static const char A
@ symmetric
Matrix is symmetric.
@ diagonal
Matrix is diagonal.
@ general
No special properties.
static const char T
static const types::blas_int one
static const char O
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
Definition: utilities.cc:188
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
constexpr ReturnType< rank, T >::value_type & extract(T &t, const ArrayType &indices)
void call(const std::function< RT()> &function, internal::return_value< RT > &ret_val)
VectorType::value_type * begin(VectorType &V)
VectorType::value_type * end(VectorType &V)
void interpolate_boundary_values(const Mapping< dim, spacedim > &mapping, const DoFHandler< dim, spacedim > &dof, const std::map< types::boundary_id, const Function< spacedim, number > * > &function_map, std::map< types::global_dof_index, number > &boundary_values, const ComponentMask &component_mask=ComponentMask())
int(&) functions(const void *v1, const void *v2)
Definition: types.h:32
const ::parallel::distributed::Triangulation< dim, spacedim > * triangulation