MueLu  Version of the Day
MueLu_CGSolver_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_CGSOLVER_DEF_HPP
47 #define MUELU_CGSOLVER_DEF_HPP
48 
49 #include <Xpetra_MatrixFactory.hpp>
50 #include <Xpetra_MatrixMatrix.hpp>
51 
52 #include "MueLu_CGSolver_decl.hpp"
53 
54 #include "MueLu_Constraint.hpp"
55 #include "MueLu_Monitor.hpp"
56 #include "MueLu_Utilities.hpp"
57 
58 namespace MueLu {
59 
60  using Teuchos::rcp_const_cast;
61 
62  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
64  : nIts_(Its)
65  { }
66 
67  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
68  void CGSolver<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Iterate(const Matrix& Aref, const Constraint& C, const Matrix& P0, RCP<Matrix>& finalP) const {
69  PrintMonitor m(*this, "CG iterations");
70 
71  if (nIts_ == 0) {
72  finalP = MatrixFactory2::BuildCopy(rcpFromRef(P0));
73  return;
74  }
75 
76  // Note: this function matrix notations follow Saad's "Iterative methods", ed. 2, pg. 246
77  // So, X is the unknown prolongator, P's are conjugate directions, Z's are preconditioned P's
78  RCP<const Matrix> A = rcpFromRef(Aref);
79 
80  RCP<Matrix> X, P, R, Z, AP;
81  RCP<Matrix> newX, tmpAP;
82 #ifndef TWO_ARG_MATRIX_ADD
83  RCP<Matrix> newR, newP;
84 #endif
85 
86  SC oldRZ, newRZ, alpha, beta, app;
87 
88  bool useTpetra = (A->getRowMap()->lib() == Xpetra::UseTpetra);
89 
90  Teuchos::FancyOStream& mmfancy = this->GetOStream(Statistics2);
91 
92  // T is used only for projecting onto
93  RCP<CrsMatrix> T_ = CrsMatrixFactory::Build(C.GetPattern());
94  T_->fillComplete(P0.getDomainMap(), P0.getRangeMap());
95  RCP<Matrix> T = rcp(new CrsMatrixWrap(T_));
96 
97  SC one = Teuchos::ScalarTraits<SC>::one();
98 
99  Teuchos::ArrayRCP<const SC> D = Utils::GetMatrixDiagonal(*A);
100 
101  // Initial P0 would only be used for multiplication
102  X = rcp_const_cast<Matrix>(rcpFromRef(P0));
103 
104  bool doFillComplete = true;
105  // bool optimizeStorage = false;
106  bool optimizeStorage = true;
107 
108  tmpAP = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *X, false, mmfancy, doFillComplete, optimizeStorage);
109  C.Apply(*tmpAP, *T);
110 
111  // R_0 = -A*X_0
112  R = MatrixFactory2::BuildCopy(T);
113 #ifdef HAVE_MUELU_TPETRA
114  if (useTpetra)
115  Utils::Op2NonConstTpetraCrs(R)->resumeFill();
116 #endif
117  R->scale(-one);
118  if (useTpetra)
119  R->fillComplete(R->getDomainMap(), R->getRangeMap());
120 
121  // Z_0 = M^{-1}R_0
122  Z = MatrixFactory2::BuildCopy(R);
123  Utils::MyOldScaleMatrix(*Z, D, true, true, false);
124 
125  // P_0 = Z_0
126  P = MatrixFactory2::BuildCopy(Z);
127 
128  oldRZ = Frobenius(*R, *Z);
129 
130  for (size_t k = 0; k < nIts_; k++) {
131  // AP = constrain(A*P)
132  if (k == 0 || useTpetra) {
133  // Construct the MxM pattern from scratch
134  // This is done by default for Tpetra as the three argument version requires tmpAP
135  // to *not* be locally indexed which defeats the purpose
136  // TODO: need a three argument Tpetra version which allows reuse of already fill-completed matrix
137  tmpAP = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *P, false, mmfancy, doFillComplete, optimizeStorage);
138  } else {
139  // Reuse the MxM pattern
140  tmpAP = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *P, false, tmpAP, mmfancy, doFillComplete, optimizeStorage);
141  }
142  C.Apply(*tmpAP, *T);
143  AP = T;
144 
145  app = Frobenius(*AP, *P);
146  if (Teuchos::ScalarTraits<SC>::magnitude(app) < Teuchos::ScalarTraits<SC>::sfmin()) {
147  // It happens, for instance, if P = 0
148  // For example, if we use TentativePFactory for both nonzero pattern and initial guess
149  // I think it might also happen because of numerical breakdown, but we don't test for that yet
150  if (k == 0)
151  X = MatrixFactory2::BuildCopy(rcpFromRef(P0));
152  break;
153  }
154 
155  // alpha = (R_k, Z_k)/(A*P_k, P_k)
156  alpha = oldRZ / app;
157  this->GetOStream(Runtime1,1) << "alpha = " << alpha << std::endl;
158 
159  // X_{k+1} = X_k + alpha*P_k
160 #ifndef TWO_ARG_MATRIX_ADD
161  newX = Teuchos::null;
162  Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*P, false, alpha, *X, false, Teuchos::ScalarTraits<Scalar>::one(), newX, mmfancy);
163  newX->fillComplete(P0.getDomainMap(), P0.getRangeMap());
164  X.swap(newX);
165 #else
166  Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*P, false, alpha, *X, one);
167 #endif
168 
169  if (k == nIts_ - 1)
170  break;
171 
172  // R_{k+1} = R_k - alpha*A*P_k
173 #ifndef TWO_ARG_MATRIX_ADD
174  newR = Teuchos::null;
175  Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*AP, false, -alpha, *R, false, Teuchos::ScalarTraits<Scalar>::one(), newR, mmfancy);
176  newR->fillComplete(P0.getDomainMap(), P0.getRangeMap());
177  R.swap(newR);
178 #else
179  Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*AP, false, -alpha, *R, one);
180 #endif
181 
182  // Z_{k+1} = M^{-1} R_{k+1}
183  Z = MatrixFactory2::BuildCopy(R);
184  Utils::MyOldScaleMatrix(*Z, D, true, true, false);
185 
186  // beta = (R_{k+1}, Z_{k+1})/(R_k, Z_k)
187  newRZ = Frobenius(*R, *Z);
188  beta = newRZ / oldRZ;
189 
190  // P_{k+1} = Z_{k+1} + beta*P_k
191 #ifndef TWO_ARG_MATRIX_ADD
192  newP = Teuchos::null;
193  Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*P, false, beta, *Z, false, Teuchos::ScalarTraits<Scalar>::one(), newP, mmfancy);
194  newP->fillComplete(P0.getDomainMap(), P0.getRangeMap());
195  P.swap(newP);
196 #else
197  Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*Z, false, one, *P, beta);
198 #endif
199 
200  oldRZ = newRZ;
201  }
202 
203  finalP = X;
204  }
205 
206  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
207  Scalar CGSolver<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Frobenius(const Matrix& A, const Matrix& B) const {
208  // We check only row maps. Column may be different. One would hope that they are the same, as we typically
209  // calculate frobenius norm of the specified sparsity pattern with an updated matrix from the previous step,
210  // but matrix addition, even when one is submatrix of the other, changes column map (though change may be as
211  // simple as couple of elements swapped)
212  TEUCHOS_TEST_FOR_EXCEPTION(!A.getRowMap()->isSameAs(*B.getRowMap()), Exceptions::Incompatible, "MueLu::CGSolver::Frobenius: row maps are incompatible");
213  TEUCHOS_TEST_FOR_EXCEPTION(!A.isFillComplete() || !B.isFillComplete(), Exceptions::RuntimeError, "Matrices must be fill completed");
214 
215  const Map& AColMap = *A.getColMap();
216  const Map& BColMap = *B.getColMap();
217 
218  Teuchos::ArrayView<const LO> indA, indB;
219  Teuchos::ArrayView<const SC> valA, valB;
220  size_t nnzA = 0, nnzB = 0;
221 
222  // We use a simple algorithm
223  // for each row we fill valBAll array with the values in the corresponding row of B
224  // as such, it serves as both sorted array and as storage, so we don't need to do a
225  // tricky problem: "find a value in the row of B corresponding to the specific GID"
226  // Once we do that, we translate LID of entries of row of A to LID of B, and multiply
227  // corresponding entries.
228  // The algorithm should be reasonably cheap, as it does not sort anything, provided
229  // that getLocalElement and getGlobalElement functions are reasonably effective. It
230  // *is* possible that the costs are hidden in those functions, but if maps are close
231  // to linear maps, we should be fine
232  Teuchos::Array<SC> valBAll(BColMap.getNodeNumElements());
233 
234  LO invalid = Teuchos::OrdinalTraits<LO>::invalid();
235  SC zero = Teuchos::ScalarTraits<SC> ::zero(), f = zero, gf;
236  size_t numRows = A.getNodeNumRows();
237  for (size_t i = 0; i < numRows; i++) {
238  A.getLocalRowView(i, indA, valA);
239  B.getLocalRowView(i, indB, valB);
240  nnzA = indA.size();
241  nnzB = indB.size();
242 
243  // Set up array values
244  for (size_t j = 0; j < nnzB; j++)
245  valBAll[indB[j]] = valB[j];
246 
247  for (size_t j = 0; j < nnzA; j++) {
248  // The cost of the whole Frobenius dot product function depends on the
249  // cost of the getLocalElement and getGlobalElement functions here.
250  LO ind = BColMap.getLocalElement(AColMap.getGlobalElement(indA[j]));
251  if (ind != invalid)
252  f += valBAll[ind] * valA[j];
253  }
254 
255  // Clean up array values
256  for (size_t j = 0; j < nnzB; j++)
257  valBAll[indB[j]] = zero;
258  }
259 
260  MueLu_sumAll(AColMap.getComm(), f, gf);
261 
262  return gf;
263  }
264 
265 } // namespace MueLu
266 
267 #endif //ifndef MUELU_CGSOLVER_DECL_HPP
Constraint space information for the potential prolongator.
Namespace for MueLu classes and methods.
Exception throws to report incompatible objects (like maps).
RCP< const CrsGraph > GetPattern() const
Print even more statistics.
void Iterate(const Matrix &A, const Constraint &C, const Matrix &P0, RCP< Matrix > &P) const
Iterate.
#define MueLu_sumAll(rcpComm, in, out)
static void MyOldScaleMatrix(Matrix &Op, const Teuchos::ArrayRCP< const SC > &scalingVector, bool doInverse=true, bool doFillComplete=true, bool doOptimizeStorage=true)
static RCP< Tpetra::CrsMatrix< SC, LO, GO, NO > > Op2NonConstTpetraCrs(RCP< Matrix > Op)
Teuchos::FancyOStream & GetOStream(MsgType type, int thisProcRankOnly=0) const
Get an output stream for outputting the input message type.
SC Frobenius(const Matrix &A, const Matrix &B) const
void Apply(const Matrix &P, Matrix &Projected) const
Apply constraint.
size_t nIts_
Number of performed iterations.
static Teuchos::ArrayRCP< SC > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)