MueLu  Version of the Day
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
47 #define MUELU_TENTATIVEPFACTORY_DEF_HPP
48 
49 #include <Xpetra_MapFactory.hpp>
50 #include <Xpetra_Map.hpp>
51 #include <Xpetra_CrsMatrix.hpp>
52 #include <Xpetra_Matrix.hpp>
53 #include <Xpetra_MultiVector.hpp>
54 #include <Xpetra_MultiVectorFactory.hpp>
55 #include <Xpetra_VectorFactory.hpp>
56 #include <Xpetra_Import.hpp>
57 #include <Xpetra_ImportFactory.hpp>
58 #include <Xpetra_CrsMatrixWrap.hpp>
59 #include <Xpetra_StridedMap.hpp>
60 #include <Xpetra_StridedMapFactory.hpp>
61 
63 
64 #include "MueLu_Aggregates.hpp"
65 #include "MueLu_AmalgamationFactory.hpp"
66 #include "MueLu_AmalgamationInfo.hpp"
67 #include "MueLu_CoarseMapFactory.hpp"
68 #include "MueLu_NullspaceFactory.hpp"
69 #include "MueLu_PerfUtils.hpp"
70 #include "MueLu_Monitor.hpp"
71 #include "MueLu_Utilities.hpp"
72 
73 namespace MueLu {
74 
75  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
77  RCP<ParameterList> validParamList = rcp(new ParameterList());
78 
79  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
80  validParamList->set< RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
81  validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
82  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
83  validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
84  return validParamList;
85  }
86 
87  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
89  Input(fineLevel, "A");
90  Input(fineLevel, "Aggregates");
91  Input(fineLevel, "Nullspace");
92  Input(fineLevel, "UnAmalgamationInfo");
93  Input(fineLevel, "CoarseMap");
94  }
95 
96  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
98  return BuildP(fineLevel, coarseLevel);
99  }
100 
101  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
103  FactoryMonitor m(*this, "Build", coarseLevel);
104 
105  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
106  RCP<Aggregates> aggregates = Get< RCP<Aggregates> > (fineLevel, "Aggregates");
107  RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> >(fineLevel, "UnAmalgamationInfo");
108  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
109  RCP<const Map> coarseMap = Get< RCP<const Map> > (fineLevel, "CoarseMap");
110 
111  RCP<Matrix> Ptentative;
112  RCP<MultiVector> coarseNullspace;
113  if (!aggregates->AggregatesCrossProcessors())
114  BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
115  else
116  BuildPcoupled (A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
117 
118  // If available, use striding information of fine level matrix A for range
119  // map and coarseMap as domain map; otherwise use plain range map of
120  // Ptent = plain range map of A for range map and coarseMap as domain map.
121  // NOTE:
122  // The latter is not really safe, since there is no striding information
123  // for the range map. This is not really a problem, since striding
124  // information is always available on the intermedium levels and the
125  // coarsest levels.
126  if (A->IsView("stridedMaps") == true)
127  Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
128  else
129  Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
130 
131  Set(coarseLevel, "Nullspace", coarseNullspace);
132  Set(coarseLevel, "P", Ptentative);
133 
134  if (IsPrint(Statistics1)) {
135  RCP<ParameterList> params = rcp(new ParameterList());
136  params->set("printLoadBalancingInfo", true);
137  GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
138  }
139  }
140 
141  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
143  BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
144  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
145  RCP<const Map> rowMap = A->getRowMap();
146  RCP<const Map> colMap = A->getColMap();
147 
148  const size_t numRows = rowMap->getNodeNumElements();
149 
150  RCP<const Teuchos::Comm<int> > comm = rowMap->getComm();
151 
152  typedef Teuchos::ScalarTraits<SC> STS;
153  typedef typename STS::magnitudeType Magnitude;
154  const SC zero = STS::zero();
155  const SC one = STS::one();
156  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
157 
158  const GO numAggs = aggregates->GetNumAggregates();
159  const size_t NSDim = fineNullspace->getNumVectors();
160 
161  // Aggregates map is based on the amalgamated column map
162  // We can skip global-to-local conversion if LIDs in row map are
163  // same as LIDs in column map
164  bool goodMap = isGoodMap(*rowMap, *colMap);
165 
166  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
167  // aggStart is a pointer into aggToRowMapLO
168  // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
169  // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
170  ArrayRCP<LO> aggStart;
171  ArrayRCP<LO> aggToRowMapLO;
172  ArrayRCP<GO> aggToRowMapGO;
173  if (goodMap) {
174  amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
175  GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
176 
177  } else {
178  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
179  GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
180  << "using GO->LO conversion with performance penalty" << std::endl;
181  }
182 
183  // Find largest aggregate size
184  LO maxAggSize = 0;
185  for (GO i = 0; i < numAggs; i++)
186  maxAggSize = std::max(maxAggSize, aggStart[i+1] - aggStart[i]);
187 
188  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
189 
190  // Pull out the nullspace vectors so that we can have random access.
191  ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
192  ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
193  for (size_t i = 0; i < NSDim; i++) {
194  fineNS[i] = fineNullspace->getData(i);
195  if (coarseMap->getNodeNumElements() > 0)
196  coarseNS[i] = coarseNullspace->getDataNonConst(i);
197  }
198 
199  size_t nnzEstimate = numRows * NSDim;
200 
201  // Time to construct the matrix and fill in the values
202  Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0, Xpetra::StaticProfile));
203  RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
204 
205  ArrayRCP<size_t> iaPtent;
206  ArrayRCP<LO> jaPtent;
207  ArrayRCP<SC> valPtent;
208 
209  PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
210 
211  ArrayView<size_t> ia = iaPtent();
212  ArrayView<LO> ja = jaPtent();
213  ArrayView<SC> val = valPtent();
214 
215  ia[0] = 0;
216  for (size_t i = 1; i <= numRows; i++)
217  ia[i] = ia[i-1] + NSDim;
218 
219  for (size_t j = 0; j < nnzEstimate; j++) {
220  ja [j] = INVALID;
221  val[j] = zero;
222  }
223 
224  for (GO agg = 0; agg < numAggs; agg++) {
225  LO aggSize = aggStart[agg+1] - aggStart[agg];
226 
227  Xpetra::global_size_t offset = agg*NSDim;
228 
229  // Extract the piece of the nullspace corresponding to the aggregate, and
230  // put it in the flat array, "localQR" (in column major format) for the
231  // QR routine.
232  Teuchos::SerialDenseMatrix<LO,SC> localQR(aggSize, NSDim);
233  if (goodMap) {
234  for (size_t j = 0; j < NSDim; j++)
235  for (LO k = 0; k < aggSize; k++)
236  localQR(k,j) = fineNS[j][aggToRowMapLO[aggStart[agg]+k]];
237  } else {
238  for (size_t j = 0; j < NSDim; j++)
239  for (LO k = 0; k < aggSize; k++)
240  localQR(k,j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+k])];
241  }
242 
243  // Test for zero columns
244  for (size_t j = 0; j < NSDim; j++) {
245  bool bIsZeroNSColumn = true;
246 
247  for (LO k = 0; k < aggSize; k++)
248  if (localQR(k,j) != zero)
249  bIsZeroNSColumn = false;
250 
251  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
252  "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column");
253  }
254 
255  // Calculate QR decomposition (standard)
256  // NOTE: Q is stored in localQR and R is stored in coarseNS
257  if (aggSize >= Teuchos::as<LO>(NSDim)) {
258 
259  if (NSDim == 1) {
260  // Only one nullspace vector, calculate Q and R by hand
261  Magnitude norm = STS::magnitude(zero);
262  for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
263  norm += STS::magnitude(localQR(k,0)*localQR(k,0));
264  norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
265 
266  // R = norm
267  coarseNS[0][offset] = norm;
268 
269  // Q = localQR(:,0)/norm
270  for (LO i = 0; i < aggSize; i++)
271  localQR(i,0) /= norm;
272 
273  } else {
274  Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
275  qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
276  qrSolver.factor();
277 
278  // R = upper triangular part of localQR
279  for (size_t j = 0; j < NSDim; j++)
280  for (size_t k = 0; k <= j; k++)
281  coarseNS[j][offset+k] = localQR(k,j); //TODO is offset+k the correct local ID?!
282 
283  // Calculate Q, the tentative prolongator.
284  // The Lapack GEQRF call only works for myAggsize >= NSDim
285  qrSolver.formQ();
286  Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
287  for (size_t j = 0; j < NSDim; j++)
288  for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
289  localQR(i,j) = (*qFactor)(i,j);
290  }
291 
292  } else {
293  // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
294 
295  // The local QR decomposition is not possible in the "overconstrained"
296  // case (i.e. number of columns in localQR > number of rows), which
297  // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
298  // is only possible for single node aggregates in structural mechanics.
299  // (Similar problems may arise in discontinuous Galerkin problems...)
300  // We bypass the QR decomposition and use an identity block in the
301  // tentative prolongator for the single node aggregate and transfer the
302  // corresponding fine level null space information 1-to-1 to the coarse
303  // level null space part.
304 
305  // NOTE: The resulting tentative prolongation operator has
306  // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
307  // coarse level operator A. To deal with that one has the following
308  // options:
309  // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
310  // false) to add some identity block to the diagonal of the zero rows
311  // in the coarse level operator A, such that standard level smoothers
312  // can be used again.
313  // - Use special (projection-based) level smoothers, which can deal
314  // with singular matrices (very application specific)
315  // - Adapt the code below to avoid zero columns. However, we do not
316  // support a variable number of DOFs per node in MueLu/Xpetra which
317  // makes the implementation really hard.
318 
319  // R = extended (by adding identity rows) localQR
320  for (size_t j = 0; j < NSDim; j++)
321  for (size_t k = 0; k < NSDim; k++)
322  if (k < as<size_t>(aggSize))
323  coarseNS[j][offset+k] = localQR(k,j);
324  else
325  coarseNS[j][offset+k] = (k == j ? one : zero);
326 
327  // Q = I (rectangular)
328  for (size_t i = 0; i < as<size_t>(aggSize); i++)
329  for (size_t j = 0; j < NSDim; j++)
330  localQR(i,j) = (j == i ? one : zero);
331  }
332 
333  // Process each row in the local Q factor
334  // FIXME: What happens if maps are blocked?
335  for (LO j = 0; j < aggSize; j++) {
336  LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg]+j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]));
337 
338  size_t rowStart = ia[localRow];
339  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
340  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
341  if (localQR(j,k) != zero) {
342  ja [rowStart+lnnz] = offset + k;
343  val[rowStart+lnnz] = localQR(j,k);
344  lnnz++;
345  }
346  }
347  }
348  }
349 
350  // Compress storage (remove all INVALID, which happen when we skip zeros)
351  // We do that in-place
352  size_t ia_tmp = 0, nnz = 0;
353  for (size_t i = 0; i < numRows; i++) {
354  for (size_t j = ia_tmp; j < ia[i+1]; j++)
355  if (ja[j] != INVALID) {
356  ja [nnz] = ja [j];
357  val[nnz] = val[j];
358  nnz++;
359  }
360  ia_tmp = ia[i+1];
361  ia[i+1] = nnz;
362  }
363  if (rowMap->lib() == Xpetra::UseTpetra) {
364  // - Cannot resize for Epetra, as it checks for same pointers
365  // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
366  // NOTE: these invalidate ja and val views
367  jaPtent .resize(nnz);
368  valPtent.resize(nnz);
369  }
370 
371  GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
372 
373  PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
374  PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap());
375  }
376 
377  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
379  BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
380  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
381  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
382 
383  typedef Teuchos::ScalarTraits<SC> STS;
384  typedef typename STS::magnitudeType Magnitude;
385  const SC zero = STS::zero();
386  const SC one = STS::one();
387 
388  // number of aggregates
389  GO numAggs = aggregates->GetNumAggregates();
390 
391  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
392  // aggStart is a pointer into aggToRowMap
393  // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
394  // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
395  ArrayRCP<LO> aggStart;
396  ArrayRCP< GO > aggToRowMap;
397  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
398 
399  // find size of the largest aggregate
400  LO maxAggSize=0;
401  for (GO i=0; i<numAggs; ++i) {
402  LO sizeOfThisAgg = aggStart[i+1] - aggStart[i];
403  if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
404  }
405 
406  // dimension of fine level nullspace
407  const size_t NSDim = fineNullspace->getNumVectors();
408 
409  // index base for coarse Dof map (usually 0)
410  GO indexBase=A->getRowMap()->getIndexBase();
411 
412  const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
413  const RCP<const Map> uniqueMap = A->getDomainMap();
414  RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
415  RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap,NSDim);
416  fineNullspaceWithOverlap->doImport(*fineNullspace,*importer,Xpetra::INSERT);
417 
418  // Pull out the nullspace vectors so that we can have random access.
419  ArrayRCP< ArrayRCP<const SC> > fineNS(NSDim);
420  for (size_t i=0; i<NSDim; ++i)
421  fineNS[i] = fineNullspaceWithOverlap->getData(i);
422 
423  //Allocate storage for the coarse nullspace.
424  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
425 
426  ArrayRCP< ArrayRCP<SC> > coarseNS(NSDim);
427  for (size_t i=0; i<NSDim; ++i)
428  if (coarseMap->getNodeNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
429 
430  //This makes the rowmap of Ptent the same as that of A->
431  //This requires moving some parts of some local Q's to other processors
432  //because aggregates can span processors.
433  RCP<const Map > rowMapForPtent = A->getRowMap();
434  const Map& rowMapForPtentRef = *rowMapForPtent;
435 
436  // Set up storage for the rows of the local Qs that belong to other processors.
437  // FIXME This is inefficient and could be done within the main loop below with std::vector's.
438  RCP<const Map> colMap = A->getColMap();
439 
440  RCP<const Map > ghostQMap;
441  RCP<MultiVector> ghostQvalues;
442  Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > ghostQcolumns;
443  RCP<Xpetra::Vector<GO,LO,GO,Node> > ghostQrowNums;
444  ArrayRCP< ArrayRCP<SC> > ghostQvals;
445  ArrayRCP< ArrayRCP<GO> > ghostQcols;
446  ArrayRCP< GO > ghostQrows;
447 
448  Array<GO> ghostGIDs;
449  for (LO j=0; j<numAggs; ++j) {
450  for (LO k=aggStart[j]; k<aggStart[j+1]; ++k) {
451  if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
452  ghostGIDs.push_back(aggToRowMap[k]);
453  }
454  }
455  }
456  ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
457  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
458  ghostGIDs,
459  indexBase, A->getRowMap()->getComm()); //JG:Xpetra::global_size_t>?
460  //Vector to hold bits of Q that go to other processors.
461  ghostQvalues = MultiVectorFactory::Build(ghostQMap,NSDim);
462  //Note that Epetra does not support MultiVectors templated on Scalar != double.
463  //So to work around this, we allocate an array of Vectors. This shouldn't be too
464  //expensive, as the number of Vectors is NSDim.
465  ghostQcolumns.resize(NSDim);
466  for (size_t i=0; i<NSDim; ++i)
467  ghostQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
468  ghostQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
469  if (ghostQvalues->getLocalLength() > 0) {
470  ghostQvals.resize(NSDim);
471  ghostQcols.resize(NSDim);
472  for (size_t i=0; i<NSDim; ++i) {
473  ghostQvals[i] = ghostQvalues->getDataNonConst(i);
474  ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
475  }
476  ghostQrows = ghostQrowNums->getDataNonConst(0);
477  }
478 
479  //importer to handle moving Q
480  importer = ImportFactory::Build(ghostQMap, A->getRowMap());
481 
482  // Dense QR solver
483  Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
484 
485  //Allocate temporary storage for the tentative prolongator.
486  Array<GO> globalColPtr(maxAggSize*NSDim,0);
487  Array<LO> localColPtr(maxAggSize*NSDim,0);
488  Array<SC> valPtr(maxAggSize*NSDim,0.);
489 
490  //Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
491  const Map& coarseMapRef = *coarseMap;
492 
493  // For the 3-arrays constructor
494  ArrayRCP<size_t> ptent_rowptr;
495  ArrayRCP<LO> ptent_colind;
496  ArrayRCP<Scalar> ptent_values;
497 
498  // Because ArrayRCPs are slow...
499  ArrayView<size_t> rowptr_v;
500  ArrayView<LO> colind_v;
501  ArrayView<Scalar> values_v;
502 
503  // For temporary usage
504  Array<size_t> rowptr_temp;
505  Array<LO> colind_temp;
506  Array<Scalar> values_temp;
507 
508  RCP<CrsMatrix> PtentCrs;
509 
510  RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim, Xpetra::StaticProfile));
511  PtentCrs = PtentCrsWrap->getCrsMatrix();
512  Ptentative = PtentCrsWrap;
513 
514  //*****************************************************************
515  //Loop over all aggregates and calculate local QR decompositions.
516  //*****************************************************************
517  GO qctr=0; //for indexing into Ptent data vectors
518  const Map& nonUniqueMapRef = *nonUniqueMap;
519 
520  size_t total_nnz_count=0;
521 
522  for (GO agg=0; agg<numAggs; ++agg)
523  {
524  LO myAggSize = aggStart[agg+1]-aggStart[agg];
525  // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
526  // "localQR" (in column major format) for the QR routine.
527  Teuchos::SerialDenseMatrix<LO,SC> localQR(myAggSize, NSDim);
528  for (size_t j=0; j<NSDim; ++j) {
529  bool bIsZeroNSColumn = true;
530  for (LO k=0; k<myAggSize; ++k)
531  {
532  // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
533  // fineNS[j][n] is the nth entry in the jth NS vector
534  try{
535  SC nsVal = fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ]; // extract information from fine level NS
536  localQR(k,j) = nsVal;
537  if (nsVal != zero) bIsZeroNSColumn = false;
538  }
539  catch(...) {
540  GetOStream(Runtime1,-1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
541  GetOStream(Runtime1,-1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
542  GetOStream(Runtime1,-1) << "(local?) aggId=" << agg << std::endl;
543  GetOStream(Runtime1,-1) << "aggSize=" << myAggSize << std::endl;
544  GetOStream(Runtime1,-1) << "agg DOF=" << k << std::endl;
545  GetOStream(Runtime1,-1) << "NS vector j=" << j << std::endl;
546  GetOStream(Runtime1,-1) << "j*myAggSize + k = " << j*myAggSize + k << std::endl;
547  GetOStream(Runtime1,-1) << "aggToRowMap["<<agg<<"][" << k << "] = " << aggToRowMap[aggStart[agg]+k] << std::endl;
548  GetOStream(Runtime1,-1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg]+k] << " is global element in nonUniqueMap = " <<
549  nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
550  GetOStream(Runtime1,-1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
551  GetOStream(Runtime1,-1) << "fineNS...=" << fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ] << std::endl;
552  GetOStream(Errors,-1) << "caught an error!" << std::endl;
553  }
554  } //for (LO k=0 ...
555  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
556  } //for (LO j=0 ...
557 
558  Xpetra::global_size_t offset=agg*NSDim;
559 
560  if(myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
561  // calculate QR decomposition (standard)
562  // R is stored in localQR (size: myAggSize x NSDim)
563 
564  // Householder multiplier
565  SC tau = localQR(0,0);
566 
567  if (NSDim == 1) {
568  // Only one nullspace vector, so normalize by hand
569  Magnitude dtemp=0;
570  for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
571  Magnitude tmag = STS::magnitude(localQR(k,0));
572  dtemp += tmag*tmag;
573  }
574  dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
575  tau = localQR(0,0);
576  localQR(0,0) = dtemp;
577  } else {
578  qrSolver.setMatrix( Teuchos::rcp(&localQR, false) );
579  qrSolver.factor();
580  }
581 
582  // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
583  // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
584  // This stores the (offset+k)th entry only if it is local according to the coarseMap.
585  for (size_t j=0; j<NSDim; ++j) {
586  for (size_t k=0; k<=j; ++k) {
587  try {
588  if (coarseMapRef.isNodeLocalElement(offset+k)) {
589  coarseNS[j][offset+k] = localQR(k, j); //TODO is offset+k the correct local ID?!
590  }
591  }
592  catch(...) {
593  GetOStream(Errors,-1) << "caught error in coarseNS insert, j="<<j<<", offset+k = "<<offset+k<<std::endl;
594  }
595  }
596  }
597 
598  // Calculate Q, the tentative prolongator.
599  // The Lapack GEQRF call only works for myAggsize >= NSDim
600 
601  if (NSDim == 1) {
602  // Only one nullspace vector, so calculate Q by hand
603  Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0,0));
604  localQR(0,0) = tau;
605  dtemp = 1 / dtemp;
606  for (LocalOrdinal i=0; i<myAggSize; ++i) {
607  localQR(i,0) *= dtemp ;
608  }
609  } else {
610  qrSolver.formQ();
611  Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
612  for (size_t j=0; j<NSDim; j++) {
613  for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
614  localQR(i,j) = (*qFactor)(i,j);
615  }
616  }
617  }
618 
619  // end default case (myAggSize >= NSDim)
620  } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
621  // See comments for the uncoupled case
622 
623  // R = extended (by adding identity rows) localQR
624  for (size_t j = 0; j < NSDim; j++)
625  for (size_t k = 0; k < NSDim; k++) {
626  TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset+k), Exceptions::RuntimeError,
627  "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset+k);
628 
629  if (k < as<size_t>(myAggSize))
630  coarseNS[j][offset+k] = localQR(k,j);
631  else
632  coarseNS[j][offset+k] = (k == j ? one : zero);
633  }
634 
635  // Q = I (rectangular)
636  for (size_t i = 0; i < as<size_t>(myAggSize); i++)
637  for (size_t j = 0; j < NSDim; j++)
638  localQR(i,j) = (j == i ? one : zero);
639  } // end else (special handling for 1pt aggregates)
640 
641  //Process each row in the local Q factor. If the row is local to the current processor
642  //according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
643  //to be communicated later to the owning processor.
644  //FIXME -- what happens if maps are blocked?
645  for (GO j=0; j<myAggSize; ++j) {
646  //This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
647  //If it is, the row is inserted. If not, the row number, columns, and values are saved in
648  //MultiVectors that will be sent to other processors.
649  GO globalRow = aggToRowMap[aggStart[agg]+j];
650 
651  //TODO is the use of Xpetra::global_size_t below correct?
652  if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false ) {
653  ghostQrows[qctr] = globalRow;
654  for (size_t k=0; k<NSDim; ++k) {
655  ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg*NSDim+k);
656  ghostQvals[k][qctr] = localQR(j,k);
657  }
658  ++qctr;
659  } else {
660  size_t nnz=0;
661  for (size_t k=0; k<NSDim; ++k) {
662  try{
663  if (localQR(j,k) != Teuchos::ScalarTraits<SC>::zero()) {
664  localColPtr[nnz] = agg * NSDim + k;
665  globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
666  valPtr[nnz] = localQR(j,k);
667  ++total_nnz_count;
668  ++nnz;
669  }
670  }
671  catch(...) {
672  GetOStream(Errors,-1) << "caught error in colPtr/valPtr insert, current index="<<nnz<<std::endl;
673  }
674  } //for (size_t k=0; k<NSDim; ++k)
675 
676  try{
677  Ptentative->insertGlobalValues(globalRow,globalColPtr.view(0,nnz),valPtr.view(0,nnz));
678  }
679  catch(...) {
680  GetOStream(Errors,-1) << "pid " << A->getRowMap()->getComm()->getRank()
681  << "caught error during Ptent row insertion, global row "
682  << globalRow << std::endl;
683  }
684  }
685  } //for (GO j=0; j<myAggSize; ++j)
686 
687  } // for (LO agg=0; agg<numAggs; ++agg)
688 
689 
690  // ***********************************************************
691  // ************* end of aggregate-wise QR ********************
692  // ***********************************************************
693  GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
694  // Import ghost parts of Q factors and insert into Ptentative.
695  // First import just the global row numbers.
696  RCP<Xpetra::Vector<GO,LO,GO,Node> > targetQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(rowMapForPtent);
697  targetQrowNums->putScalar(-1);
698  targetQrowNums->doImport(*ghostQrowNums,*importer,Xpetra::INSERT);
699  ArrayRCP< GO > targetQrows = targetQrowNums->getDataNonConst(0);
700 
701  // Now create map based on just the row numbers imported.
702  Array<GO> gidsToImport;
703  gidsToImport.reserve(targetQrows.size());
704  for (typename ArrayRCP<GO>::iterator r=targetQrows.begin(); r!=targetQrows.end(); ++r) {
705  if (*r > -1) {
706  gidsToImport.push_back(*r);
707  }
708  }
709  RCP<const Map > reducedMap = MapFactory::Build( A->getRowMap()->lib(),
710  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
711  gidsToImport, indexBase, A->getRowMap()->getComm() );
712 
713  // Import using the row numbers that this processor will receive.
714  importer = ImportFactory::Build(ghostQMap, reducedMap);
715 
716  Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > targetQcolumns(NSDim);
717  for (size_t i=0; i<NSDim; ++i) {
718  targetQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(reducedMap);
719  targetQcolumns[i]->doImport(*(ghostQcolumns[i]),*importer,Xpetra::INSERT);
720  }
721  RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap,NSDim);
722  targetQvalues->doImport(*ghostQvalues,*importer,Xpetra::INSERT);
723 
724  ArrayRCP< ArrayRCP<SC> > targetQvals;
725  ArrayRCP<ArrayRCP<GO> > targetQcols;
726  if (targetQvalues->getLocalLength() > 0) {
727  targetQvals.resize(NSDim);
728  targetQcols.resize(NSDim);
729  for (size_t i=0; i<NSDim; ++i) {
730  targetQvals[i] = targetQvalues->getDataNonConst(i);
731  targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
732  }
733  }
734 
735  valPtr = Array<SC>(NSDim,0.);
736  globalColPtr = Array<GO>(NSDim,0);
737  for (typename Array<GO>::iterator r=gidsToImport.begin(); r!=gidsToImport.end(); ++r) {
738  if (targetQvalues->getLocalLength() > 0) {
739  for (size_t j=0; j<NSDim; ++j) {
740  valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
741  globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
742  }
743  Ptentative->insertGlobalValues(*r, globalColPtr.view(0,NSDim), valPtr.view(0,NSDim));
744  } //if (targetQvalues->getLocalLength() > 0)
745  }
746 
747  Ptentative->fillComplete(coarseMap, A->getDomainMap());
748  }
749 
750  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
751  bool TentativePFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::isGoodMap(const Map& rowMap, const Map& colMap) const {
752  ArrayView<const GO> rowElements = rowMap.getNodeElementList();
753  ArrayView<const GO> colElements = colMap.getNodeElementList();
754 
755  const size_t numElements = rowElements.size();
756 
757  bool goodMap = true;
758  for (size_t i = 0; i < numElements; i++)
759  if (rowElements[i] != colElements[i]) {
760  goodMap = false;
761  break;
762  }
763 
764  return goodMap;
765  }
766 
767 } //namespace MueLu
768 
769 // TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
770 
771 #define MUELU_TENTATIVEPFACTORY_SHORT
772 #endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
Important warning messages (one line)
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
Namespace for MueLu classes and methods.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
bool isGoodMap(const Map &rowMap, const Map &colMap) const
Exception throws to report errors in the internal logical of the program.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
Description of what is happening (more verbose)