MueLu  Version of the Day
MueLu_AlgebraicPermutationStrategy_def.hpp
Go to the documentation of this file.
1 /*
2  * MueLu_AlgebraicPermutationStrategy_def.hpp
3  *
4  * Created on: Feb 20, 2013
5  * Author: tobias
6  */
7 
8 #ifndef MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_
10 
11 #include <queue>
12 
13 #include <Xpetra_MultiVector.hpp>
14 #include <Xpetra_Matrix.hpp>
15 #include <Xpetra_CrsGraph.hpp>
16 #include <Xpetra_Vector.hpp>
17 #include <Xpetra_VectorFactory.hpp>
18 #include <Xpetra_CrsMatrixWrap.hpp>
19 #include <Xpetra_Export.hpp>
20 #include <Xpetra_ExportFactory.hpp>
21 #include <Xpetra_Import.hpp>
22 #include <Xpetra_ImportFactory.hpp>
23 #include <Xpetra_MatrixMatrix.hpp>
24 
25 #include "MueLu_Utilities.hpp"
27 
28 namespace MueLu {
29 
30  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
31  void AlgebraicPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const {
32 #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT
33 
34  const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();
35  int numProcs = comm->getSize();
36  int myRank = comm->getRank();
37 
38  /*if( permRowMap == Teuchos::null ) {
39  permRowMap = A->getRowMap(); // use full row map of A
40  }*/
41 
42  size_t nDofsPerNode = 1;
43  if (A->IsView("stridedMaps")) {
44  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
45  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
46  }
47 
48  //GetOStream(Runtime0) << "Perform generation of permutation operators on " << mapName_ << " map with " << permRowMap->getGlobalNumElements() << " elements" << std::endl;
49 
50  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates;
51  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries;
52  std::vector<Scalar> Weights;
53 
54  // loop over all local rows in matrix A and keep diagonal entries if corresponding
55  // matrix rows are not contained in permRowMap
56  for (size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) {
57  GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row);
58 
59  if(permRowMap->isNodeGlobalElement(grow) == true) continue;
60 
61  size_t nnz = A->getNumEntriesInLocalRow(row);
62 
63  // extract local row information from matrix
64  Teuchos::ArrayView<const LocalOrdinal> indices;
65  Teuchos::ArrayView<const Scalar> vals;
66  A->getLocalRowView(row, indices, vals);
67 
68  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
69 
70  // find column entry with max absolute value
71  GlobalOrdinal gMaxValIdx = 0;
72  Scalar norm1 = 0.0;
73  Scalar maxVal = 0.0;
74  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
75  norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
76  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
77  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
78  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
79  }
80  }
81 
82  if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!!
83  keepDiagonalEntries.push_back(std::make_pair(grow,grow));
84  }
85 
87  // handle rows that are marked to be relevant for permutations
88  for (size_t row = 0; row < permRowMap->getNodeNumElements(); row++) {
89  GlobalOrdinal grow = permRowMap->getGlobalElement(row);
90  LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow);
91  size_t nnz = A->getNumEntriesInLocalRow(lArow);
92 
93  // extract local row information from matrix
94  Teuchos::ArrayView<const LocalOrdinal> indices;
95  Teuchos::ArrayView<const Scalar> vals;
96  A->getLocalRowView(lArow, indices, vals);
97 
98  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error.");
99 
100  // find column entry with max absolute value
101  GlobalOrdinal gMaxValIdx = 0;
102  Scalar norm1 = 0.0;
103  Scalar maxVal = 0.0;
104  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
105  norm1 += Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
106  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
107  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
108  gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]);
109  }
110  }
111 
112  if(Teuchos::ScalarTraits<Scalar>::magnitude(maxVal) > 0.0) { // keep only max Entries \neq 0.0
113  permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx));
114  Weights.push_back(maxVal/(norm1*Teuchos::as<Scalar>(nnz)));
115  } else {
116  std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl;
117  }
118 
119  }
120 
121  // sort Weights in descending order
122  std::vector<int> permutation;
123  sortingPermutation(Weights,permutation);
124 
125  // create new vector with exactly one possible entry for each column
126 
127  // each processor which requests the global column id gcid adds 1 to gColVec
128  // gColVec will be summed up over all processors and communicated to gDomVec
129  // which is based on the non-overlapping domain map of A.
130 
131  Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap());
132  Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap());
133  gColVec->putScalar(0.0);
134  gDomVec->putScalar(0.0);
135 
136  // put in all keep diagonal entries
137  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) {
138  gColVec->sumIntoGlobalValue((*p).second,1.0);
139  }
140 
141  Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap());
142  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs
143  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT);
144 
145  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory
146  std::map<GlobalOrdinal, Scalar> gColId2Weight;
147 
148  Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0);
149  for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) {
150  // loop over all candidates
151  std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]];
152  GlobalOrdinal grow = pp.first;
153  GlobalOrdinal gcol = pp.second;
154 
155  LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol);
156  //Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0);
157  if(ddata[lcol] > 0.0){
158  continue; // skip lcol: column already handled by another row
159  }
160 
161  // mark column as already taken
162  ddata[lcol]++;
163 
164  permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol));
165  gColId2Weight[gcol] = Weights[permutation[i]];
166  }
167 
168  // communicate how often each column index is requested by the different procs
169  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
170  gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me
171 
172  //*****************************************************************************************
173  // first communicate ALL global ids of column indices which are requested by more
174  // than one proc to all other procs
175  // detect which global col indices are requested by more than one proc
176  // and store them in the multipleColRequests vector
177  std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also
178  // requested by another processor. This is possible, since they are stored
179  // in gDomVec which is based on the nonoverlapping domain map. That is, each
180  // global col id is handled by exactly one proc.
181  std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor
182 
183  for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) {
184  Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
185  if(arrDomVec[sz] > 1.0) {
186  multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
187  } else if(arrDomVec[sz] == 0.0) {
188  unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
189  }
190  }
191 
192  // communicate the global number of column indices which are requested by more than one proc
193  LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
194  LocalOrdinal globalMultColRequests = 0;
195 
196  // sum up all entries in multipleColRequests over all processors
197  MueLu_sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);
198 
199  if(globalMultColRequests > 0) {
200  // special handling: two processors request the same global column id.
201  // decide which processor gets it
202 
203  // distribute number of multipleColRequests to all processors
204  // each processor stores how many column ids for exchange are handled by the cur proc
205  std::vector<GlobalOrdinal> numMyMultColRequests(numProcs,0);
206  std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs,0);
207  numMyMultColRequests[myRank] = localMultColRequests;
208  Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,numProcs,&numMyMultColRequests[0],&numGlobalMultColRequests[0]);
209 
210  // communicate multipleColRequests entries to all processors
211  int nMyOffset = 0;
212  for (int i=0; i<myRank-1; i++)
213  nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights
214 
215  GlobalOrdinal zero=0;
216  std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests,zero);
217  std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests,zero);
218 
219  // loop over all local column GIDs that are also requested by other procs
220  for(size_t i = 0; i < multipleColRequests.size(); i++) {
221  procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
222  }
223 
224  // template ordinal, package (double)
225  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests), &procMultRequestedColIds[0], &global_procMultRequestedColIds[0]);
226 
227  // loop over global_procOverlappingWeights and eliminate wrong entries...
228  for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) {
229  GlobalOrdinal globColId = global_procMultRequestedColIds[k];
230 
231  std::vector<Scalar> MyWeightForColId(numProcs,0);
232  std::vector<Scalar> GlobalWeightForColId(numProcs,0);
233 
234  if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
235  MyWeightForColId[myRank] = gColId2Weight[globColId];
236  } else {
237  MyWeightForColId[myRank] = 0.0;
238  }
239 
240  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &MyWeightForColId[0], &GlobalWeightForColId[0]);
241 
242  if(gColVec->getMap()->isNodeGlobalElement(globColId)) {
243  // note: 2 procs could have the same weight for a column index.
244  // pick the first one.
245  Scalar winnerValue = 0.0;
246  int winnerProcRank = 0;
247  for (int proc = 0; proc < numProcs; proc++) {
248  if(GlobalWeightForColId[proc] > winnerValue) {
249  winnerValue = GlobalWeightForColId[proc];
250  winnerProcRank = proc;
251  }
252  }
253 
254  // winnerProcRank is the winner for handling globColId.
255  // winnerProcRank is unique (even if two procs have the same weight for a column index)
256 
257  if(myRank != winnerProcRank) {
258  // remove corresponding entry from permutedDiagCandidatesFiltered
259  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
260  while(p != permutedDiagCandidatesFiltered.end() )
261  {
262  if((*p).second == globColId)
263  p = permutedDiagCandidatesFiltered.erase(p);
264  else
265  p++;
266  }
267  }
268 
269  } // end if isNodeGlobalElement
270  } // end loop over global_procOverlappingWeights and eliminate wrong entries...
271  } // end if globalMultColRequests > 0
272 
273  // put together all pairs:
274  //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
275  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
276  RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
277  RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());
278 
279 #ifdef DEBUG_OUTPUT
280  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
281  // plausibility check
282  gColVec->putScalar(0.0);
283  gDomVec->putScalar(0.0);
284  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
285  while(pl != RowColPairs.end() )
286  {
287  //GlobalOrdinal ik = (*pl).first;
288  GlobalOrdinal jk = (*pl).second;
289 
290  gColVec->sumIntoGlobalValue(jk,1.0);
291  pl++;
292  }
293  gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD);
294  for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) {
295  Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0);
296  if(arrDomVec[sz] > 1.0) {
297  GetOStream(Runtime0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl;
298  } else if(arrDomVec[sz] == 0.0) {
299  GetOStream(Runtime0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl;
300  }
301  }
302  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
303 #endif
304 
306  // assumption: on each processor RowColPairs now contains
307  // a valid set of (row,column) pairs, where the row entries
308  // are a subset of the processor's rows and the column entries
309  // are unique throughout all processors.
310  // Note: the RowColPairs are only defined for a subset of all rows,
311  // so there might be rows without an entry in RowColPairs.
312  // It can be, that some rows seem to be missing in RowColPairs, since
313  // the entry in that row with maximum absolute value has been reserved
314  // by another row already (e.g. as already diagonal dominant row outside
315  // of perRowMap).
316  // In fact, the RowColPairs vector only defines the (row,column) pairs
317  // that will be definitely moved to the diagonal after permutation.
318 
319 #ifdef DEBUG_OUTPUT
320  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
321  // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
322  // }
323  // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p)
324  // {
326  // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl;
327  // }
328  // std::cout << "\n";
329 #endif
330 
331  // vectors to store permutation information
332  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
333  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
334  Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
335 
336  Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0);
337  Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0);
338 
339  Pperm->putScalar(0.0);
340  Qperm->putScalar(0.0);
341  lQperm->putScalar(0.0);
342 
343  // setup exporter for Qperm
344  Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());
345 
346  Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap());
347  Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
348  Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map)
349  Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use
350  Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0);
351  Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0);
352  Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0);
353  Teuchos::ArrayRCP< Scalar > ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this
354  RowIdStatus->putScalar(0.0);
355  ColIdStatus->putScalar(0.0);
356  lColIdStatus->putScalar(0.0);
357  ColIdUsed->putScalar(0.0); // no column ids are used
358 
359  // count wide-range permutations
360  // a wide-range permutation is defined as a permutation of rows/columns which do not
361  // belong to the same node
362  LocalOrdinal lWideRangeRowPermutations = 0;
363  GlobalOrdinal gWideRangeRowPermutations = 0;
364  LocalOrdinal lWideRangeColPermutations = 0;
365  GlobalOrdinal gWideRangeColPermutations = 0;
366 
367  // run 1: mark all "identity" permutations
368  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
369  while(p != RowColPairs.end() )
370  {
371  GlobalOrdinal ik = (*p).first;
372  GlobalOrdinal jk = (*p).second;
373 
374  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
375  LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);
376 
377  if(RowIdStatusArray[lik] == 0.0) {
378  RowIdStatusArray[lik] = 1.0; // use this row id
379  lColIdStatusArray[ljk] = 1.0; // use this column id
380  Pperm->replaceLocalValue(lik, ik);
381  lQperm->replaceLocalValue(ljk, ik); // use column map
382  ColIdUsed->replaceGlobalValue(ik,1.0); // ik is now used
383  p = RowColPairs.erase(p);
384 
385  // detect wide range permutations
386  if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) {
387  lWideRangeColPermutations++;
388  }
389  }
390  else
391  p++;
392  }
393 
394  // communicate column map -> domain map
395  Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX);
396  ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
397 
398  // plausibility check
399  if(RowColPairs.size()>0) GetOStream(Warnings0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me
400 
401  // close Pperm
402 
403  // count, how many row permutations are missing on current proc
404  size_t cntFreeRowIdx = 0;
405  std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows
406  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
407  if(RowIdStatusArray[lik] == 0.0) {
408  cntFreeRowIdx++;
409  qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik));
410  }
411  }
412 
413  // fix Pperm
414  for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) {
415  if(RowIdStatusArray[lik] == 0.0) {
416  RowIdStatusArray[lik] = 1.0; // use this row id
417  Pperm->replaceLocalValue(lik, qFreeGRowIdx.front());
418  // detect wide range permutations
419  if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) {
420  lWideRangeRowPermutations++;
421  }
422  qFreeGRowIdx.pop();
423  }
424  }
425 
426  // close Qperm (free permutation entries in Qperm)
427  size_t cntFreeColIdx = 0;
428  std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns
429  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
430  if(ColIdStatusArray[ljk] == 0.0) {
431  cntFreeColIdx++;
432  qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk));
433  }
434  }
435 
436  size_t cntUnusedColIdx = 0;
437  std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns
438  for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) {
439  if(ColIdUsedArray[ljk] == 0.0) {
440  cntUnusedColIdx++;
441  qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk));
442  }
443  }
444 
445  // fix Qperm with local entries
446  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
447  // stop if no (local) unused column idx are left
448  if(cntUnusedColIdx == 0) break;
449 
450  if(ColIdStatusArray[ljk] == 0.0) {
451  ColIdStatusArray[ljk] = 1.0; // use this row id
452  Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map)
453  ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),1.0); // ljk is now used, too
454  // detect wide range permutations
455  if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
456  lWideRangeColPermutations++;
457  }
458  qUnusedGColIdx.pop();
459  cntUnusedColIdx--;
460  cntFreeColIdx--;
461  }
462  }
463 
464  //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally
465  //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX);
466 
467  // count, how many unused column idx are needed on current processor
468  // to complete Qperm
469  cntFreeColIdx = 0;
470  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop
471  if(ColIdStatusArray[ljk] == 0.0) {
472  cntFreeColIdx++;
473  }
474  }
475 
476  GlobalOrdinal global_cntFreeColIdx = 0;
477  LocalOrdinal local_cntFreeColIdx = cntFreeColIdx;
478  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx);
479 #ifdef DEBUG_OUTPUT
480  std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl;
481 #endif
482 
483  // avoid global communication if possible
484  if(global_cntFreeColIdx > 0) {
485 
486  // 1) count how many unused column ids are left
487  GlobalOrdinal global_cntUnusedColIdx = 0;
488  LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx;
489  MueLu_sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx);
490 #ifdef DEBUG_OUTPUT
491  std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl;
492 #endif
493 
494  // 2) communicate how many unused column ids are available on procs
495  std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs);
496  std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
497  local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
498  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_UnusedColIdxOnProc[0], &global_UnusedColIdxOnProc[0]);
499 
500 #ifdef DEBUG_OUTPUT
501  std::cout << "PROC " << myRank << " global num unused indices per proc: ";
502  for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
503  std::cout << " " << global_UnusedColIdxOnProc[ljk];
504  }
505  std::cout << std::endl;
506 #endif
507 
508  // 3) build array of length global_cntUnusedColIdx to globally replicate unused column idx
509  std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
510  std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
511  GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
512  for(int proc=0; proc<myRank; proc++) {
513  global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
514  }
515  for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) {
516  local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
517  qUnusedGColIdx.pop();
518  }
519  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx), &local_UnusedColIdxVector[0], &global_UnusedColIdxVector[0]);
520 #ifdef DEBUG_OUTPUT
521  std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
522  for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
523  std::cout << " " << global_UnusedColIdxVector[ljk];
524  }
525  std::cout << std::endl;
526 #endif
527 
528 
529 
530  // 4) communicate, how many column idx are needed on each processor
531  // to complete Qperm
532  std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs);
533  std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
534  local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
535  Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_EmptyColIdxOnProc[0], &global_EmptyColIdxOnProc[0]);
536 
537 #ifdef DEBUG_OUTPUT
538  std::cout << "PROC " << myRank << " global num of needed column indices: ";
539  for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
540  std::cout << " " << global_EmptyColIdxOnProc[ljk];
541  }
542  std::cout << std::endl;
543 #endif
544 
545  // 5) determine first index in global_UnusedColIdxVector for unused column indices,
546  // that are marked to be used by this processor
547  GlobalOrdinal global_UnusedColStartIdx = 0;
548  for(int proc=0; proc<myRank; proc++) {
549  global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
550  }
551 
552 #ifdef DEBUG_OUTPUT
553  GetOStream(Statistics0) << "PROC " << myRank << " is allowd to use the following column gids: ";
554  for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
555  GetOStream(Statistics0) << global_UnusedColIdxVector[k] << " ";
556  }
557  GetOStream(Statistics0) << std::endl;
558 #endif
559 
560  // 6.) fix Qperm with global entries
561  GlobalOrdinal array_iter = 0;
562  for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
563 
564  if(ColIdStatusArray[ljk] == 0.0) {
565  ColIdStatusArray[ljk] = 1.0; // use this row id
566  Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
567  ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],1.0);
568  // detect wide range permutations
569  if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
570  lWideRangeColPermutations++;
571  }
572  array_iter++;
573  //cntUnusedColIdx--; // check me
574  }
575  }
576  } // end if global_cntFreeColIdx > 0
578 
579 
580  // create new empty Matrix
581  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
582  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
583 
584  for(size_t row=0; row<A->getNodeNumRows(); row++) {
585  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(PpermData[row])); // column idx for Perm^T
586  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(QpermData[row])); // column idx for Qperm
587  Teuchos::ArrayRCP<Scalar> valout(1,1.0);
588  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size()));
589  permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size()));
590  }
591 
592  permPTmatrix->fillComplete();
593  permQTmatrix->fillComplete();
594 
595  Teuchos::RCP<Matrix> permPmatrix = Utils2::Transpose(*permPTmatrix, true);
596 
597  for(size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) {
598  if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
599  GetOStream(Warnings0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
600  if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
601  GetOStream(Warnings0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
602  if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
603  GetOStream(Warnings0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
604  }
605 
606  // build permP * A * permQT
607  Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2));
608  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2));
609 
610  /*
611  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
612  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
613  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
614  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
615  */
616  // build scaling matrix
617  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
618  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
619  Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0);
620  Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0);
621 
622  permPApermQt->getLocalDiagCopy(*diagVec);
623  for(size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) {
624  if(diagVecData[i] != 0.0)
625  invDiagVecData[i] = 1/diagVecData[i];
626  else {
627  invDiagVecData[i] = 1.0;
628  GetOStream(Statistics0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
629  }
630  }
631 
632  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile));
633 
634  for(size_t row=0; row<A->getNodeNumRows(); row++) {
635  Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
636  Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]);
637  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size()));
638  }
639  diagScalingOp->fillComplete();
640 
641  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2));
642  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/);
643 
644  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!!
645  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/);
646  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/);
647  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/);
648 
650  // count zeros on diagonal in P -> number of row permutations
651  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true);
652  permPmatrix->getLocalDiagCopy(*diagPVec);
653  Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0);
654  LocalOrdinal lNumRowPermutations = 0;
655  GlobalOrdinal gNumRowPermutations = 0;
656  for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) {
657  if(diagPVecData[i] == 0.0) {
658  lNumRowPermutations++;
659  }
660  }
661 
662  // sum up all entries in multipleColRequests over all processors
663  MueLu_sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations);
664 
666  // count zeros on diagonal in Q^T -> number of column permutations
667  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true);
668  permQTmatrix->getLocalDiagCopy(*diagQTVec);
669  Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0);
670  LocalOrdinal lNumColPermutations = 0;
671  GlobalOrdinal gNumColPermutations = 0;
672  for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) {
673  if(diagQTVecData[i] == 0.0) {
674  lNumColPermutations++;
675  }
676  }
677 
678  // sum up all entries in multipleColRequests over all processors
679  MueLu_sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations);
680 
681  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/);
682  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/);
683  currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/);
684  currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/);
685 
686  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
687  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
688  GetOStream(Runtime1) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl;
689 
690 #endif // #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT
691 
692 
693  }
694 
695 } // namespace MueLu
696 
697 #endif /* MUELU_ALGEBRAICPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
static RCP< Matrix > Transpose(Matrix &Op, bool optimizeTranspose=false, const std::string &label=std::string())
Transpose a Xpetra::Matrix.
void sortingPermutation(const std::vector< Scalar > &values, std::vector< LocalOrdinal > &v)
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
#define MueLu_sumAll(rcpComm, in, out)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)