#ifndef MUELU_REPARTITIONFACTORY_DEF_HPP
#define MUELU_REPARTITIONFACTORY_DEF_HPP

#include "MueLu_RepartitionFactory_decl.hpp"

#ifdef HAVE_MPI
#include <Teuchos_DefaultMpiComm.hpp>
#include <Teuchos_CommHelpers.hpp>

#include <Xpetra_Map.hpp>
#include <Xpetra_MapFactory.hpp>
#include <Xpetra_VectorFactory.hpp>
#include <Xpetra_Import.hpp>
#include <Xpetra_ImportFactory.hpp>
#include <Xpetra_Export.hpp>
#include <Xpetra_ExportFactory.hpp>
#include <Xpetra_Matrix.hpp>
#include <Xpetra_MatrixFactory.hpp>

#include "MueLu_Utilities.hpp"
#include "MueLu_Level.hpp"
#include "MueLu_MasterList.hpp"

namespace MueLu {

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
RCP<const ParameterList> RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::GetValidParameterList() const {
  RCP<ParameterList> validParamList = rcp(new ParameterList());

#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
  // Register the "repartition: ..." options queried below in Build() and DeterminePartitionPlacement()
  SET_VALID_ENTRY("repartition: start level");
  SET_VALID_ENTRY("repartition: min rows per proc");
  SET_VALID_ENTRY("repartition: max imbalance");
  SET_VALID_ENTRY("repartition: print partition distribution");
  SET_VALID_ENTRY("repartition: remap parts");
  SET_VALID_ENTRY("repartition: remap num values");
#undef SET_VALID_ENTRY

  validParamList->set< RCP<const FactoryBase> >("A",         Teuchos::null, "Factory of the matrix A");
  validParamList->set< RCP<const FactoryBase> >("Partition", Teuchos::null, "Factory of the partition");

  return validParamList;
}
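
// As a usage sketch (not part of this factory), a driver-level parameter list that
// exercises the options registered above might look like the following. The values
// are illustrative only; the actual defaults live in MueLu::MasterList.
//
//   Teuchos::ParameterList mueluList;
//   mueluList.set("repartition: start level",       2);
//   mueluList.set("repartition: min rows per proc", 800);
//   mueluList.set("repartition: max imbalance",     1.2);
//   mueluList.set("repartition: remap parts",       true);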

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::DeclareInput(Level& currentLevel) const {
  Input(currentLevel, "A");
  Input(currentLevel, "Partition");
}

// Minimal traits mapping an ordinal type onto the corresponding MPI datatype,
// used by the raw MPI calls below
template <typename T> class MpiTypeTraits            { public: static MPI_Datatype getType(); };
#ifdef HAVE_XPETRA_INT_LONG_LONG
template <>           class MpiTypeTraits<long long> { public: static MPI_Datatype getType() { return MPI_LONG_LONG; } };
#endif
template <>           class MpiTypeTraits<int>       { public: static MPI_Datatype getType() { return MPI_INT;       } };

template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const Teuchos::ParameterList& pL = GetParameterList();

  const int    startLevel          = pL.get<int>   ("repartition: start level");
  const LO     minRowsPerProcessor = pL.get<LO>    ("repartition: min rows per proc");
  const double nonzeroImbalance    = pL.get<double>("repartition: max imbalance");
  const bool   remapPartitions     = pL.get<bool>  ("repartition: remap parts");

  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
  if (currentLevel.GetLevelID() < startLevel) {
    GetOStream(Statistics0) << "Repartitioning? NO:"
        << "\n  current level = " << Teuchos::toString(currentLevel.GetLevelID())
        << ", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl;

    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }
  RCP<const Map> rowMap = A->getRowMap();

  RCP<const Teuchos::Comm<int> > origComm = rowMap->getComm();
  RCP<const Teuchos::Comm<int> > comm     = origComm->duplicate();

  int numActiveProcesses = 0;
  MueLu_sumAll(comm, Teuchos::as<int>((A->getNodeNumRows() > 0) ? 1 : 0), numActiveProcesses);

  if (numActiveProcesses == 1) {
    GetOStream(Statistics0) << "Repartitioning? NO:"
        << "\n  # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl;

    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }
  bool        test3 = false, test4 = false;
  std::string msg3, msg4;

  if (minRowsPerProcessor > 0) {
    LO numMyRows = Teuchos::as<LO>(A->getNodeNumRows()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max();
    LO haveFewRows = (numMyRows < minRowsPerProcessor ? 1 : 0), numWithFewRows = 0;
    MueLu_sumAll(comm, haveFewRows, numWithFewRows);
    MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);

    if (numWithFewRows > 0)
      test3 = true;
  }

  GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries());
  MueLu_maxAll(comm, numMyNnz, maxNnz);
  MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz); // min nnz over all active processes
  double imbalance = Teuchos::as<double>(maxNnz)/minNnz;

  if (imbalance > nonzeroImbalance)
    test4 = true;

  if (!test3 && !test4) {
    GetOStream(Statistics0) << "Repartitioning? NO:" << msg3 + msg4 << std::endl;

    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }

  GetOStream(Statistics0) << "Repartitioning? YES:" << msg3 + msg4 << std::endl;
  GO  indexBase = rowMap->getIndexBase();
  Xpetra::UnderlyingLib lib = rowMap->lib();
  int myRank    = comm->getRank();
  int numProcs  = comm->getSize();

  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();

  GO numPartitions;
  if (currentLevel.IsAvailable("number of partitions")) {
    numPartitions = currentLevel.Get<GO>("number of partitions");
    GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl;

  } else {
    if (Teuchos::as<GO>(A->getGlobalNumRows()) < minRowsPerProcessor) {
      // The system is too small for the requested minimum, gather it onto a single process
      numPartitions = 1;
    } else {
      // Aim for roughly minRowsPerProcessor rows per partition
      numPartitions = A->getGlobalNumRows() / minRowsPerProcessor;
    }
    numPartitions = std::min(numPartitions, Teuchos::as<GO>(numProcs));

    currentLevel.Set("number of partitions", numPartitions, NoFactory::get());
  }
  GetOStream(Statistics0) << "Number of partitions to use = " << numPartitions << std::endl;
  RCP<GOVector> decomposition;
  if (numPartitions == 1) {
    // A single partition needs no partitioner: everything maps to partition 0
    GetOStream(Warnings0) << "Only one partition: Skip call to the repartitioner." << std::endl;
    decomposition = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), true);

  } else {
    decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");

    if (decomposition.is_null()) {
      GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
      Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
      return;
    }
  }
  if (remapPartitions) {
    DeterminePartitionPlacement(*A, *decomposition, numPartitions);
  }
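
  // Each decomposition entry is now interpreted as the MPI rank that should own the
  // corresponding row (with remapping off, partition i simply goes to rank i). Split the
  // local row GIDs into those this process keeps and those it must send elsewhere.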
  ArrayRCP<const GO> decompEntries;
  if (decomposition->getLocalLength() > 0)
    decompEntries = decomposition->getData(0);

#ifdef HAVE_MUELU_DEBUG
  // Sanity check: every decomposition entry must be a valid rank
  int incorrectRank = -1;
  for (int i = 0; i < decompEntries.size(); i++)
    if (decompEntries[i] >= numProcs || decompEntries[i] < 0) {
      incorrectRank = myRank;
      break;
    }

  int incorrectGlobalRank = -1;
  MueLu_maxAll(comm, incorrectRank, incorrectGlobalRank);
  TEUCHOS_TEST_FOR_EXCEPTION(incorrectGlobalRank > -1, Exceptions::RuntimeError,
                             "Row assigned to an invalid rank (detected on process " + Teuchos::toString(incorrectGlobalRank) + ")");
#endif

  Array<GO> myGIDs;
  myGIDs.reserve(decomposition->getLocalLength());

  // Map: target rank -> list of row GIDs to send there
  typedef std::map<GO, Array<GO> > map_type;
  map_type sendMap;

  for (LO i = 0; i < decompEntries.size(); i++) {
    GO id  = decompEntries[i];
    GO GID = rowMap->getGlobalElement(i);

    if (id == myRank)
      myGIDs    .push_back(GID);
    else
      sendMap[id].push_back(GID);
  }
  decompEntries = Teuchos::null;
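
  // Report how many rows stay on their current owner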
  GO numLocalKept = myGIDs.size(), numGlobalKept, numGlobalRows = A->getGlobalNumRows();
  MueLu_sumAll(comm, numLocalKept, numGlobalKept);
  GetOStream(Statistics2) << "Unmoved rows: " << numGlobalKept << " / " << numGlobalRows
      << " (" << 100*Teuchos::as<double>(numGlobalKept)/numGlobalRows << "%)" << std::endl;
  int numSend = sendMap.size(), numRecv;

  // "Parts" here are the target ranks this process sends to; each rank owns the part
  // whose id equals its own rank
  Array<GO> myParts(numSend), myPart(1);
  myPart[0] = myRank;
  int cnt = 0;
  for (typename map_type::const_iterator it = sendMap.begin(); it != sendMap.end(); it++)
    myParts[cnt++] = it->first;

  GO partsIndexBase = 0;
  RCP<Map>    partsIHave  = MapFactory   ::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), myParts(), partsIndexBase, comm);
  RCP<Map>    partsIOwn   = MapFactory   ::Build(lib, numProcs,                                                 myPart(),  partsIndexBase, comm);
  RCP<Export> partsExport = ExportFactory::Build(partsIHave, partsIOwn);

  RCP<GOVector> partsISend    = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIHave);
  RCP<GOVector> numPartsIRecv = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIOwn);

  ArrayRCP<GO> partsISendData = partsISend->getDataNonConst(0);
  for (int i = 0; i < numSend; i++)
    partsISendData[i] = 1;
  (numPartsIRecv->getDataNonConst(0))[0] = 0;

  numPartsIRecv->doExport(*partsISend, *partsExport, Xpetra::ADD);
  numRecv = (numPartsIRecv->getData(0))[0];
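
  // Exchange the actual GID lists with point-to-point MPI: post one nonblocking send per
  // target rank, then probe for the numRecv incoming messages of unknown size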
  MPI_Datatype MpiType = MpiTypeTraits<GO>::getType();
  const int    msgTag  = 1337;  // message tag for the GID exchange

  Array<MPI_Request> sendReqs(numSend);
  cnt = 0;
  for (typename map_type::iterator it = sendMap.begin(); it != sendMap.end(); it++)
    MPI_Isend(static_cast<void*>(it->second.getRawPtr()), it->second.size(), MpiType, Teuchos::as<GO>(it->first), msgTag, *rawMpiComm, &sendReqs[cnt++]);

  map_type recvMap;
  size_t   totalGIDs = myGIDs.size();
  for (int i = 0; i < numRecv; i++) {
    MPI_Status status;
    MPI_Probe(MPI_ANY_SOURCE, msgTag, *rawMpiComm, &status);

    int fromRank = status.MPI_SOURCE, count;
    MPI_Get_count(&status, MpiType, &count);

    recvMap[fromRank].resize(count);
    MPI_Recv(static_cast<void*>(recvMap[fromRank].getRawPtr()), count, MpiType, fromRank, msgTag, *rawMpiComm, &status);

    totalGIDs += count;
  }

  // Make sure all sends have completed before the send buffers (sendMap) go away
  Array<MPI_Status> sendStatuses(numSend);
  MPI_Waitall(numSend, sendReqs.getRawPtr(), sendStatuses.getRawPtr());

  // Merge the received GIDs with the ones this process kept
  myGIDs.reserve(totalGIDs);
  for (typename map_type::const_iterator it = recvMap.begin(); it != recvMap.end(); it++) {
    int offset = myGIDs.size(), len = it->second.size();
    if (len) {
      myGIDs.resize(offset + len);
      memcpy(myGIDs.getRawPtr() + offset, it->second.getRawPtr(), len*sizeof(GO));
    }
  }
  std::sort(myGIDs.begin(), myGIDs.end());
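
  // Build the new row map from the GIDs this process now owns, and the Import that moves
  // data from the old row map to the new one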
  RCP<Map> newRowMap = MapFactory::Build(lib, rowMap->getGlobalNumElements(), myGIDs(), indexBase, origComm);

  RCP<const Import> rowMapImporter;
  rowMapImporter = ImportFactory::Build(rowMap, newRowMap);

  Set(currentLevel, "Importer", rowMapImporter);
  if (pL.get<bool>("repartition: print partition distribution") && IsPrint(Statistics2)) {
    GetOStream(Statistics2) << "Partition distribution over cores (ownership is indicated by '+')" << std::endl;

    char amActive = (myGIDs.size() ? 1 : 0);
    std::vector<char> areActive(numProcs, 0);
    MPI_Gather(&amActive, 1, MPI_CHAR, &areActive[0], 1, MPI_CHAR, 0, *rawMpiComm);

    int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
    for (int proc = 0; proc < numProcs; proc += rowWidth) {
      for (int j = 0; j < rowWidth; j++)
        if (proc + j < numProcs)
          GetOStream(Statistics2) << (areActive[proc + j] ? "+" : ".");

      GetOStream(Statistics2) << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
    }
  }
}
template <typename T, typename W>
struct Triplet {
  T i, j;
  W v;
};

template <typename T, typename W>
static bool compareTriplets(const Triplet<T,W>& a, const Triplet<T,W>& b) {
  return (a.v > b.v);  // sort by weight, largest first
}
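
// DeterminePartitionPlacement decides which MPI rank will own each partition. Every rank
// proposes its heaviest (partition, weight) pairs, where the weight is the number of local
// nonzeros already assigned to that partition; a greedy matching over all proposals then
// keeps as much matrix data as possible in place.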
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::DeterminePartitionPlacement(const Matrix& A, GOVector& decomposition, GO numPartitions) const {
  RCP<const Map> rowMap = A.getRowMap();

  RCP<const Teuchos::Comm<int> > comm = rowMap->getComm()->duplicate();
  int numProcs = comm->getSize();

  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();

  const Teuchos::ParameterList& pL = GetParameterList();

  // Each process contributes at most maxLocal (partition, weight) pairs, i.e. dataSize numbers
  const int maxLocal = pL.get<int>("repartition: remap num values");
  const int dataSize = 2*maxLocal;

  ArrayRCP<GO> decompEntries;
  if (decomposition.getLocalLength() > 0)
    decompEntries = decomposition.getDataNonConst(0);
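
  // Step 1: accumulate local edge weights (partition -> number of local nonzeros assigned
  // to it), then flip the map so that iterating in reverse yields the heaviest edges first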
  std::map<GO,GO> lEdges;
  for (LO i = 0; i < decompEntries.size(); i++)
    lEdges[decompEntries[i]] += A.getNumEntriesInLocalRow(i);

  // Reverse map: weight -> partition (a multimap, as several partitions may share a weight)
  std::multimap<GO,GO> revlEdges;
  for (typename std::map<GO,GO>::const_iterator it = lEdges.begin(); it != lEdges.end(); it++)
    revlEdges.insert(std::make_pair(it->second, it->first));

  // Step 2: select the heaviest maxLocal edges on each process and gather them on every
  // rank; unused slots keep the value -1
  Array<GO> lData(dataSize, -1), gData(numProcs * dataSize);
  int numEdges = 0;
  for (typename std::multimap<GO,GO>::reverse_iterator rit = revlEdges.rbegin(); rit != revlEdges.rend() && numEdges < maxLocal; rit++) {
    lData[2*numEdges+0] = rit->second; // partition
    lData[2*numEdges+1] = rit->first;  // weight
    numEdges++;
  }

  MPI_Datatype MpiType = MpiTypeTraits<GO>::getType();
  MPI_Allgather(static_cast<void*>(lData.getRawPtr()), dataSize, MpiType, static_cast<void*>(gData.getRawPtr()), dataSize, MpiType, *rawMpiComm);
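
  // Step 3: turn the gathered data into a global edge list (skipping the -1 filler
  // entries) and sort it by decreasing weight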
  std::vector<Triplet<int,int> > gEdges(numProcs * maxLocal);
  int k = 0;
  for (LO i = 0; i < gData.size(); i += 2) {
    GO part   = gData[i+0];
    GO weight = gData[i+1];
    if (part != -1) {             // skip slots that were never filled
      gEdges[k].i = i/dataSize;   // the sending rank is recovered from the offset, as every rank contributed dataSize values
      gEdges[k].j = part;
      gEdges[k].v = weight;
      k++;
    }
  }
  gEdges.resize(k);

  std::sort(gEdges.begin(), gEdges.end(), compareTriplets<int,int>);
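
  // Step 4: greedy matching. Walk the edges from heaviest to lightest and accept an edge
  // whenever both its rank and its partition are still unmatched.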
  std::map<int,int> match;
  std::vector<char> matchedRanks(numProcs, 0), matchedParts(numProcs, 0);
  int numMatched = 0;
  for (typename std::vector<Triplet<int,int> >::const_iterator it = gEdges.begin(); it != gEdges.end(); it++) {
    int rank = it->i;
    int part = it->j;
    if (matchedRanks[rank] == 0 && matchedParts[part] == 0) {
      matchedRanks[rank] = 1;
      matchedParts[part] = 1;
      match[part] = rank;
      numMatched++;
    }
  }
  GetOStream(Statistics0) << "Number of unassigned partitions before cleanup stage: " << (numPartitions - numMatched) << " / " << numPartitions << std::endl;
  for (int part = 0, matcher = 0; part < numProcs; part++)
    if (match.count(part) == 0) {
      // Find the first rank that has not been matched yet
      while (matchedRanks[matcher])
        matcher++;

      match[part] = matcher++;
    }

  // Rewrite the decomposition in place: partition id -> owning rank
  for (LO i = 0; i < decompEntries.size(); i++)
    decompEntries[i] = match[decompEntries[i]];
}

} // namespace MueLu

#endif //ifdef HAVE_MPI

#endif // MUELU_REPARTITIONFACTORY_DEF_HPP