42 #include "Tpetra_Distributor.hpp" 43 #include "Teuchos_StandardParameterEntryValidators.hpp" 44 #include "Teuchos_VerboseObjectParameterListHelpers.hpp" 52 if (sendType == DISTRIBUTOR_ISEND) {
55 else if (sendType == DISTRIBUTOR_RSEND) {
58 else if (sendType == DISTRIBUTOR_SEND) {
61 else if (sendType == DISTRIBUTOR_SSEND) {
65 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Invalid " 66 "EDistributorSendType enum value " << sendType <<
".");
74 case Details::DISTRIBUTOR_NOT_INITIALIZED:
75 return "Not initialized yet";
76 case Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS:
77 return "By createFromSends";
78 case Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_RECVS:
79 return "By createFromRecvs";
80 case Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE:
81 return "By createReverseDistributor";
82 case Details::DISTRIBUTOR_INITIALIZED_BY_COPY:
83 return "By copy constructor";
93 Array<std::string> sendTypes;
94 sendTypes.push_back (
"Isend");
95 sendTypes.push_back (
"Rsend");
96 sendTypes.push_back (
"Send");
97 sendTypes.push_back (
"Ssend");
107 const bool tpetraDistributorDebugDefault =
false;
109 const bool barrierBetween_default =
false;
111 const bool useDistinctTags_default =
true;
113 #ifdef TPETRA_ENABLE_MPI_CUDA_RDMA 114 const bool enable_cuda_rdma_default =
true;
116 const bool enable_cuda_rdma_default =
false;
120 int Distributor::getTag (
const int pathTag)
const {
121 return useDistinctTags_ ? pathTag : comm_->getTag ();
125 #ifdef TPETRA_DISTRIBUTOR_TIMERS 126 void Distributor::makeTimers () {
127 const std::string name_doPosts3 =
"Tpetra::Distributor: doPosts(3)";
128 const std::string name_doPosts4 =
"Tpetra::Distributor: doPosts(4)";
129 const std::string name_doWaits =
"Tpetra::Distributor: doWaits";
130 const std::string name_doPosts3_recvs =
"Tpetra::Distributor: doPosts(3): recvs";
131 const std::string name_doPosts4_recvs =
"Tpetra::Distributor: doPosts(4): recvs";
132 const std::string name_doPosts3_barrier =
"Tpetra::Distributor: doPosts(3): barrier";
133 const std::string name_doPosts4_barrier =
"Tpetra::Distributor: doPosts(4): barrier";
134 const std::string name_doPosts3_sends =
"Tpetra::Distributor: doPosts(3): sends";
135 const std::string name_doPosts4_sends =
"Tpetra::Distributor: doPosts(4): sends";
137 timer_doPosts3_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts3);
138 timer_doPosts4_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts4);
139 timer_doWaits_ = Teuchos::TimeMonitor::getNewTimer (name_doWaits);
140 timer_doPosts3_recvs_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts3_recvs);
141 timer_doPosts4_recvs_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts4_recvs);
142 timer_doPosts3_barrier_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts3_barrier);
143 timer_doPosts4_barrier_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts4_barrier);
144 timer_doPosts3_sends_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts3_sends);
145 timer_doPosts4_sends_ = Teuchos::TimeMonitor::getNewTimer (name_doPosts4_sends);
147 #endif // TPETRA_DISTRIBUTOR_TIMERS 150 Distributor::init (
const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
151 const Teuchos::RCP<Teuchos::ParameterList>& plist)
153 this->setVerbLevel (debug_ ? Teuchos::VERB_EXTREME : Teuchos::VERB_NONE);
154 this->setOStream (out_);
155 if (! plist.is_null ()) {
158 this->setParameterList (plist);
161 #ifdef TPETRA_DISTRIBUTOR_TIMERS 163 #endif // TPETRA_DISTRIBUTOR_TIMERS 166 Teuchos::OSTab tab (out_);
167 std::ostringstream os;
168 os << comm_->getRank ()
169 <<
": Distributor ctor done" << std::endl;
176 , out_ (
Teuchos::getFancyOStream (
Teuchos::rcpFromRef (std::cerr)))
177 , howInitialized_ (
Details::DISTRIBUTOR_NOT_INITIALIZED)
178 , sendType_ (
Details::DISTRIBUTOR_SEND)
179 , barrierBetween_ (barrierBetween_default)
180 , debug_ (tpetraDistributorDebugDefault)
181 , enable_cuda_rdma_ (enable_cuda_rdma_default)
183 , selfMessage_ (false)
187 , totalReceiveLength_ (0)
188 , lastRoundBytesSend_ (0)
189 , lastRoundBytesRecv_ (0)
190 , useDistinctTags_ (useDistinctTags_default)
192 init (comm, Teuchos::null);
196 const Teuchos::RCP<Teuchos::FancyOStream>& out)
198 , out_ (out.is_null () ?
Teuchos::getFancyOStream (
Teuchos::rcpFromRef (std::cerr)) : out)
199 , howInitialized_ (
Details::DISTRIBUTOR_NOT_INITIALIZED)
200 , sendType_ (
Details::DISTRIBUTOR_SEND)
201 , barrierBetween_ (barrierBetween_default)
202 , debug_ (tpetraDistributorDebugDefault)
203 , enable_cuda_rdma_ (enable_cuda_rdma_default)
205 , selfMessage_ (false)
209 , totalReceiveLength_ (0)
210 , lastRoundBytesSend_ (0)
211 , lastRoundBytesRecv_ (0)
212 , useDistinctTags_ (useDistinctTags_default)
214 init (comm, Teuchos::null);
218 const Teuchos::RCP<Teuchos::ParameterList>& plist)
220 , out_ (
Teuchos::getFancyOStream (
Teuchos::rcpFromRef (std::cerr)))
221 , howInitialized_ (
Details::DISTRIBUTOR_NOT_INITIALIZED)
222 , sendType_ (
Details::DISTRIBUTOR_SEND)
223 , barrierBetween_ (barrierBetween_default)
224 , debug_ (tpetraDistributorDebugDefault)
225 , enable_cuda_rdma_ (enable_cuda_rdma_default)
227 , selfMessage_ (false)
231 , totalReceiveLength_ (0)
232 , lastRoundBytesSend_ (0)
233 , lastRoundBytesRecv_ (0)
234 , useDistinctTags_ (useDistinctTags_default)
240 const Teuchos::RCP<Teuchos::FancyOStream>& out,
241 const Teuchos::RCP<Teuchos::ParameterList>& plist)
243 , out_ (out.is_null () ?
Teuchos::getFancyOStream (
Teuchos::rcpFromRef (std::cerr)) : out)
244 , howInitialized_ (
Details::DISTRIBUTOR_NOT_INITIALIZED)
245 , sendType_ (
Details::DISTRIBUTOR_SEND)
246 , barrierBetween_ (barrierBetween_default)
247 , debug_ (tpetraDistributorDebugDefault)
248 , enable_cuda_rdma_ (enable_cuda_rdma_default)
250 , selfMessage_ (false)
254 , totalReceiveLength_ (0)
255 , lastRoundBytesSend_ (0)
256 , lastRoundBytesRecv_ (0)
257 , useDistinctTags_ (useDistinctTags_default)
263 : comm_ (distributor.comm_)
264 , out_ (distributor.out_)
265 , howInitialized_ (
Details::DISTRIBUTOR_INITIALIZED_BY_COPY)
266 , sendType_ (distributor.sendType_)
267 , barrierBetween_ (distributor.barrierBetween_)
268 , debug_ (distributor.debug_)
269 , enable_cuda_rdma_ (distributor.enable_cuda_rdma_)
270 , numExports_ (distributor.numExports_)
271 , selfMessage_ (distributor.selfMessage_)
272 , numSends_ (distributor.numSends_)
273 , imagesTo_ (distributor.imagesTo_)
274 , startsTo_ (distributor.startsTo_)
275 , lengthsTo_ (distributor.lengthsTo_)
276 , maxSendLength_ (distributor.maxSendLength_)
277 , indicesTo_ (distributor.indicesTo_)
278 , numReceives_ (distributor.numReceives_)
279 , totalReceiveLength_ (distributor.totalReceiveLength_)
280 , lengthsFrom_ (distributor.lengthsFrom_)
281 , imagesFrom_ (distributor.imagesFrom_)
282 , startsFrom_ (distributor.startsFrom_)
283 , indicesFrom_ (distributor.indicesFrom_)
284 , reverseDistributor_ (distributor.reverseDistributor_)
285 , lastRoundBytesSend_ (distributor.lastRoundBytesSend_)
286 , lastRoundBytesRecv_ (distributor.lastRoundBytesRecv_)
287 , useDistinctTags_ (distributor.useDistinctTags_)
289 using Teuchos::ParameterList;
290 using Teuchos::parameterList;
294 this->setVerbLevel (distributor.getVerbLevel ());
295 this->setOStream (out_);
305 RCP<const ParameterList> rhsList = distributor.getParameterList ();
306 if (! rhsList.is_null ()) {
307 this->setMyParamList (parameterList (* rhsList));
310 #ifdef TPETRA_DISTRIBUTOR_TIMERS 312 #endif // TPETRA_DISTRIBUTOR_TIMERS 315 Teuchos::OSTab tab (out_);
316 std::ostringstream os;
317 os << comm_->getRank ()
318 <<
": Distributor copy ctor done" << std::endl;
324 using Teuchos::ParameterList;
325 using Teuchos::parameterList;
328 std::swap (comm_, rhs.comm_);
329 std::swap (out_, rhs.out_);
330 std::swap (howInitialized_, rhs.howInitialized_);
331 std::swap (sendType_, rhs.sendType_);
332 std::swap (barrierBetween_, rhs.barrierBetween_);
333 std::swap (debug_, rhs.debug_);
334 std::swap (enable_cuda_rdma_, rhs.enable_cuda_rdma_);
335 std::swap (numExports_, rhs.numExports_);
336 std::swap (selfMessage_, rhs.selfMessage_);
337 std::swap (numSends_, rhs.numSends_);
338 std::swap (imagesTo_, rhs.imagesTo_);
339 std::swap (startsTo_, rhs.startsTo_);
340 std::swap (lengthsTo_, rhs.lengthsTo_);
341 std::swap (maxSendLength_, rhs.maxSendLength_);
342 std::swap (indicesTo_, rhs.indicesTo_);
343 std::swap (numReceives_, rhs.numReceives_);
344 std::swap (totalReceiveLength_, rhs.totalReceiveLength_);
345 std::swap (lengthsFrom_, rhs.lengthsFrom_);
346 std::swap (imagesFrom_, rhs.imagesFrom_);
347 std::swap (startsFrom_, rhs.startsFrom_);
348 std::swap (indicesFrom_, rhs.indicesFrom_);
349 std::swap (reverseDistributor_, rhs.reverseDistributor_);
350 std::swap (lastRoundBytesSend_, rhs.lastRoundBytesSend_);
351 std::swap (lastRoundBytesRecv_, rhs.lastRoundBytesRecv_);
352 std::swap (useDistinctTags_, rhs.useDistinctTags_);
355 const Teuchos::EVerbosityLevel lhsVerb = this->getVerbLevel ();
356 const Teuchos::EVerbosityLevel rhsVerb = rhs.getVerbLevel ();
357 this->setVerbLevel (rhsVerb);
358 rhs.setVerbLevel (lhsVerb);
362 this->setOStream (out_);
363 rhs.setOStream (rhs.out_);
367 RCP<ParameterList> lhsList = this->getNonconstParameterList ();
368 RCP<ParameterList> rhsList = rhs.getNonconstParameterList ();
369 if (lhsList.getRawPtr () == rhsList.getRawPtr () && ! rhsList.is_null ()) {
370 rhsList = parameterList (*rhsList);
372 if (! rhsList.is_null ()) {
373 this->setMyParamList (rhsList);
375 if (! lhsList.is_null ()) {
376 rhs.setMyParamList (lhsList);
387 TEUCHOS_TEST_FOR_EXCEPTION(requests_.size() != 0, std::runtime_error,
388 "Tpetra::Distributor: Destructor called with " << requests_.size()
389 <<
" outstanding posts (unfulfilled communication requests). There " 390 "should be none at this point. Please report this bug to the Tpetra " 397 using Teuchos::FancyOStream;
398 using Teuchos::getIntegralValue;
399 using Teuchos::includesVerbLevel;
400 using Teuchos::OSTab;
401 using Teuchos::ParameterList;
402 using Teuchos::parameterList;
407 plist->validateParametersAndSetDefaults (*validParams);
409 const bool barrierBetween =
410 plist->get<
bool> (
"Barrier between receives and sends");
412 getIntegralValue<Details::EDistributorSendType> (*plist,
"Send type");
413 const bool useDistinctTags = plist->get<
bool> (
"Use distinct tags");
414 const bool debug = plist->get<
bool> (
"Debug");
415 const bool enable_cuda_rdma = plist->get<
bool> (
"Enable MPI CUDA RDMA support");
421 TEUCHOS_TEST_FOR_EXCEPTION(
422 ! barrierBetween && sendType == Details::DISTRIBUTOR_RSEND,
423 std::invalid_argument,
"Tpetra::Distributor::setParameterList: " << endl
424 <<
"You specified \"Send type\"=\"Rsend\", but turned off the barrier " 425 "between receives and sends." << endl <<
"This is invalid; you must " 426 "include the barrier if you use ready sends." << endl <<
"Ready sends " 427 "require that their corresponding receives have already been posted, " 428 "and the only way to guarantee that in general is with a barrier.");
430 if (plist->isSublist (
"VerboseObject")) {
434 Teuchos::readVerboseObjectSublist (&*plist,
this);
438 sendType_ = sendType;
439 barrierBetween_ = barrierBetween;
440 useDistinctTags_ = useDistinctTags;
442 enable_cuda_rdma_ = enable_cuda_rdma;
446 this->setMyParamList (plist);
449 Teuchos::RCP<const Teuchos::ParameterList>
452 using Teuchos::Array;
453 using Teuchos::ParameterList;
454 using Teuchos::parameterList;
456 using Teuchos::setStringToIntegralParameter;
458 const bool barrierBetween = barrierBetween_default;
459 const bool useDistinctTags = useDistinctTags_default;
460 const bool debug = tpetraDistributorDebugDefault;
461 const bool enable_cuda_rdma = enable_cuda_rdma_default;
464 const std::string defaultSendType (
"Send");
465 Array<Details::EDistributorSendType> sendTypeEnums;
466 sendTypeEnums.push_back (Details::DISTRIBUTOR_ISEND);
467 sendTypeEnums.push_back (Details::DISTRIBUTOR_RSEND);
468 sendTypeEnums.push_back (Details::DISTRIBUTOR_SEND);
469 sendTypeEnums.push_back (Details::DISTRIBUTOR_SSEND);
471 RCP<ParameterList> plist = parameterList (
"Tpetra::Distributor");
472 plist->set (
"Barrier between receives and sends", barrierBetween,
473 "Whether to execute a barrier between receives and sends in do" 474 "[Reverse]Posts(). Required for correctness when \"Send type\"" 475 "=\"Rsend\", otherwise correct but not recommended.");
476 setStringToIntegralParameter<Details::EDistributorSendType> (
"Send type",
477 defaultSendType,
"When using MPI, the variant of send to use in " 478 "do[Reverse]Posts()", sendTypes(), sendTypeEnums(), plist.getRawPtr());
479 plist->set (
"Use distinct tags", useDistinctTags,
"Whether to use distinct " 480 "MPI message tags for different code paths.");
481 plist->set (
"Debug", debug,
"Whether to print copious debugging output on " 483 plist->set (
"Enable MPI CUDA RDMA support", enable_cuda_rdma,
484 "Whether to enable RDMA support for MPI communication between " 485 "CUDA GPUs. Only enable this if you know for sure your MPI " 486 "library supports it.");
488 Teuchos::setupVerboseObjectSublist (&*plist);
489 return Teuchos::rcp_const_cast<
const ParameterList> (plist);
494 {
return totalReceiveLength_; }
497 {
return numReceives_; }
500 {
return selfMessage_; }
503 {
return numSends_; }
506 {
return maxSendLength_; }
509 {
return imagesFrom_; }
512 {
return lengthsFrom_; }
515 {
return imagesTo_; }
518 {
return lengthsTo_; }
520 Teuchos::RCP<Distributor>
522 if (reverseDistributor_.is_null ()) {
523 createReverseDistributor ();
525 return reverseDistributor_;
530 Distributor::createReverseDistributor()
const 532 reverseDistributor_ = Teuchos::rcp (
new Distributor (comm_));
537 size_t totalSendLength =
538 std::accumulate (lengthsTo_.begin(), lengthsTo_.end(), 0);
543 size_t maxReceiveLength = 0;
544 const int myImageID = comm_->getRank();
545 for (
size_t i=0; i < numReceives_; ++i) {
546 if (imagesFrom_[i] != myImageID) {
548 if (lengthsFrom_[i] > maxReceiveLength) {
549 maxReceiveLength = lengthsFrom_[i];
557 reverseDistributor_->lengthsTo_ = lengthsFrom_;
558 reverseDistributor_->imagesTo_ = imagesFrom_;
559 reverseDistributor_->indicesTo_ = indicesFrom_;
560 reverseDistributor_->startsTo_ = startsFrom_;
561 reverseDistributor_->lengthsFrom_ = lengthsTo_;
562 reverseDistributor_->imagesFrom_ = imagesTo_;
563 reverseDistributor_->indicesFrom_ = indicesTo_;
564 reverseDistributor_->startsFrom_ = startsTo_;
565 reverseDistributor_->numSends_ = numReceives_;
566 reverseDistributor_->numReceives_ = numSends_;
567 reverseDistributor_->selfMessage_ = selfMessage_;
568 reverseDistributor_->maxSendLength_ = maxReceiveLength;
569 reverseDistributor_->totalReceiveLength_ = totalSendLength;
570 reverseDistributor_->howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_REVERSE;
580 using Teuchos::Array;
581 using Teuchos::CommRequest;
582 using Teuchos::FancyOStream;
583 using Teuchos::includesVerbLevel;
584 using Teuchos::is_null;
585 using Teuchos::OSTab;
587 using Teuchos::waitAll;
590 Teuchos::OSTab tab (out_);
592 #ifdef TPETRA_DISTRIBUTOR_TIMERS 593 Teuchos::TimeMonitor timeMon (*timer_doWaits_);
594 #endif // TPETRA_DISTRIBUTOR_TIMERS 596 const int myRank = comm_->getRank ();
599 std::ostringstream os;
600 os << myRank <<
": doWaits: # reqs = " 601 << requests_.size () << endl;
605 if (requests_.size() > 0) {
606 waitAll (*comm_, requests_());
608 #ifdef HAVE_TEUCHOS_DEBUG 610 for (Array<RCP<CommRequest<int> > >::const_iterator it = requests_.begin();
611 it != requests_.end(); ++it)
613 TEUCHOS_TEST_FOR_EXCEPTION( ! is_null (*it), std::runtime_error,
614 Teuchos::typeName(*
this) <<
"::doWaits(): Communication requests " 615 "should all be null aftr calling Teuchos::waitAll() on them, but " 616 "at least one request is not null.");
618 #endif // HAVE_TEUCHOS_DEBUG 621 requests_.resize (0);
624 #ifdef HAVE_TEUCHOS_DEBUG 626 const int localSizeNonzero = (requests_.size () != 0) ? 1 : 0;
627 int globalSizeNonzero = 0;
628 Teuchos::reduceAll<int, int> (*comm_, Teuchos::REDUCE_MAX,
630 Teuchos::outArg (globalSizeNonzero));
631 TEUCHOS_TEST_FOR_EXCEPTION(
632 globalSizeNonzero != 0, std::runtime_error,
633 "Tpetra::Distributor::doWaits: After waitAll, at least one process has " 634 "a nonzero number of outstanding posts. There should be none at this " 635 "point. Please report this bug to the Tpetra developers.");
637 #endif // HAVE_TEUCHOS_DEBUG 640 std::ostringstream os;
641 os << myRank <<
": doWaits done" << endl;
648 if (! reverseDistributor_.is_null()) {
649 reverseDistributor_->doWaits();
654 std::ostringstream out;
656 out <<
"\"Tpetra::Distributor\": {";
657 const std::string label = this->getObjectLabel ();
659 out <<
"Label: " << label <<
", ";
661 out <<
"How initialized: " 665 << DistributorSendTypeEnumToString (sendType_)
666 <<
", Barrier between receives and sends: " 667 << (barrierBetween_ ?
"true" :
"false")
668 <<
", Use distinct tags: " 669 << (useDistinctTags_ ?
"true" :
"false")
670 <<
", Debug: " << (debug_ ?
"true" :
"false")
671 <<
", Enable MPI CUDA RDMA support: " 672 << (enable_cuda_rdma_ ?
"true" :
"false")
679 const Teuchos::EVerbosityLevel verbLevel)
const 683 using Teuchos::VERB_DEFAULT;
684 using Teuchos::VERB_NONE;
685 using Teuchos::VERB_LOW;
686 using Teuchos::VERB_MEDIUM;
687 using Teuchos::VERB_HIGH;
688 using Teuchos::VERB_EXTREME;
689 Teuchos::EVerbosityLevel vl = verbLevel;
690 if (vl == VERB_DEFAULT) vl = VERB_LOW;
691 const int myImageID = comm_->getRank();
692 const int numImages = comm_->getSize();
693 Teuchos::OSTab tab (out);
695 if (vl == VERB_NONE) {
698 if (myImageID == 0) {
702 out <<
"\"Tpetra::Distributor\":" << endl;
703 Teuchos::OSTab tab2 (out);
704 const std::string label = this->getObjectLabel ();
706 out <<
"Label: " << label << endl;
708 out <<
"How initialized: " 710 << endl <<
"Parameters: " << endl;
712 Teuchos::OSTab tab3 (out);
713 out <<
"\"Send type\": " 714 << DistributorSendTypeEnumToString (sendType_) << endl
715 <<
"\"Barrier between receives and sends\": " 716 << (barrierBetween_ ?
"true" :
"false") << endl;
717 out <<
"\"Use distinct tags\": " 718 << (useDistinctTags_ ?
"true" :
"false") << endl;
719 out <<
"\"Debug\": " << (debug_ ?
"true" :
"false") << endl;
720 out <<
"\"Enable MPI CUDA RDMA support\": " <<
721 (enable_cuda_rdma_ ?
"true" :
"false") << endl;
724 if (vl == VERB_LOW) {
727 Teuchos::OSTab tab2 (out);
732 for (
int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
733 if (myImageID == imageCtr) {
734 if (myImageID == 0) {
735 out <<
"Number of processes: " << numImages << endl;
737 out <<
"Process: " << myImageID << endl;
738 Teuchos::OSTab tab3 (out);
741 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
742 out <<
"imagesTo: " << toString (imagesTo_) << endl;
743 out <<
"lengthsTo: " << toString (lengthsTo_) << endl;
746 if (vl == VERB_EXTREME) {
747 out <<
"startsTo: " << toString (startsTo_) << endl;
748 out <<
"indicesTo: " << toString (indicesTo_) << endl;
750 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
753 out <<
"lengthsFrom: " << toString (lengthsFrom_) << endl;
754 out <<
"startsFrom: " << toString (startsFrom_) << endl;
755 out <<
"imagesFrom: " << toString (imagesFrom_) << endl;
772 Distributor::computeReceives ()
774 using Teuchos::Array;
776 using Teuchos::CommStatus;
777 using Teuchos::CommRequest;
778 using Teuchos::ireceive;
781 using Teuchos::REDUCE_SUM;
782 using Teuchos::receive;
783 using Teuchos::reduce;
784 using Teuchos::scatter;
786 using Teuchos::waitAll;
789 Teuchos::OSTab tab (out_);
790 const int myRank = comm_->getRank();
791 const int numProcs = comm_->getSize();
794 const int pathTag = 2;
795 const int tag = this->getTag (pathTag);
798 std::ostringstream os;
799 os << myRank <<
": computeReceives: " 800 "{selfMessage_: " << (selfMessage_ ?
"true" :
"false")
801 <<
", tag: " << tag <<
"}" << endl;
811 Array<int> toNodesFromMe (numProcs, 0);
812 #ifdef HAVE_TEUCHOS_DEBUG 813 bool counting_error =
false;
814 #endif // HAVE_TEUCHOS_DEBUG 815 for (
size_t i = 0; i < (numSends_ + (selfMessage_ ? 1 : 0)); ++i) {
816 #ifdef HAVE_TEUCHOS_DEBUG 817 if (toNodesFromMe[imagesTo_[i]] != 0) {
818 counting_error =
true;
820 #endif // HAVE_TEUCHOS_DEBUG 821 toNodesFromMe[imagesTo_[i]] = 1;
823 #ifdef HAVE_TEUCHOS_DEBUG 825 "Tpetra::Distributor::computeReceives: There was an error on at least " 826 "one process in counting the number of messages send by that process to " 827 "the other processs. Please report this bug to the Tpetra developers.",
829 #endif // HAVE_TEUCHOS_DEBUG 832 std::ostringstream os;
833 os << myRank <<
": computeReceives: Calling reduce and scatter" << endl;
890 Array<int> numRecvsOnEachProc;
891 if (myRank == root) {
892 numRecvsOnEachProc.resize (numProcs);
894 int numReceivesAsInt = 0;
895 reduce<int, int> (toNodesFromMe.getRawPtr (),
896 numRecvsOnEachProc.getRawPtr (),
897 numProcs, REDUCE_SUM, root, *comm_);
898 scatter<int, int> (numRecvsOnEachProc.getRawPtr (), 1,
899 &numReceivesAsInt, 1, root, *comm_);
900 numReceives_ =
static_cast<size_t> (numReceivesAsInt);
906 lengthsFrom_.assign (numReceives_, 0);
907 imagesFrom_.assign (numReceives_, 0);
923 const size_t actualNumReceives = numReceives_ - (selfMessage_ ? 1 : 0);
929 Array<RCP<CommRequest<int> > > requests (actualNumReceives);
930 Array<ArrayRCP<size_t> > lengthsFromBuffers (actualNumReceives);
931 Array<RCP<CommStatus<int> > > statuses (actualNumReceives);
936 const int anySourceProc = MPI_ANY_SOURCE;
938 const int anySourceProc = -1;
942 std::ostringstream os;
943 os << myRank <<
": computeReceives: Posting " 944 << actualNumReceives <<
" irecvs" << endl;
949 for (
size_t i = 0; i < actualNumReceives; ++i) {
954 lengthsFromBuffers[i].resize (1);
955 lengthsFromBuffers[i][0] = as<size_t> (0);
956 requests[i] = ireceive<int, size_t> (lengthsFromBuffers[i], anySourceProc, tag, *comm_);
958 std::ostringstream os;
959 os << myRank <<
": computeReceives: " 960 "Posted any-proc irecv w/ specified tag " << tag << endl;
966 std::ostringstream os;
967 os << myRank <<
": computeReceives: " 968 "posting " << numSends_ <<
" sends" << endl;
979 for (
size_t i = 0; i < numSends_ + (selfMessage_ ? 1 : 0); ++i) {
980 if (imagesTo_[i] != myRank) {
984 const size_t*
const lengthsTo_i = &lengthsTo_[i];
985 send<int, size_t> (lengthsTo_i, 1, as<int> (imagesTo_[i]), tag, *comm_);
987 std::ostringstream os;
988 os << myRank <<
": computeReceives: " 989 "Posted send to Proc " << imagesTo_[i] <<
" w/ specified tag " 1001 lengthsFrom_[numReceives_-1] = lengthsTo_[i];
1002 imagesFrom_[numReceives_-1] = myRank;
1007 std::ostringstream os;
1008 os << myRank <<
": computeReceives: waitAll on " 1009 << requests.size () <<
" requests" << endl;
1018 waitAll (*comm_, requests (), statuses ());
1019 for (
size_t i = 0; i < actualNumReceives; ++i) {
1020 lengthsFrom_[i] = *lengthsFromBuffers[i];
1021 imagesFrom_[i] = statuses[i]->getSourceRank ();
1027 sort2 (imagesFrom_.begin(), imagesFrom_.end(), lengthsFrom_.begin());
1030 totalReceiveLength_ = std::accumulate (lengthsFrom_.begin(), lengthsFrom_.end(), 0);
1031 indicesFrom_.clear ();
1032 indicesFrom_.reserve (totalReceiveLength_);
1033 for (
size_t i = 0; i < totalReceiveLength_; ++i) {
1034 indicesFrom_.push_back(i);
1037 startsFrom_.clear ();
1038 startsFrom_.reserve (numReceives_);
1039 for (
size_t i = 0, j = 0; i < numReceives_; ++i) {
1040 startsFrom_.push_back(j);
1041 j += lengthsFrom_[i];
1049 std::ostringstream os;
1050 os << myRank <<
": computeReceives: done" << endl;
1058 using Teuchos::outArg;
1059 using Teuchos::REDUCE_MAX;
1060 using Teuchos::reduceAll;
1063 Teuchos::OSTab tab (out_);
1065 numExports_ = exportNodeIDs.size();
1067 const int myImageID = comm_->getRank();
1068 const int numImages = comm_->getSize();
1070 std::ostringstream os;
1071 os << myImageID <<
": createFromSends" << endl;
1123 Teuchos::Array<size_t> starts (numImages + 1, 0);
1126 size_t numActive = 0;
1127 int needSendBuff = 0;
1129 #ifdef HAVE_TPETRA_DEBUG 1131 #endif // HAVE_TPETRA_DEBUG 1132 for (
size_t i = 0; i < numExports_; ++i) {
1133 const int exportID = exportNodeIDs[i];
1134 if (exportID >= numImages) {
1135 #ifdef HAVE_TPETRA_DEBUG 1137 #endif // HAVE_TPETRA_DEBUG 1140 else if (exportID >= 0) {
1154 if (needSendBuff==0 && starts[exportID] > 1 && exportID != exportNodeIDs[i-1]) {
1161 #ifdef HAVE_TPETRA_DEBUG 1168 reduceAll<int, int> (*comm_, REDUCE_MAX, badID, outArg (gbl_badID));
1169 TEUCHOS_TEST_FOR_EXCEPTION(gbl_badID >= 0, std::runtime_error,
1170 Teuchos::typeName(*
this) <<
"::createFromSends(): Process " << gbl_badID
1171 <<
", perhaps among other processes, got a bad send process ID.");
1186 #endif // HAVE_TPETRA_DEBUG 1188 #if defined(HAVE_TPETRA_THROW_EFFICIENCY_WARNINGS) || defined(HAVE_TPETRA_PRINT_EFFICIENCY_WARNINGS) 1190 int global_needSendBuff;
1191 reduceAll<int, int> (*comm_, REDUCE_MAX, needSendBuff,
1192 outArg (global_needSendBuff));
1194 global_needSendBuff != 0, std::runtime_error,
1195 "::createFromSends: Grouping export IDs together by process rank often " 1196 "improves performance.");
1202 if (starts[myImageID] != 0) {
1203 selfMessage_ =
true;
1206 selfMessage_ =
false;
1209 #ifdef HAVE_TEUCHOS_DEBUG 1210 bool index_neq_numActive =
false;
1211 bool send_neq_numSends =
false;
1213 if (! needSendBuff) {
1218 for (
int i = 0; i < numImages; ++i) {
1226 indicesTo_.resize(0);
1229 imagesTo_.assign(numSends_,0);
1230 startsTo_.assign(numSends_,0);
1231 lengthsTo_.assign(numSends_,0);
1238 size_t index = 0, nodeIndex = 0;
1239 for (
size_t i = 0; i < numSends_; ++i) {
1240 while (exportNodeIDs[nodeIndex] < 0) {
1243 startsTo_[i] = nodeIndex;
1244 int imageID = exportNodeIDs[nodeIndex];
1245 imagesTo_[i] = imageID;
1246 index += starts[imageID];
1247 nodeIndex += starts[imageID];
1249 #ifdef HAVE_TEUCHOS_DEBUG 1250 if (index != numActive) {
1251 index_neq_numActive =
true;
1257 if (numSends_ > 0) {
1258 sort2(imagesTo_.begin(), imagesTo_.end(), startsTo_.begin());
1262 for (
size_t i = 0; i < numSends_; ++i) {
1263 int imageID = imagesTo_[i];
1264 lengthsTo_[i] = starts[imageID];
1265 if ((imageID != myImageID) && (lengthsTo_[i] > maxSendLength_)) {
1266 maxSendLength_ = lengthsTo_[i];
1277 if (starts[0] == 0 ) {
1283 for (Teuchos::Array<size_t>::iterator i=starts.begin()+1,
1285 i != starts.end(); ++i)
1287 if (*i != 0) ++numSends_;
1293 for (Teuchos::Array<size_t>::reverse_iterator ip1=starts.rbegin(),
1294 i=starts.rbegin()+1;
1295 i != starts.rend(); ++i)
1304 indicesTo_.resize(numActive);
1306 for (
size_t i = 0; i < numExports_; ++i) {
1307 if (exportNodeIDs[i] >= 0) {
1309 indicesTo_[starts[exportNodeIDs[i]]] = i;
1311 ++starts[exportNodeIDs[i]];
1323 for (
int node = numImages-1; node != 0; --node) {
1324 starts[node] = starts[node-1];
1327 starts[numImages] = numActive;
1334 imagesTo_.resize(numSends_);
1335 startsTo_.resize(numSends_);
1336 lengthsTo_.resize(numSends_);
1343 for (
int node = 0; node < numImages; ++node ) {
1344 if (starts[node+1] != starts[node]) {
1345 lengthsTo_[snd] = starts[node+1] - starts[node];
1346 startsTo_[snd] = starts[node];
1348 if ((node != myImageID) && (lengthsTo_[snd] > maxSendLength_)) {
1349 maxSendLength_ = lengthsTo_[snd];
1351 imagesTo_[snd] = node;
1355 #ifdef HAVE_TEUCHOS_DEBUG 1356 if (snd != numSends_) {
1357 send_neq_numSends =
true;
1361 #ifdef HAVE_TEUCHOS_DEBUG 1363 "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
1365 "Tpetra::Distributor::createFromSends: logic error. Please notify the Tpetra team.",*comm_);
1368 if (selfMessage_) --numSends_;
1374 std::ostringstream os;
1375 os << myImageID <<
": createFromSends: done" << endl;
1381 howInitialized_ = Details::DISTRIBUTOR_INITIALIZED_BY_CREATE_FROM_SENDS;
1383 return totalReceiveLength_;
Namespace Tpetra contains the class and methods constituting the Tpetra library.
size_t getNumReceives() const
The number of processes from which we will receive data.
std::string description() const
A simple one-line description of this object.
ArrayView< const int > getImagesTo() const
Ranks of the processes to which this process will send values.
EDistributorHowInitialized
Enum indicating how and whether a Distributor was initialized.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
List of valid Distributor parameters.
void swap(Distributor &rhs)
Swap the contents of rhs with those of *this.
std::string DistributorSendTypeEnumToString(EDistributorSendType sendType)
Convert an EDistributorSendType enum value to a string.
ArrayView< const size_t > getLengthsFrom() const
Number of values this process will receive from each process.
Implementation details of Tpetra.
bool hasSelfMessage() const
Whether the calling process will send or receive messages to itself.
Sets up and executes a communication plan for a Tpetra DistObject.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &plist)
Set Distributor parameters.
size_t createFromSends(const ArrayView< const int > &exportNodeIDs)
Set up Distributor using list of process ranks to which this process will send.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficiency warning.
ArrayView< const size_t > getLengthsTo() const
Number of values this process will send to each process.
virtual ~Distributor()
Destructor (virtual for memory safety).
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
std::string DistributorHowInitializedEnumToString(EDistributorHowInitialized how)
Convert an EDistributorHowInitialized enum value to a string.
ArrayView< const int > getImagesFrom() const
Ranks of the processes sending values to this process.
size_t getNumSends() const
The number of processes to which we will send data.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to an FancyOStream.
size_t getMaxSendLength() const
Maximum number of values this process will send to another single process.
#define SHARED_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg, comm)
Test for exception, with reduction over the given communicator.
Array< std::string > distributorSendTypes()
Valid values for Distributor's "Send type" parameter.
RCP< Distributor > getReverse() const
A reverse communication plan Distributor.
EDistributorSendType
The type of MPI send that Distributor should use.
Distributor(const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Construct using the specified communicator and default parameters.