42 #ifndef TPETRA_IMPORT_UTIL2_HPP 43 #define TPETRA_IMPORT_UTIL2_HPP 50 #include "Tpetra_ConfigDefs.hpp" 52 #include "Tpetra_Import.hpp" 53 #include "Tpetra_HashTable.hpp" 54 #include "Tpetra_Map.hpp" 56 #include "Tpetra_Distributor.hpp" 57 #include <Teuchos_Array.hpp> 69 template<
class T,
class D>
70 Kokkos::View<T*, D, Kokkos::MemoryUnmanaged>
71 getNonconstView (
const Teuchos::ArrayView<T>& x)
73 typedef Kokkos::View<T*, D, Kokkos::MemoryUnmanaged> view_type;
74 typedef typename view_type::size_type size_type;
75 const size_type numEnt =
static_cast<size_type
> (x.size ());
76 return view_type (x.getRawPtr (), numEnt);
79 template<
class T,
class D>
80 Kokkos::View<const T*, D, Kokkos::MemoryUnmanaged>
81 getConstView (
const Teuchos::ArrayView<const T>& x)
83 typedef Kokkos::View<const T*, D, Kokkos::MemoryUnmanaged> view_type;
84 typedef typename view_type::size_type size_type;
85 const size_type numEnt =
static_cast<size_type
> (x.size ());
86 return view_type (x.getRawPtr (), numEnt);
90 template<
class NodeType>
91 struct NodeToExecSpace {
92 #ifdef KOKKOS_HAVE_SERIAL 93 typedef Kokkos::Serial execution_space;
95 typedef Kokkos::HostSpace::execution_space execution_space;
96 #endif // KOKKOS_HAVE_SERIAL 100 template<
class ExecSpace>
101 struct NodeToExecSpace<
Kokkos::Compat::KokkosDeviceWrapperNode<ExecSpace> > {
102 typedef typename ExecSpace::execution_space execution_space;
107 template<
class Space>
108 struct GetHostExecSpace {
109 typedef typename Space::execution_space execution_space;
110 typedef typename Kokkos::View<int*, execution_space>::HostMirror::execution_space host_execution_space;
116 namespace Import_Util {
132 template<
typename Scalar,
133 typename LocalOrdinal,
134 typename GlobalOrdinal,
138 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
139 Teuchos::Array<char>& exports,
140 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
141 size_t& constantNumPackets,
143 const Teuchos::ArrayView<const int>& SourcePids);
160 template<
typename Scalar,
161 typename LocalOrdinal,
162 typename GlobalOrdinal,
166 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
167 const Teuchos::ArrayView<const char> &imports,
168 const Teuchos::ArrayView<size_t> &numPacketsPerLID,
169 size_t constantNumPackets,
173 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
174 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs);
190 template<
typename Scalar,
191 typename LocalOrdinal,
192 typename GlobalOrdinal,
196 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
197 const Teuchos::ArrayView<const char>& imports,
198 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
199 size_t constantNumPackets,
203 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
204 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
205 size_t TargetNumRows,
206 size_t TargetNumNonzeros,
208 const Teuchos::ArrayView<size_t>& rowPointers,
209 const Teuchos::ArrayView<GlobalOrdinal>& columnIndices,
210 const Teuchos::ArrayView<Scalar>& values,
211 const Teuchos::ArrayView<const int>& SourcePids,
212 Teuchos::Array<int>& TargetPids);
216 template<
typename Scalar,
typename Ordinal>
219 const Teuchos::ArrayView<Ordinal>& CRS_colind,
220 const Teuchos::ArrayView<Scalar>&CRS_vals);
226 template<
typename Scalar,
typename Ordinal>
229 const Teuchos::ArrayView<Ordinal>& CRS_colind,
230 const Teuchos::ArrayView<Scalar>& CRS_vals);
247 template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
250 const Teuchos::ArrayView<LocalOrdinal> &columnIndices_LID,
251 const Teuchos::ArrayView<GlobalOrdinal> &columnIndices_GID,
253 const Teuchos::ArrayView<const int> &owningPids,
254 Teuchos::Array<int> &remotePids,
284 template<
class LO,
class GO,
class D>
286 packRowCount (
const size_t numEnt,
287 const size_t numBytesPerValue)
300 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
301 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
302 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (lid);
303 const size_t valsLen = numEnt * numBytesPerValue;
304 return numEntLen + gidsLen + pidsLen + valsLen;
308 template<
class LO,
class D>
312 const size_t numBytes,
313 const size_t numBytesPerValue)
315 using Kokkos::subview;
317 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
318 typedef typename input_buffer_type::size_type size_type;
322 return static_cast<size_t> (0);
326 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
327 #ifdef HAVE_TPETRA_DEBUG 328 TEUCHOS_TEST_FOR_EXCEPTION(
329 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 330 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
332 #endif // HAVE_TPETRA_DEBUG 333 const std::pair<size_type, size_type> rng (offset, offset + theNumBytes);
334 input_buffer_type inBuf = subview (imports, rng);
335 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
336 (void)actualNumBytes;
337 #ifdef HAVE_TPETRA_DEBUG 338 TEUCHOS_TEST_FOR_EXCEPTION(
339 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 340 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
342 #endif // HAVE_TPETRA_DEBUG 343 return static_cast<size_t> (numEntLO);
348 template<
class ST,
class LO,
class GO,
class D>
356 const size_t numBytesPerValue)
358 using Kokkos::subview;
365 typedef typename PackTraits<LO, D>::output_buffer_type output_buffer_type;
366 typedef typename output_buffer_type::size_type size_type;
367 typedef typename std::pair<size_type, size_type> pair_type;
375 const LO numEntLO =
static_cast<size_t> (numEnt);
378 const size_t numEntBeg = offset;
379 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
380 const size_t gidsBeg = numEntBeg + numEntLen;
381 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
382 const size_t pidsBeg = gidsBeg + gidsLen;
383 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (pid);
384 const size_t valsBeg = pidsBeg + pidsLen;
385 const size_t valsLen = numEnt * numBytesPerValue;
387 output_buffer_type numEntOut =
388 subview (exports, pair_type (numEntBeg, numEntBeg + numEntLen));
389 output_buffer_type gidsOut =
390 subview (exports, pair_type (gidsBeg, gidsBeg + gidsLen));
391 output_buffer_type pidsOut =
392 subview (exports, pair_type (pidsBeg, pidsBeg + pidsLen));
393 output_buffer_type valsOut =
394 subview (exports, pair_type (valsBeg, valsBeg + valsLen));
396 size_t numBytesOut = 0;
397 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
398 numBytesOut += PackTraits<GO, D>::packArray (gidsOut, gidsIn, numEnt);
399 numBytesOut += PackTraits<int, D>::packArray (pidsOut, pidsIn, numEnt);
400 numBytesOut += PackTraits<ST, D>::packArray (valsOut, valsIn, numEnt);
402 const size_t expectedNumBytes = numEntLen + gidsLen + pidsLen + valsLen;
403 TEUCHOS_TEST_FOR_EXCEPTION(
404 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 405 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 406 << expectedNumBytes <<
".");
412 template<
class ST,
class LO,
class GO,
class D>
419 const size_t numBytes,
421 const size_t numBytesPerValue)
423 using Kokkos::subview;
430 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
431 typedef typename input_buffer_type::size_type size_type;
432 typedef typename std::pair<size_type, size_type> pair_type;
438 TEUCHOS_TEST_FOR_EXCEPTION(
439 static_cast<size_t> (imports.dimension_0 ()) <= offset, std::logic_error,
440 "unpackRow: imports.dimension_0() = " << imports.dimension_0 () <<
441 " <= offset = " << offset <<
".");
442 TEUCHOS_TEST_FOR_EXCEPTION(
443 static_cast<size_t> (imports.dimension_0 ()) < offset + numBytes,
444 std::logic_error,
"unpackRow: imports.dimension_0() = " 445 << imports.dimension_0 () <<
" < offset + numBytes = " 446 << (offset + numBytes) <<
".");
452 const size_t numEntBeg = offset;
453 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
454 const size_t gidsBeg = numEntBeg + numEntLen;
455 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
456 const size_t pidsBeg = gidsBeg + gidsLen;
457 const size_t pidsLen = numEnt * PackTraits<int, D>::packValueCount (pid);
458 const size_t valsBeg = pidsBeg + pidsLen;
459 const size_t valsLen = numEnt * numBytesPerValue;
461 input_buffer_type numEntIn = subview (imports, pair_type (numEntBeg, numEntBeg + numEntLen));
462 input_buffer_type gidsIn = subview (imports, pair_type (gidsBeg, gidsBeg + gidsLen));
463 input_buffer_type pidsIn = subview (imports, pair_type (pidsBeg, pidsBeg + pidsLen));
464 input_buffer_type valsIn = subview (imports, pair_type (valsBeg, valsBeg + valsLen));
466 size_t numBytesOut = 0;
468 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
469 TEUCHOS_TEST_FOR_EXCEPTION(
470 static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
471 "unpackRow: Expected number of entries " << numEnt
472 <<
" != actual number of entries " << numEntOut <<
".");
474 numBytesOut += PackTraits<GO, D>::unpackArray (gidsOut, gidsIn, numEnt);
475 numBytesOut += PackTraits<int, D>::unpackArray (pidsOut, pidsIn, numEnt);
476 numBytesOut += PackTraits<ST, D>::unpackArray (valsOut, valsIn, numEnt);
478 TEUCHOS_TEST_FOR_EXCEPTION(
479 numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " 480 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
481 const size_t expectedNumBytes = numEntLen + gidsLen + pidsLen + valsLen;
482 TEUCHOS_TEST_FOR_EXCEPTION(
483 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 484 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 485 << expectedNumBytes <<
".");
493 namespace Import_Util {
495 template<
typename Scalar,
496 typename LocalOrdinal,
497 typename GlobalOrdinal,
501 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
502 Teuchos::Array<char>& exports,
503 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
504 size_t& constantNumPackets,
506 const Teuchos::ArrayView<const int>& SourcePids)
509 using Kokkos::MemoryUnmanaged;
510 using Kokkos::subview;
512 using Teuchos::Array;
513 using Teuchos::ArrayView;
516 typedef LocalOrdinal LO;
517 typedef GlobalOrdinal GO;
519 typedef typename matrix_type::impl_scalar_type ST;
520 typedef typename NodeToExecSpace<Node>::execution_space execution_space;
521 typedef typename GetHostExecSpace<execution_space>::host_execution_space HES;
523 typedef typename ArrayView<const LO>::size_type size_type;
524 typedef std::pair<typename View<int*, HES>::size_type,
525 typename View<int*, HES>::size_type> pair_type;
526 const char prefix[] =
"Tpetra::Import_Util::packAndPrepareWithOwningPIDs: ";
533 TEUCHOS_TEST_FOR_EXCEPTION(
535 prefix <<
"SourceMatrix must be locally indexed.");
536 TEUCHOS_TEST_FOR_EXCEPTION(
537 SourceMatrix.
getColMap ().is_null (), std::logic_error,
538 prefix <<
"The source matrix claims to be locally indexed, but its column " 539 "Map is null. This should never happen. Please report this bug to the " 540 "Tpetra developers.");
541 const size_type numExportLIDs = exportLIDs.size ();
542 TEUCHOS_TEST_FOR_EXCEPTION(
543 numExportLIDs != numPacketsPerLID.size (), std::invalid_argument, prefix
544 <<
"exportLIDs.size() = " << numExportLIDs <<
"!= numPacketsPerLID.size() " 545 <<
" = " << numPacketsPerLID.size () <<
".");
546 TEUCHOS_TEST_FOR_EXCEPTION(
547 static_cast<size_t> (SourcePids.size ()) != SourceMatrix.
getColMap ()->getNodeNumElements (),
548 std::invalid_argument, prefix <<
"SourcePids.size() = " 549 << SourcePids.size ()
550 <<
"!= SourceMatrix.getColMap()->getNodeNumElements() = " 551 << SourceMatrix.
getColMap ()->getNodeNumElements () <<
".");
556 constantNumPackets = 0;
561 size_t totalNumBytes = 0;
562 size_t totalNumEntries = 0;
563 size_t maxRowLength = 0;
564 for (size_type i = 0; i < numExportLIDs; ++i) {
565 const LO lclRow = exportLIDs[i];
570 size_t numBytesPerValue = 0;
576 ArrayView<const Scalar> valsView;
577 ArrayView<const LO> lidsView;
579 const ST* valsViewRaw =
reinterpret_cast<const ST*
> (valsView.getRawPtr ());
580 View<const ST*, HES, MemoryUnmanaged> valsViewK (valsViewRaw, valsView.size ());
581 TEUCHOS_TEST_FOR_EXCEPTION(
582 static_cast<size_t> (valsViewK.dimension_0 ()) != numEnt,
583 std::logic_error, prefix <<
"Local row " << i <<
" claims to have " 584 << numEnt <<
"entry/ies, but the View returned by getLocalRowView() " 585 "has " << valsViewK.dimension_0 () <<
" entry/ies. This should never " 586 "happen. Please report this bug to the Tpetra developers.");
591 numBytesPerValue = PackTraits<ST, HES>::packValueCount (valsViewK(0));
594 const size_t numBytes =
595 packRowCount<LO, GO, HES> (numEnt, numBytesPerValue);
596 numPacketsPerLID[i] = numBytes;
597 totalNumBytes += numBytes;
598 totalNumEntries += numEnt;
599 maxRowLength = std::max (maxRowLength, numEnt);
605 if (totalNumEntries > 0) {
606 exports.resize (totalNumBytes);
607 View<char*, HES, MemoryUnmanaged> exportsK (exports.getRawPtr (),
616 const map_type& colMap = * (SourceMatrix.
getColMap ());
620 View<int*, HES> pids;
624 gids = PackTraits<GO, HES>::allocateArray (gid, maxRowLength,
"gids");
625 pids = PackTraits<int, HES>::allocateArray (pid, maxRowLength,
"pids");
628 for (size_type i = 0; i < numExportLIDs; i++) {
629 const LO lclRow = exportLIDs[i];
632 ArrayView<const Scalar> valsView;
633 ArrayView<const LO> lidsView;
635 const ST* valsViewRaw =
reinterpret_cast<const ST*
> (valsView.getRawPtr ());
636 View<const ST*, HES, MemoryUnmanaged> valsViewK (valsViewRaw, valsView.size ());
637 const size_t numEnt =
static_cast<size_t> (valsViewK.dimension_0 ());
642 const size_t numBytesPerValue = numEnt == 0 ?
643 static_cast<size_t> (0) :
644 PackTraits<ST, HES>::packValueCount (valsViewK(0));
647 View<GO*, HES> gidsView = subview (gids, pair_type (0, numEnt));
648 View<int*, HES> pidsView = subview (pids, pair_type (0, numEnt));
649 for (
size_t k = 0; k < numEnt; ++k) {
650 gidsView(k) = colMap.getGlobalElement (lidsView[k]);
651 pidsView(k) = SourcePids[lidsView[k]];
655 const size_t numBytes =
656 packRow<ST, LO, GO, HES> (exportsK, offset, numEnt,
657 gidsView, pidsView, valsViewK,
663 #ifdef HAVE_TPETRA_DEBUG 664 TEUCHOS_TEST_FOR_EXCEPTION(
665 offset != totalNumBytes, std::logic_error, prefix <<
"At end of method, " 666 "the final offset (in bytes) " << offset <<
" does not equal the total " 667 "number of bytes packed " << totalNumBytes <<
". Please report this bug " 668 "to the Tpetra developers.");
669 #endif // HAVE_TPETRA_DEBUG 673 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
676 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
677 const Teuchos::ArrayView<const char> &imports,
678 const Teuchos::ArrayView<size_t> &numPacketsPerLID,
679 size_t constantNumPackets,
683 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
684 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
686 using Kokkos::MemoryUnmanaged;
688 typedef LocalOrdinal LO;
689 typedef GlobalOrdinal GO;
691 typedef typename matrix_type::impl_scalar_type ST;
692 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
693 typedef typename NodeToExecSpace<Node>::execution_space execution_space;
694 typedef typename GetHostExecSpace<execution_space>::host_execution_space HES;
695 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
697 TEUCHOS_TEST_FOR_EXCEPTION(
698 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
699 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size () <<
" != " 700 "permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
704 TEUCHOS_TEST_FOR_EXCEPTION(
705 ! locallyIndexed, std::invalid_argument, prefix <<
"The input CrsMatrix " 706 "'SourceMatrix' must be locally indexed.");
707 TEUCHOS_TEST_FOR_EXCEPTION(
708 importLIDs.size () != numPacketsPerLID.size (), std::invalid_argument,
709 prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != " 710 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
".");
712 View<const char*, HES, MemoryUnmanaged> importsK (imports.getRawPtr (),
719 for (
size_t sourceLID = 0; sourceLID < numSameIDs; ++sourceLID) {
720 const LO srcLID =
static_cast<LO
> (sourceLID);
725 const size_type numPermuteToLIDs = permuteToLIDs.size ();
726 for (size_type p = 0; p < numPermuteToLIDs; ++p) {
732 const size_type numImportLIDs = importLIDs.size ();
733 for (size_type i = 0; i < numImportLIDs; ++i) {
734 const size_t numBytes = numPacketsPerLID[i];
738 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset,
739 numBytes,
sizeof (ST));
746 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
749 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
750 const Teuchos::ArrayView<const char>& imports,
751 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
752 const size_t constantNumPackets,
755 const size_t numSameIDs,
756 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
757 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
758 size_t TargetNumRows,
759 size_t TargetNumNonzeros,
760 const int MyTargetPID,
761 const Teuchos::ArrayView<size_t>& CSR_rowptr,
762 const Teuchos::ArrayView<GlobalOrdinal>& CSR_colind,
763 const Teuchos::ArrayView<Scalar>& CSR_vals,
764 const Teuchos::ArrayView<const int>& SourcePids,
765 Teuchos::Array<int>& TargetPids)
768 using Kokkos::MemoryUnmanaged;
769 using Kokkos::subview;
771 using Teuchos::ArrayView;
773 using Teuchos::av_reinterpret_cast;
774 using Teuchos::outArg;
775 using Teuchos::REDUCE_MAX;
776 using Teuchos::reduceAll;
777 typedef LocalOrdinal LO;
778 typedef GlobalOrdinal GO;
779 typedef typename NodeToExecSpace<Node>::execution_space execution_space;
780 typedef typename GetHostExecSpace<execution_space>::host_execution_space HES;
782 typedef typename matrix_type::impl_scalar_type ST;
784 typedef typename ArrayView<const LO>::size_type size_type;
787 const char prefix[] =
"Tpetra::Import_Util::unpackAndCombineIntoCrsArrays: ";
789 const size_t N = TargetNumRows;
790 const size_t mynnz = TargetNumNonzeros;
793 const int MyPID = MyTargetPID;
795 TEUCHOS_TEST_FOR_EXCEPTION(
796 TargetNumRows + 1 != static_cast<size_t> (CSR_rowptr.size ()),
797 std::invalid_argument, prefix <<
"CSR_rowptr.size() = " <<
798 CSR_rowptr.size () <<
"!= TargetNumRows+1 = " << TargetNumRows+1 <<
".");
799 TEUCHOS_TEST_FOR_EXCEPTION(
800 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
801 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size ()
802 <<
"!= permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
".");
803 const size_type numImportLIDs = importLIDs.size ();
804 TEUCHOS_TEST_FOR_EXCEPTION(
805 numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
806 prefix <<
"importLIDs.size() = " << numImportLIDs <<
" != " 807 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
810 View<const char*, HES, MemoryUnmanaged> importsK (imports.getRawPtr (),
813 for (
size_t i = 0; i< N+1; ++i) {
818 for (
size_t i = 0; i < numSameIDs; ++i) {
823 size_t numPermuteIDs = permuteToLIDs.size ();
824 for (
size_t i = 0; i < numPermuteIDs; ++i) {
825 CSR_rowptr[permuteToLIDs[i]] =
832 for (size_type k = 0; k < numImportLIDs; ++k) {
833 const size_t numBytes = numPacketsPerLID[k];
837 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset,
838 numBytes,
sizeof (ST));
839 CSR_rowptr[importLIDs[k]] += numEnt;
846 Teuchos::Array<size_t> NewStartRow (N + 1);
849 size_t last_len = CSR_rowptr[0];
851 for (
size_t i = 1; i < N+1; ++i) {
852 size_t new_len = CSR_rowptr[i];
853 CSR_rowptr[i] = last_len + CSR_rowptr[i-1];
854 NewStartRow[i] = CSR_rowptr[i];
858 TEUCHOS_TEST_FOR_EXCEPTION(
859 CSR_rowptr[N] != mynnz, std::invalid_argument, prefix <<
"CSR_rowptr[last]" 860 " = " << CSR_rowptr[N] <<
"!= mynnz = " << mynnz <<
".");
863 if (static_cast<size_t> (TargetPids.size ()) != mynnz) {
864 TargetPids.resize (mynnz);
866 TargetPids.assign (mynnz, -1);
869 ArrayRCP<const size_t> Source_rowptr_RCP;
870 ArrayRCP<const LO> Source_colind_RCP;
871 ArrayRCP<const Scalar> Source_vals_RCP;
872 SourceMatrix.getAllValues (Source_rowptr_RCP, Source_colind_RCP, Source_vals_RCP);
873 ArrayView<const size_t> Source_rowptr = Source_rowptr_RCP ();
874 ArrayView<const LO> Source_colind = Source_colind_RCP ();
875 ArrayView<const Scalar> Source_vals = Source_vals_RCP ();
877 const map_type& sourceColMap = * (SourceMatrix.
getColMap());
878 ArrayView<const GO> globalColElts = sourceColMap.getNodeElementList();
881 for (
size_t i = 0; i < numSameIDs; ++i) {
882 size_t FromRow = Source_rowptr[i];
883 size_t ToRow = CSR_rowptr[i];
884 NewStartRow[i] += Source_rowptr[i+1] - Source_rowptr[i];
886 for (
size_t j = Source_rowptr[i]; j < Source_rowptr[i+1]; ++j) {
887 CSR_vals[ToRow + j - FromRow] = Source_vals[j];
888 CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]];
889 TargetPids[ToRow + j - FromRow] =
890 (SourcePids[Source_colind[j]] != MyPID) ?
891 SourcePids[Source_colind[j]] : -1;
895 size_t numBytesPerValue = 0;
896 if (PackTraits<ST, HES>::compileTimeSize) {
898 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
913 if (Source_rowptr.size () == 0 || Source_rowptr[Source_rowptr.size () - 1] == 0) {
914 numBytesPerValue = PackTraits<ST, HES>::packValueCount (CSR_vals[0]);
917 numBytesPerValue = PackTraits<ST, HES>::packValueCount (Source_vals[0]);
919 size_t lclNumBytesPerValue = numBytesPerValue;
920 Teuchos::RCP<const Teuchos::Comm<int> > comm = SourceMatrix.
getComm ();
921 reduceAll<int, size_t> (*comm, REDUCE_MAX, lclNumBytesPerValue,
922 outArg (numBytesPerValue));
926 for (
size_t i = 0; i < numPermuteIDs; ++i) {
927 LO FromLID = permuteFromLIDs[i];
928 size_t FromRow = Source_rowptr[FromLID];
929 size_t ToRow = CSR_rowptr[permuteToLIDs[i]];
931 NewStartRow[permuteToLIDs[i]] += Source_rowptr[FromLID+1]-Source_rowptr[FromLID];
933 for (
size_t j = Source_rowptr[FromLID]; j < Source_rowptr[FromLID+1]; ++j) {
934 CSR_vals[ToRow + j - FromRow] = Source_vals[j];
935 CSR_colind[ToRow + j - FromRow] = globalColElts[Source_colind[j]];
936 TargetPids[ToRow + j - FromRow] =
937 (SourcePids[Source_colind[j]] != MyPID) ?
938 SourcePids[Source_colind[j]] : -1;
943 if (imports.size () > 0) {
945 #ifdef HAVE_TPETRA_DEBUG 949 for (
size_t i = 0; i < static_cast<size_t> (numImportLIDs); ++i) {
950 const size_t numBytes = numPacketsPerLID[i];
958 const size_t numEnt = unpackRowCount<LO, HES> (importsK, offset, numBytes,
960 const LO lclRow = importLIDs[i];
961 const size_t StartRow = NewStartRow[lclRow];
962 NewStartRow[lclRow] += numEnt;
964 View<GO*, HES, MemoryUnmanaged> gidsOut =
965 getNonconstView<GO, HES> (CSR_colind (StartRow, numEnt));
966 View<int*, HES, MemoryUnmanaged> pidsOut =
967 getNonconstView<int, HES> (TargetPids (StartRow, numEnt));
968 ArrayView<Scalar> valsOutS = CSR_vals (StartRow, numEnt);
969 View<ST*, HES, MemoryUnmanaged> valsOut =
970 getNonconstView<ST, HES> (av_reinterpret_cast<ST> (valsOutS));
972 const size_t numBytesOut =
973 unpackRow<ST, LO, GO, HES> (gidsOut, pidsOut, valsOut, importsK,
974 offset, numBytes, numEnt, numBytesPerValue);
975 if (numBytesOut != numBytes) {
976 #ifdef HAVE_TPETRA_DEBUG 983 for (
size_t j = 0; j < numEnt; ++j) {
984 const int pid = pidsOut[j];
985 pidsOut[j] = (pid != MyPID) ? pid : -1;
989 #ifdef HAVE_TPETRA_DEBUG 990 TEUCHOS_TEST_FOR_EXCEPTION(
991 offset != static_cast<size_t> (imports.size ()), std::logic_error, prefix
992 <<
"After unpacking and counting all the import packets, the final offset" 993 " in bytes " << offset <<
" != imports.size() = " << imports.size () <<
994 ". Please report this bug to the Tpetra developers.");
995 TEUCHOS_TEST_FOR_EXCEPTION(
996 lclErr != 0, std::logic_error, prefix <<
"numBytes != numBytesOut " 997 "somewhere in unpack loop. This should never happen. " 998 "Please report this bug to the Tpetra developers.");
999 #endif // HAVE_TPETRA_DEBUG 1004 template<
typename Scalar,
typename Ordinal>
1007 const Teuchos::ArrayView<Ordinal> & CRS_colind,
1008 const Teuchos::ArrayView<Scalar> &CRS_vals)
1013 size_t NumRows = CRS_rowptr.size()-1;
1014 size_t nnz = CRS_colind.size();
1016 for(
size_t i = 0; i < NumRows; i++){
1017 size_t start=CRS_rowptr[i];
1018 if(start >= nnz)
continue;
1020 Scalar* locValues = &CRS_vals[start];
1021 size_t NumEntries = CRS_rowptr[i+1] - start;
1022 Ordinal* locIndices = &CRS_colind[start];
1024 Ordinal n = NumEntries;
1028 Ordinal max = n - m;
1029 for(Ordinal j = 0; j < max; j++) {
1030 for(Ordinal k = j; k >= 0; k-=m) {
1031 if(locIndices[k+m] >= locIndices[k])
1033 Scalar dtemp = locValues[k+m];
1034 locValues[k+m] = locValues[k];
1035 locValues[k] = dtemp;
1036 Ordinal itemp = locIndices[k+m];
1037 locIndices[k+m] = locIndices[k];
1038 locIndices[k] = itemp;
1047 template<
typename Scalar,
typename Ordinal>
1050 const Teuchos::ArrayView<Ordinal> & CRS_colind,
1051 const Teuchos::ArrayView<Scalar> &CRS_vals)
1058 size_t NumRows = CRS_rowptr.size()-1;
1059 size_t nnz = CRS_colind.size();
1060 size_t new_curr=CRS_rowptr[0], old_curr=CRS_rowptr[0];
1062 for(
size_t i = 0; i < NumRows; i++){
1063 size_t start=CRS_rowptr[i];
1064 if(start >= nnz)
continue;
1066 Scalar* locValues = &CRS_vals[start];
1067 size_t NumEntries = CRS_rowptr[i+1] - start;
1068 Ordinal* locIndices = &CRS_colind[start];
1071 Ordinal n = NumEntries;
1075 Ordinal max = n - m;
1076 for(Ordinal j = 0; j < max; j++) {
1077 for(Ordinal k = j; k >= 0; k-=m) {
1078 if(locIndices[k+m] >= locIndices[k])
1080 Scalar dtemp = locValues[k+m];
1081 locValues[k+m] = locValues[k];
1082 locValues[k] = dtemp;
1083 Ordinal itemp = locIndices[k+m];
1084 locIndices[k+m] = locIndices[k];
1085 locIndices[k] = itemp;
1092 for(
size_t j=CRS_rowptr[i]; j < CRS_rowptr[i+1]; j++) {
1093 if(j > CRS_rowptr[i] && CRS_colind[j]==CRS_colind[new_curr-1]) {
1094 CRS_vals[new_curr-1] += CRS_vals[j];
1096 else if(new_curr==j) {
1100 CRS_colind[new_curr] = CRS_colind[j];
1101 CRS_vals[new_curr] = CRS_vals[j];
1106 CRS_rowptr[i] = old_curr;
1110 CRS_rowptr[NumRows] = new_curr;
1113 template <
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1116 const ArrayView<LocalOrdinal> &colind_LID,
1117 const ArrayView<GlobalOrdinal> &colind_GID,
1119 const Teuchos::ArrayView<const int> &owningPIDs,
1120 Teuchos::Array<int> &remotePIDs,
1124 typedef LocalOrdinal LO;
1125 typedef GlobalOrdinal GO;
1128 const char prefix[] =
"lowCommunicationMakeColMapAndReindex: ";
1132 const map_type& domainMap = *domainMapRCP;
1138 Teuchos::Array<bool> LocalGIDs;
1139 if (numDomainElements > 0) {
1140 LocalGIDs.resize (numDomainElements,
false);
1151 const size_t numMyRows = rowptr.size () - 1;
1152 const int hashsize = std::max (static_cast<int> (numMyRows), 100);
1155 Teuchos::Array<GO> RemoteGIDList;
1156 RemoteGIDList.reserve (hashsize);
1157 Teuchos::Array<int> PIDList;
1158 PIDList.reserve (hashsize);
1169 size_t NumLocalColGIDs = 0;
1170 LO NumRemoteColGIDs = 0;
1171 for (
size_t i = 0; i < numMyRows; ++i) {
1172 for(
size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
1173 const GO GID = colind_GID[j];
1175 const LO LID = domainMap.getLocalElement (GID);
1177 const bool alreadyFound = LocalGIDs[LID];
1178 if (! alreadyFound) {
1179 LocalGIDs[LID] =
true;
1182 colind_LID[j] = LID;
1185 const LO hash_value = RemoteGIDs.
get (GID);
1186 if (hash_value == -1) {
1187 const int PID = owningPIDs[j];
1188 TEUCHOS_TEST_FOR_EXCEPTION(
1189 PID == -1, std::invalid_argument, prefix <<
"Cannot figure out if " 1191 colind_LID[j] =
static_cast<LO
> (numDomainElements + NumRemoteColGIDs);
1192 RemoteGIDs.
add (GID, NumRemoteColGIDs);
1193 RemoteGIDList.push_back (GID);
1194 PIDList.push_back (PID);
1198 colind_LID[j] =
static_cast<LO
> (numDomainElements + hash_value);
1206 if (domainMap.getComm ()->getSize () == 1) {
1209 TEUCHOS_TEST_FOR_EXCEPTION(
1210 NumRemoteColGIDs != 0, std::runtime_error, prefix <<
"There is only one " 1211 "process in the domain Map's communicator, which means that there are no " 1212 "\"remote\" indices. Nevertheless, some column indices are not in the " 1214 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1218 colMap = domainMapRCP;
1225 const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
1226 Teuchos::Array<GO> ColIndices;
1227 GO* RemoteColIndices = NULL;
1228 if (numMyCols > 0) {
1229 ColIndices.resize (numMyCols);
1230 if (NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
1231 RemoteColIndices = &ColIndices[NumLocalColGIDs];
1235 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1236 RemoteColIndices[i] = RemoteGIDList[i];
1240 Teuchos::Array<LO> RemotePermuteIDs (NumRemoteColGIDs);
1241 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1242 RemotePermuteIDs[i]=i;
1249 ColIndices.begin () + NumLocalColGIDs,
1250 RemotePermuteIDs.begin ());
1256 remotePIDs = PIDList;
1265 LO StartCurrent = 0, StartNext = 1;
1266 while (StartNext < NumRemoteColGIDs) {
1267 if (PIDList[StartNext]==PIDList[StartNext-1]) {
1271 Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
1272 ColIndices.begin () + NumLocalColGIDs + StartNext,
1273 RemotePermuteIDs.begin () + StartCurrent);
1274 StartCurrent = StartNext;
1278 Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
1279 ColIndices.begin () + NumLocalColGIDs + StartNext,
1280 RemotePermuteIDs.begin () + StartCurrent);
1283 Teuchos::Array<LO> ReverseRemotePermuteIDs (NumRemoteColGIDs);
1284 for (LO i = 0; i < NumRemoteColGIDs; ++i) {
1285 ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
1289 bool use_local_permute =
false;
1290 Teuchos::Array<LO> LocalPermuteIDs (numDomainElements);
1302 Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getNodeElementList();
1303 if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
1304 if (NumLocalColGIDs > 0) {
1306 std::copy (domainGlobalElements.begin (), domainGlobalElements.end (),
1307 ColIndices.begin ());
1311 LO NumLocalAgain = 0;
1312 use_local_permute =
true;
1313 for (
size_t i = 0; i < numDomainElements; ++i) {
1315 LocalPermuteIDs[i] = NumLocalAgain;
1316 ColIndices[NumLocalAgain++] = domainGlobalElements[i];
1319 TEUCHOS_TEST_FOR_EXCEPTION(
1320 static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
1321 std::runtime_error, prefix <<
"Local ID count test failed.");
1325 const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
1326 colMap = rcp (
new map_type (minus_one, ColIndices, domainMap.getIndexBase (),
1327 domainMap.getComm (), domainMap.getNode ()));
1330 for (
size_t i = 0; i < numMyRows; ++i) {
1331 for (
size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
1332 const LO ID = colind_LID[j];
1333 if (static_cast<size_t> (ID) < numDomainElements) {
1334 if (use_local_permute) {
1335 colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
1342 colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
1354 #include "Tpetra_CrsMatrix.hpp" 1356 #endif // TPETRA_IMPORT_UTIL_HPP void sortAndMergeCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort and merge the entries of the (raw CSR) matrix by column index within each row.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3)
Sort the first array, and apply the same permutation to the second and third arrays.
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
Declaration of the Tpetra::CrsMatrix class.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Kokkos::View< char *, D, Kokkos::MemoryUnmanaged > output_buffer_type
The type of an output buffer of bytes.
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort the entries of the (raw CSR) matrix by column index within each row.
size_t global_size_t
Global size_t object.
size_t unpackAndCombineWithOwningPIDsCount(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Special version of Tpetra::CrsMatrix::unpackAndCombine that also unpacks owning process ranks...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
void lowCommunicationMakeColMapAndReindex(const Teuchos::ArrayView< const size_t > &rowPointers, const Teuchos::ArrayView< LocalOrdinal > &columnIndices_LID, const Teuchos::ArrayView< GlobalOrdinal > &columnIndices_GID, const Tpetra::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::ArrayView< const int > &owningPids, Teuchos::Array< int > &remotePids, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &colMap)
lowCommunicationMakeColMapAndReindex
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
Kokkos::View< const char *, D, Kokkos::MemoryUnmanaged > input_buffer_type
The type of an input buffer of bytes.
void unpackAndCombineIntoCrsArrays(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, int MyTargetPID, const Teuchos::ArrayView< size_t > &rowPointers, const Teuchos::ArrayView< GlobalOrdinal > &columnIndices, const Teuchos::ArrayView< Scalar > &values, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
ValueType get(const KeyType key)
Get the value corresponding to the given key.
Describes a parallel distribution of objects over processes.
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
Kokkos::View< value_type *, D, Kokkos::MemoryUnmanaged > output_array_type
The type of an output array of value_type.
Stand-alone utility functions and macros.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of entries on this node in the specified local row.
void packAndPrepareWithOwningPIDs(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &SourceMatrix, const Teuchos::ArrayView< const LocalOrdinal > &exportLIDs, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor, const Teuchos::ArrayView< const int > &SourcePids)
Special version of Tpetra::CrsMatrix::packAndPrepare that also packs owning process ranks...