42 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
85 #ifndef DOXYGEN_SHOULD_SKIP_THIS
95 namespace PackCrsMatrixImpl {
// Functor that computeNumPacketsAndOffsets instantiates (as functor_type) and
// hands to Kokkos::parallel_scan. For each scan index it writes the running
// total into outputOffsets_ and the per-row byte count into counts_.
//
// NOTE(review): this listing omits lines of the original file (the class
// head, braces, and some statements). The comments below describe only what
// the visible lines show.
103 template<
class OutputOffsetsViewType,
104 class CountsViewType,
105 class InputOffsetsViewType,
106 class InputLocalRowIndicesViewType,
107 class InputLocalRowPidsViewType,
109 #ifdef HAVE_TPETRA_DEBUG
// Entry (value) types of each of the View template arguments.
117 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
118 typedef typename CountsViewType::non_const_value_type count_type;
119 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
120 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
121 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The device type is taken from the output offsets View; the counts View must
// share its execution space.
123 typedef typename OutputOffsetsViewType::device_type device_type;
124 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
125 typename device_type::execution_space>::value,
126 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
// Compile-time requirements: each argument is a Kokkos::View of a built-in
// integer type, and the two output Views are nonconst.
127 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
128 "OutputOffsetsViewType must be a Kokkos::View.");
129 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
130 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<output_offset_type>::value,
132 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
133 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
134 "CountsViewType must be a Kokkos::View.");
// NOTE(review): the next assertion compares CountsViewType::value_type with
// output_offset_type, while its message is about CountsViewType being
// nonconst. count_type looks like the intended comparison type; as written it
// also forces counts and offsets to share a value type. Confirm intent.
135 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
136 "CountsViewType must be a nonconst Kokkos::View.");
137 static_assert (std::is_integral<count_type>::value,
138 "The type of each entry of CountsViewType must be a built-in integer type.");
139 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
140 "InputOffsetsViewType must be a Kokkos::View.");
141 static_assert (std::is_integral<input_offset_type>::value,
142 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
143 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
144 "InputLocalRowIndicesViewType must be a Kokkos::View.");
145 static_assert (std::is_integral<local_row_index_type>::value,
146 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor parameter list (its first line, including the outputOffsets
// parameter, is not visible here). The four sizeOf* arguments are byte sizes
// used by operator() to compute each row's packed size.
149 const CountsViewType& counts,
150 const InputOffsetsViewType& rowOffsets,
151 const InputLocalRowIndicesViewType& lclRowInds,
152 const InputLocalRowPidsViewType& lclRowPids,
153 const count_type sizeOfLclCount,
154 const count_type sizeOfGblColInd,
155 const count_type sizeOfPid,
156 const count_type sizeOfValue) :
157 outputOffsets_ (outputOffsets),
159 rowOffsets_ (rowOffsets),
160 lclRowInds_ (lclRowInds),
161 lclRowPids_ (lclRowPids),
162 sizeOfLclCount_ (sizeOfLclCount),
163 sizeOfGblColInd_ (sizeOfGblColInd),
164 sizeOfPid_ (sizeOfPid),
165 sizeOfValue_ (sizeOfValue),
// Constructor body: dimension checks that throw std::invalid_argument.
// counts must have one entry per row to pack, and outputOffsets one more than
// that. Whether these checks are conditional (e.g. debug-only) is not visible
// in this listing.
169 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
171 if (numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
172 std::ostringstream os;
173 os <<
"lclRowInds.extent(0) = " << numRowsToPack
174 <<
" != counts.extent(0) = " << counts_.extent (0)
176 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
178 if (
static_cast<size_t> (numRowsToPack + 1) !=
179 static_cast<size_t> (outputOffsets_.extent (0))) {
180 std::ostringstream os;
181 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
182 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
184 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body. `update` is the running sum supplied by parallel_scan and `final`
// is the final-pass flag from the signature. The statements that use `final`
// and that advance `update` are not visible in this listing.
189 KOKKOS_INLINE_FUNCTION
void
190 operator() (
const local_row_index_type& curInd,
191 output_offset_type& update,
192 const bool final)
const
// Range checks on the scan index (the bodies of these branches are not visible).
195 if (curInd <
static_cast<local_row_index_type
> (0)) {
203 if (curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
// Store the running total at this index.
208 outputOffsets_(curInd) = update;
// The scan range has one more index than counts_ has entries, so only indices
// inside counts_ compute a per-row byte count.
211 if (curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
212 const auto lclRow = lclRowInds_(curInd);
// Reject row indices outside rowOffsets_ (the branch body is not visible).
213 if (
static_cast<size_t> (lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
214 static_cast<local_row_index_type
> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row, from consecutive row offsets.
222 const count_type count =
223 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// An empty row packs to zero bytes. Otherwise the size is the count field plus
// a per-entry size that includes sizeOfPid_ only when lclRowPids_ is nonempty.
// The end of this expression is cut off in the listing; presumably the
// remaining term is sizeOfValue_ -- TODO confirm.
229 const count_type numBytes = (count == 0) ?
230 static_cast<count_type
> (0) :
231 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
232 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
236 counts_(curInd) = numBytes;
// Fragment that creates a host mirror of error_. It presumably belongs to the
// getError() accessor called by computeNumPacketsAndOffsets -- TODO confirm.
248 auto error_h = Kokkos::create_mirror_view (error_);
// Data members: the two output Views, const-typed input Views, and byte sizes.
254 OutputOffsetsViewType outputOffsets_;
255 CountsViewType counts_;
256 typename InputOffsetsViewType::const_type rowOffsets_;
257 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
258 typename InputLocalRowPidsViewType::const_type lclRowPids_;
259 count_type sizeOfLclCount_;
260 count_type sizeOfGblColInd_;
261 count_type sizeOfPid_;
262 count_type sizeOfValue_;
// Rank-0 View on the device type holding an int error value.
263 Kokkos::View<int, device_type> error_;
// Compute the per-row packed sizes (counts) and their offsets (outputOffsets)
// for the rows listed in lclRowInds, by running NumPacketsAndOffsetsFunctor in
// a Kokkos::parallel_scan. Returns a count_type read back from outputOffsets.
//
// Throws std::invalid_argument when the View dimensions are inconsistent, and
// std::runtime_error when the functor reports a nonzero error code.
//
// NOTE(review): this listing omits lines of the original file (braces and
// some statements). The comments describe only what is visible.
275 template<
class OutputOffsetsViewType,
276 class CountsViewType,
277 class InputOffsetsViewType,
278 class InputLocalRowIndicesViewType,
279 class InputLocalRowPidsViewType>
280 typename CountsViewType::non_const_value_type
281 computeNumPacketsAndOffsets (
const OutputOffsetsViewType& outputOffsets,
282 const CountsViewType& counts,
283 const InputOffsetsViewType& rowOffsets,
284 const InputLocalRowIndicesViewType& lclRowInds,
285 const InputLocalRowPidsViewType& lclRowPids,
286 const typename CountsViewType::non_const_value_type sizeOfLclCount,
287 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
288 const typename CountsViewType::non_const_value_type sizeOfPid,
289 const typename CountsViewType::non_const_value_type sizeOfValue)
// The functor is instantiated with the const_type of each input View.
291 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
292 CountsViewType,
typename InputOffsetsViewType::const_type,
293 typename InputLocalRowIndicesViewType::const_type,
294 typename InputLocalRowPidsViewType::const_type> functor_type;
295 typedef typename CountsViewType::non_const_value_type count_type;
296 typedef typename OutputOffsetsViewType::size_type size_type;
297 typedef typename OutputOffsetsViewType::execution_space execution_space;
298 typedef typename functor_type::local_row_index_type LO;
299 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
300 const char prefix[] =
"computeNumPacketsAndOffsets: ";
302 count_type count = 0;
303 const count_type numRowsToPack = lclRowInds.extent (0);
// Zero rows to pack is handled separately (the branch body is not visible).
305 if (numRowsToPack == 0) {
// With at least one row to pack, rowOffsets needs more than one entry.
309 TEUCHOS_TEST_FOR_EXCEPTION
310 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
311 std::invalid_argument, prefix <<
"There is at least one row to pack, "
312 "but the matrix has no rows. lclRowInds.extent(0) = " <<
313 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
314 rowOffsets.extent (0) <<
" <= 1.");
// outputOffsets must have exactly numRowsToPack + 1 entries.
315 TEUCHOS_TEST_FOR_EXCEPTION
316 (outputOffsets.extent (0) !=
317 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
318 prefix <<
"Output dimension does not match number of rows to pack. "
319 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
320 <<
" != lclRowInds.extent(0) + 1 = "
321 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
// counts must have exactly numRowsToPack entries.
322 TEUCHOS_TEST_FOR_EXCEPTION
323 (counts.extent (0) != numRowsToPack, std::invalid_argument,
324 prefix <<
"counts.extent(0) = " << counts.extent (0)
325 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Run the scan over numRowsToPack + 1 indices, matching the length of
// outputOffsets checked above.
327 functor_type f (outputOffsets, counts, rowOffsets,
328 lclRowInds, lclRowPids, sizeOfLclCount,
329 sizeOfGblColInd, sizeOfPid, sizeOfValue);
330 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// A nonzero error code from the functor becomes a std::runtime_error.
333 const int errCode = f.getError ();
334 TEUCHOS_TEST_FOR_EXCEPTION
335 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
336 << errCode <<
" != 0.");
// NOTE(review): the fragment from here to the "return {false, errStr}" below
// compares outputOffsets(numRowsToPack) with a sum of counts and formats a
// diagnostic. Its braced return does not match this function's count_type
// return type, and the declarations of `total` and `errStr` are not visible,
// so it is presumably disabled (e.g. preprocessor-excluded) verification
// code -- TODO confirm against the full file.
340 for (LO k = 0; k < numRowsToPack; ++k) {
343 if (outputOffsets(numRowsToPack) != total) {
344 if (errStr.get () == NULL) {
345 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
347 std::ostringstream& os = *errStr;
349 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
350 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
351 << total <<
"." << std::endl;
352 if (numRowsToPack != 0) {
// The full arrays are printed only when there are fewer than 10 rows.
354 if (numRowsToPack <
static_cast<LO
> (10)) {
355 os <<
"outputOffsets: [";
356 for (LO i = 0; i <= numRowsToPack; ++i) {
357 os << outputOffsets(i);
358 if (
static_cast<LO
> (i + 1) <= numRowsToPack) {
362 os <<
"]" << std::endl;
364 for (LO i = 0; i < numRowsToPack; ++i) {
366 if (
static_cast<LO
> (i + 1) < numRowsToPack) {
370 os <<
"]" << std::endl;
373 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
374 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
377 count = outputOffsets(numRowsToPack);
378 return {
false, errStr};
// Visible return path: read an entry of outputOffsets through getEntryOnHost
// and cast it to count_type. The index argument is cut off in this listing.
384 using Tpetra::Details::getEntryOnHost;
385 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Pack one matrix row into the byte buffer `exports`, starting at `offset`.
//
// Buffer layout, as fixed by the *_beg computations below:
//   [entry count][global column indices][process ids, only if pack_pids][values]
//
// Returns a (error code, number of bytes written) pair:
//   0  -> success (also used by the early return near the top),
//   10 -> at least one pack call reported a nonzero error code,
//   11 -> the byte total differs from the expected total.
//
// NOTE(review): this listing omits lines of the original file (several
// parameters, length computations, and the pack calls). The comments describe
// only what is visible.
405 template<
class ST,
class ColumnMap,
class BufferDeviceType>
407 Kokkos::pair<int, size_t>
408 packCrsMatrixRow (
const ColumnMap& col_map,
409 const Kokkos::View<char*, BufferDeviceType>& exports,
414 const size_t num_ent,
415 const size_t num_bytes_per_value,
416 const bool pack_pids)
418 using Kokkos::subview;
419 typedef typename ColumnMap::local_ordinal_type LO;
420 typedef typename ColumnMap::global_ordinal_type GO;
421 typedef BufferDeviceType BDT;
422 typedef Kokkos::pair<int, size_t> return_type;
// Early exit with nothing packed. Its condition is not visible; presumably it
// is the empty-row case -- TODO confirm.
426 return return_type (0, 0);
429 const LO num_ent_LO =
static_cast<LO
> (num_ent);
// Start offsets of the sections, each following the previous section.
430 const size_t num_ent_beg = offset;
433 const size_t gids_beg = num_ent_beg + num_ent_len;
436 const size_t pids_beg = gids_beg + gids_len;
// The pid section has zero length when pids are not packed.
437 const size_t pids_len = pack_pids ?
439 static_cast<size_t> (0);
441 const size_t vals_beg = gids_beg + gids_len + pids_len;
442 const size_t vals_len = num_ent * num_bytes_per_value;
// Raw output pointers for each section; the pid pointer is NULL when pids are
// not packed.
444 char*
const num_ent_out = exports.data () + num_ent_beg;
445 char*
const gids_out = exports.data () + gids_beg;
446 char*
const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
447 char*
const vals_out = exports.data () + vals_beg;
449 size_t num_bytes_out = 0;
// Column indices: each local index is converted to a global index through the
// column map (the pack call itself is not visible).
456 for (
size_t k = 0; k < num_ent; ++k) {
457 const LO lid = lids_in[k];
458 const GO gid = col_map.getGlobalElement (lid);
// Process ids: pids_in is indexed by the entry's local column index, not by
// the entry's position k in the row.
463 for (
size_t k = 0; k < num_ent; ++k) {
464 const LO lid = lids_in[k];
465 const int pid = pids_in[lid];
// Accumulate the error code and byte count of a pack call whose result is `p`.
471 error_code += p.first;
472 num_bytes_out += p.second;
475 if (error_code != 0) {
476 return return_type (10, num_bytes_out);
// Check the byte total against the sum of the four section lengths.
479 const size_t expected_num_bytes =
480 num_ent_len + gids_len + pids_len + vals_len;
481 if (num_bytes_out != expected_num_bytes) {
482 return return_type (11, num_bytes_out);
484 return return_type (0, num_bytes_out);
// Reduction functor that packs one exported row per index through
// packCrsMatrixRow. The reduction value is a (error code, index) pair; do_pack
// runs this functor with Kokkos::parallel_reduce and inspects that pair.
//
// Error codes set in operator():
//   1 -> export_lid >= local_matrix.numRows(),
//   2 -> offset, or offset + num_bytes, exceeds the export buffer size,
//   3 -> bytes packed for the row differ from num_packets_per_lid(i),
//   otherwise the nonzero code returned by packCrsMatrixRow.
//
// NOTE(review): this listing omits lines of the original file (braces, typedef
// heads, some statements). The comments describe only what is visible.
487 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
488 struct PackCrsMatrixFunctor {
489 typedef LocalMatrix local_matrix_type;
490 typedef LocalMap local_map_type;
491 typedef typename local_matrix_type::value_type ST;
492 typedef typename local_map_type::local_ordinal_type LO;
493 typedef typename local_map_type::global_ordinal_type GO;
494 typedef typename local_matrix_type::device_type DT;
496 typedef Kokkos::View<const size_t*, BufferDeviceType>
497 num_packets_per_lid_view_type;
498 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
499 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
501 export_lids_view_type;
503 source_pids_view_type;
505 typedef typename num_packets_per_lid_view_type::non_const_value_type
507 typedef typename offsets_view_type::non_const_value_type
// Reduction result type: (error code, index).
509 typedef Kokkos::pair<int, LO> value_type;
511 static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
512 "local_map_type::local_ordinal_type and "
513 "local_matrix_type::ordinal_type must be the same.");
515 local_matrix_type local_matrix;
516 local_map_type local_col_map;
517 exports_view_type exports;
518 num_packets_per_lid_view_type num_packets_per_lid;
519 export_lids_view_type export_lids;
520 source_pids_view_type source_pids;
521 offsets_view_type offsets;
522 size_t num_bytes_per_value;
// Constructor: stores its arguments, then checks the row-offset array length.
525 PackCrsMatrixFunctor (
const local_matrix_type& local_matrix_in,
526 const local_map_type& local_col_map_in,
527 const exports_view_type& exports_in,
528 const num_packets_per_lid_view_type& num_packets_per_lid_in,
529 const export_lids_view_type& export_lids_in,
530 const source_pids_view_type& source_pids_in,
531 const offsets_view_type& offsets_in,
532 const size_t num_bytes_per_value_in,
533 const bool pack_pids_in) :
534 local_matrix (local_matrix_in),
535 local_col_map (local_col_map_in),
536 exports (exports_in),
537 num_packets_per_lid (num_packets_per_lid_in),
538 export_lids (export_lids_in),
539 source_pids (source_pids_in),
540 offsets (offsets_in),
541 num_bytes_per_value (num_bytes_per_value_in),
542 pack_pids (pack_pids_in)
// For a matrix with rows, row_map must have numRows + 1 entries; otherwise
// std::logic_error is thrown.
544 const LO numRows = local_matrix_in.numRows ();
546 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
547 TEUCHOS_TEST_FOR_EXCEPTION
548 (numRows != 0 && rowMapDim != numRows +
static_cast<LO
> (1),
549 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
550 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 and an invalid index.
553 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
555 using ::Tpetra::Details::OrdinalTraits;
556 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Combine two partial results. The visible condition selects the case where
// src carries an error and dst does not (the branch body is not visible).
559 KOKKOS_INLINE_FUNCTION
void
560 join (
volatile value_type& dst,
const volatile value_type& src)
const
564 if (src.first != 0 && dst.first == 0) {
// Pack the row for export index i and record any error in dst.
569 KOKKOS_INLINE_FUNCTION
570 void operator() (
const LO i, value_type& dst)
const
572 const size_t offset = offsets[i];
573 const LO export_lid = export_lids[i];
574 const size_t buf_size = exports.size();
575 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map[export_lid+1] is read here, before export_lid is
// checked against numRows() below, so an out-of-range export_lid indexes
// row_map out of bounds first. Consider validating before this read.
576 const size_t num_ent =
577 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
578 - local_matrix.graph.row_map[export_lid]);
// NOTE(review): each error branch below is guarded by "dst.first != 0".
// init() sets dst.first to 0 and join() tests "dst.first == 0", so as written
// a thread-local dst that starts at 0 never records its first error. The guard
// looks inverted ("== 0" would keep only the first error) -- confirm.
588 if (export_lid >= local_matrix.numRows ()) {
589 if (dst.first != 0) {
590 dst = Kokkos::make_pair (1, i);
594 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
595 if (dst.first != 0) {
596 dst = Kokkos::make_pair (2, i);
// Take subviews of this row's values and local column indices, then pack them.
606 const auto row_beg = local_matrix.graph.row_map[export_lid];
607 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
608 auto vals_in = subview (local_matrix.values,
609 Kokkos::make_pair (row_beg, row_end));
610 auto lids_in = subview (local_matrix.graph.entries,
611 Kokkos::make_pair (row_beg, row_end));
612 typedef local_map_type LMT;
613 typedef BufferDeviceType BDT;
614 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
615 source_pids, vals_in, offset,
616 num_ent, num_bytes_per_value,
618 int error_code_this_row = p.first;
619 size_t num_bytes_packed_this_row = p.second;
620 if (error_code_this_row != 0) {
621 if (dst.first != 0) {
622 dst = Kokkos::make_pair (error_code_this_row, i);
625 else if (num_bytes_packed_this_row != num_bytes) {
626 if (dst.first != 0) {
627 dst = Kokkos::make_pair (3, i);
// Validate the packing inputs, run PackCrsMatrixFunctor over every export
// index with Kokkos::parallel_reduce, and turn a nonzero reduction result into
// a std::runtime_error with a diagnostic message.
//
// Throws std::invalid_argument when the input dimensions are inconsistent, or
// when pack_pids is true and exports is nonempty but source_pids is empty.
//
// NOTE(review): this listing omits lines of the original file (the return
// type, some template arguments, braces, and some statements). The comments
// describe only what is visible.
640 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
642 do_pack (
const LocalMatrix& local_matrix,
643 const LocalMap& local_map,
644 const Kokkos::View<char*, BufferDeviceType>& exports,
645 const typename PackTraits<
648 >::input_array_type& num_packets_per_lid,
649 const typename PackTraits<
650 typename LocalMap::local_ordinal_type,
652 >::input_array_type& export_lids,
653 const typename PackTraits<
655 typename LocalMatrix::device_type
656 >::input_array_type& source_pids,
657 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
658 const size_t num_bytes_per_value,
659 const bool pack_pids)
661 typedef typename LocalMap::local_ordinal_type LO;
662 typedef typename LocalMatrix::device_type DT;
663 typedef Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
664 const char prefix[] =
"Tpetra::Details::do_pack: ";
// The dimension checks apply only when there is at least one row to export.
666 if (export_lids.extent (0) != 0) {
// offsets has one more entry than export_lids.
667 TEUCHOS_TEST_FOR_EXCEPTION
668 (
static_cast<size_t> (offsets.extent (0)) !=
669 static_cast<size_t> (export_lids.extent (0) + 1),
670 std::invalid_argument, prefix <<
"offsets.extent(0) = "
671 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
672 << export_lids.extent (0) <<
") + 1.");
// export_lids and num_packets_per_lid have the same length.
673 TEUCHOS_TEST_FOR_EXCEPTION
674 (export_lids.extent (0) != num_packets_per_lid.extent (0),
675 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
676 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
677 << num_packets_per_lid.extent (0) <<
".");
// Packing pids into a nonempty buffer requires a nonempty source_pids.
681 TEUCHOS_TEST_FOR_EXCEPTION
682 (pack_pids && exports.extent (0) != 0 &&
683 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
684 "pack_pids is true, and exports.extent(0) = " <<
685 exports.extent (0) <<
" != 0, meaning that we need to pack at "
686 "least one matrix entry, but source_pids.extent(0) = 0.");
// Build the functor and reduce over [0, num_packets_per_lid.extent(0)).
689 typedef PackCrsMatrixFunctor<LocalMatrix, LocalMap,
690 BufferDeviceType> pack_functor_type;
691 pack_functor_type f (local_matrix, local_map, exports,
692 num_packets_per_lid, export_lids,
693 source_pids, offsets, num_bytes_per_value,
696 typename pack_functor_type::value_type result;
697 range_type range (0, num_packets_per_lid.extent (0));
698 Kokkos::parallel_reduce (range, f, result);
// result.first is the error code and result.second the index it was reported
// for. Codes 1 and 2 get extra detail read through host mirror views; the
// statements that fill those mirrors are not visible in this listing.
700 if (result.first != 0) {
701 std::ostringstream os;
703 if (result.first == 1) {
704 auto export_lids_h = Kokkos::create_mirror_view (export_lids);
706 const auto firstBadLid = export_lids_h(result.second);
707 os <<
"First bad export LID: export_lids(i=" << result.second <<
") = "
710 else if (result.first == 2) {
711 auto offsets_h = Kokkos::create_mirror_view (offsets);
713 const auto firstBadOffset = offsets_h(result.second);
715 auto num_packets_per_lid_h =
716 Kokkos::create_mirror_view (num_packets_per_lid);
718 os <<
"First bad offset: offsets(i=" << result.second <<
") = "
719 << firstBadOffset <<
", num_packets_per_lid(i) = "
720 << num_packets_per_lid_h(result.second) <<
", buf_size = "
// Always throws inside this branch; the message carries the code and index.
724 TEUCHOS_TEST_FOR_EXCEPTION
725 (
true, std::runtime_error, prefix <<
"PackCrsMatrixFunctor reported "
726 "error code " << result.first <<
" for the first bad row "
727 << result.second <<
". " << os.str ());
// PackCrsMatrixImpl::packCrsMatrix (per its own `prefix` string): pack the
// rows of sourceMatrix listed in export_lids into the DualView `exports`.
//
// Visible steps: validate inputs, agree on a bytes-per-value number across the
// communicator, compute per-row sizes and offsets with
// computeNumPacketsAndOffsets, reallocate `exports` if it is too small, then
// pack on device via do_pack.
//
// NOTE(review): this listing omits lines of the original file (the return
// type, the last parameter, braces, and some statements). The comments
// describe only what is visible.
760 template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
762 packCrsMatrix (
const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
763 Kokkos::DualView<char*, BufferDeviceType>& exports,
764 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
765 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
766 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
767 size_t& constant_num_packets,
768 const bool pack_pids,
772 typedef BufferDeviceType DT;
773 typedef typename DT::execution_space execution_space;
774 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
775 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch for the std::cerr diagnostics further down.
776 constexpr
bool debug =
false;
778 auto local_matrix = sourceMatrix.getLocalMatrix ();
779 auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
// The output parameter is set to zero here.
784 constant_num_packets = 0;
// export_lids and num_packets_per_lid must have the same length (the first
// line of the tested condition is not visible).
786 const size_t num_export_lids =
787 static_cast<size_t> (export_lids.extent (0));
788 TEUCHOS_TEST_FOR_EXCEPTION
790 static_cast<size_t> (num_packets_per_lid.extent (0)),
791 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
792 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
793 << num_packets_per_lid.extent (0) <<
".");
// With rows to export, num_packets_per_lid must point at real storage.
794 if (num_export_lids != 0) {
795 TEUCHOS_TEST_FOR_EXCEPTION
796 (num_packets_per_lid.data () == NULL, std::invalid_argument,
797 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
798 "num_packets_per_lid.data() = "
799 << num_packets_per_lid.data () <<
" == NULL.");
// Bytes per value: a local number derived from the first stored value when the
// local matrix has any, then combined across the matrix's communicator with
// reduceAll. The derivation and the reduction operator are on lines not
// visible here.
806 size_t num_bytes_per_value = 0;
821 size_t num_bytes_per_value_l = 0;
822 if (local_matrix.values.extent(0) > 0) {
823 const ST& val = local_matrix.values(0);
826 using Teuchos::reduceAll;
827 reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
829 num_bytes_per_value_l,
830 Teuchos::outArg (num_bytes_per_value));
// Nothing to export: replace `exports` with a zero-length DualView, fencing
// the execution space before and after the reassignment.
833 if (num_export_lids == 0) {
837 execution_space().fence ();
838 exports = exports_view_type (
"exports", 0);
839 execution_space().fence ();
// One offset per exported row plus a final entry.
844 Kokkos::View<size_t*, DT> offsets (
"offsets", num_export_lids + 1);
// Fill offsets and num_packets_per_lid. The result is stored in `count` on a
// line not visible here.
849 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
850 local_matrix.graph.row_map, export_lids,
852 num_bytes_per_lid, num_bytes_per_gid,
853 num_bytes_per_pid, num_bytes_per_value);
// Reallocate `exports` only when the required byte count exceeds its current
// length, again fencing around the reassignment.
856 if (count >
static_cast<size_t> (exports.extent (0))) {
860 execution_space().fence ();
861 exports = exports_view_type (
"exports", count);
863 std::ostringstream os;
864 os <<
"*** exports resized to " << count << std::endl;
865 std::cerr << os.str ();
867 execution_space().fence ();
870 std::ostringstream os;
871 os <<
"*** count: " << count <<
", exports.extent(0): "
872 << exports.extent (0) << std::endl;
873 std::cerr << os.str ();
// Packing pids into a nonempty buffer requires a nonempty export_pids.
879 TEUCHOS_TEST_FOR_EXCEPTION
880 (pack_pids && exports.extent (0) != 0 &&
881 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
882 "pack_pids is true, and exports.extent(0) = " <<
883 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
884 "one matrix entry, but export_pids.extent(0) = 0.");
886 typedef typename std::decay<decltype (local_matrix)>::type
888 typedef typename std::decay<decltype (local_col_map)>::type
// Mark the device side of `exports` as modified, then pack into its device view.
891 exports.modify_device ();
892 auto exports_d = exports.view_device ();
893 do_pack<local_matrix_type, local_map_type, DT>
894 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
895 export_lids, export_pids, offsets, num_bytes_per_value,
// Teuchos-array interface to PackCrsMatrixImpl::packCrsMatrix. It wraps the
// host arrays as Views on buffer_device_type, calls the implementation with
// pack_pids = false, then brings the packet counts and packed bytes back into
// the caller's host arrays.
//
// NOTE(review): this listing omits lines of the original file (the function
// name line, the callee of the two View initializations, and the copy-back
// statements). The comments describe only what is visible.
902 template<
typename ST,
typename LO,
typename GO,
typename NT>
905 Teuchos::Array<char>& exports,
906 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
907 const Teuchos::ArrayView<const LO>& exportLIDs,
908 size_t& constantNumPackets,
912 using device_type =
typename local_matrix_type::device_type;
// Host execution space paired with Kokkos::HostSpace, used for the host views
// of the caller's arrays.
914 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
915 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
// View of the packet counts, built from the raw host pointer with a `false`
// argument. Presumably that is the "copy" flag of
// create_mirror_view_from_raw_host_array (counts are outputs) -- TODO confirm.
921 Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
923 numPacketsPerLID.getRawPtr (),
924 numPacketsPerLID.size (),
false,
925 "num_packets_per_lid");
// View of the export LIDs, built the same way but with a `true` argument.
932 Kokkos::View<const LO*, buffer_device_type> export_lids_d =
934 exportLIDs.getRawPtr (),
935 exportLIDs.size (),
true,
// No pids are packed through this interface, so the pid View stays
// default-constructed.
938 Kokkos::View<int*, device_type> export_pids_d;
939 Kokkos::DualView<char*, buffer_device_type> exports_dv;
940 constexpr
bool pack_pids =
false;
941 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
942 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
943 export_pids_d, constantNumPackets, pack_pids, distor);
// Host View over the caller's count array (the copy into it is not visible).
947 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
948 (numPacketsPerLID.getRawPtr (),
949 numPacketsPerLID.size ());
// Resize the caller's byte array to the packed length before wrapping it in a
// host View.
957 if (
static_cast<size_t> (exports.size ()) !=
958 static_cast<size_t> (exports_dv.extent (0))) {
959 exports.resize (exports_dv.extent (0));
961 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
// DualView interface to PackCrsMatrixImpl::packCrsMatrix (named
// packCrsMatrixNew in the instantiation macro of this file). It packs without
// process ids: pack_pids is false and the pid View is empty.
//
// NOTE(review): this listing omits lines of the original file (the function
// name line and parts of the parameter types). The comments describe only
// what is visible.
966 template<
typename ST,
typename LO,
typename GO,
typename NT>
969 Kokkos::DualView<
char*,
971 const Kokkos::DualView<
size_t*,
973 const Kokkos::DualView<
const LO*,
975 size_t& constantNumPackets,
// Zero-length pid View; no pids are packed here.
982 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
983 constexpr
bool pack_pids =
false;
// numPacketsPerLID arrives as a const reference, so a nonconst copy is used to
// reset its sync state and mark the device side as modified.
// NOTE(review): the device view below is taken from numPacketsPerLID, not from
// numPacketsPerLID_nc. Presumably both share the same allocation because the
// copy is shallow -- TODO confirm.
986 auto numPacketsPerLID_nc = numPacketsPerLID;
987 numPacketsPerLID_nc.clear_sync_state ();
988 numPacketsPerLID_nc.modify_device ();
989 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
// The export LIDs must already be current on the device.
992 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
993 auto exportLIDs_d = exportLIDs.view_device ();
995 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
996 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
997 exportPIDs_d, constantNumPackets, pack_pids, distor);
// packCrsMatrixWithOwningPIDs (per its own diagnostic prefix): same packing as
// the other interfaces, but with pack_pids = true, so the process ids in
// sourcePIDs are packed along with each row's column indices and values.
//
// It wraps the host arrays as Views, calls PackCrsMatrixImpl::packCrsMatrix,
// and creates a host View over numPacketsPerLID for the counts. Exceptions
// from either step are reported on std::cerr.
//
// NOTE(review): this listing omits lines of the original file (the function
// name line, try/catch heads, any guards around the std::cerr output, and
// whatever follows each report). The comments describe only what is visible.
1000 template<
typename ST,
typename LO,
typename GO,
typename NT>
1004 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
1005 const Teuchos::ArrayView<const LO>& exportLIDs,
1006 const Teuchos::ArrayView<const int>& sourcePIDs,
1007 size_t& constantNumPackets,
1012 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
1013 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
1015 typename local_matrix_type::device_type outputDevice;
// Diagnostic prefix string, allocated only when it is built below.
1018 std::unique_ptr<std::string> prefix;
// Rank of this process, obtained through the matrix's Map and that Map's
// communicator. The values returned when either is null are not visible.
1020 const int myRank = [&] () {
1021 auto map = sourceMatrix.
getMap ();
1022 if (map.get () ==
nullptr) {
1025 auto comm = map->getComm ();
1026 if (comm.get () ==
nullptr) {
1029 return comm->getRank ();
1031 std::ostringstream os;
1032 os <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs: ";
1033 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1035 std::ostringstream os2;
1036 os2 << *prefix <<
"start" << std::endl;
1037 std::cerr << os2.str ();
// View of the packet counts, built from the raw host pointer with a `false`
// argument.
1044 auto num_packets_per_lid_d =
1046 numPacketsPerLID.getRawPtr (),
1047 numPacketsPerLID.size (),
false,
1048 "num_packets_per_lid");
// Views of the export LIDs and source pids, built with a `true` argument.
1052 auto export_lids_d =
1054 exportLIDs.getRawPtr (),
1055 exportLIDs.size (),
true,
1059 auto export_pids_d =
1061 sourcePIDs.getRawPtr (),
1062 sourcePIDs.size (),
true,
1064 constexpr
bool pack_pids =
true;
1066 PackCrsMatrixImpl::packCrsMatrix
1067 (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1068 export_pids_d, constantNumPackets, pack_pids, distor);
// Report an exception thrown by the packing call.
// NOTE(review): these handlers dereference `prefix`, which is null unless the
// code above assigned it -- confirm the guards in the full file.
1070 catch (std::exception& e) {
1072 std::ostringstream os;
1073 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1074 << e.what () << std::endl;
1075 std::cerr << os.str ();
1081 std::ostringstream os;
1082 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1083 "not a subclass of std::exception" << std::endl;
1084 std::cerr << os.str ();
// For a nonempty count array, wrap the caller's storage in a host View (the
// copy into it is not visible).
1089 if (numPacketsPerLID.size () != 0) {
1093 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1094 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
// Report an exception thrown by the copy.
1097 catch (std::exception& e) {
1099 std::ostringstream os;
1100 os << *prefix <<
"Kokkos::deep_copy threw: " << e.what () << std::endl;
1101 std::cerr << os.str ();
1107 std::ostringstream os;
1108 os << *prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1109 "of std::exception" << std::endl;
1110 std::cerr << os.str ();
1117 std::ostringstream os;
1118 os << *prefix <<
"done" << std::endl;
1119 std::cerr << os.str ();
// Macro that names Details::packCrsMatrix, Details::packCrsMatrixNew, and
// Details::packCrsMatrixWithOwningPIDs for one (ST, LO, GO, NT) combination;
// presumably these are explicit template instantiations -- TODO confirm.
// NOTE(review): this listing omits some continuation lines of the macro.
// Do not insert anything between the lines below: each ends in a
// backslash continuation.
1126 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1128 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1129 Teuchos::Array<char>&, \
1130 const Teuchos::ArrayView<size_t>&, \
1131 const Teuchos::ArrayView<const LO>&, \
1135 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1136 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1137 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1138 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1142 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1143 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1144 const Teuchos::ArrayView<size_t>&, \
1145 const Teuchos::ArrayView<const LO>&, \
1146 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
typename Node::device_type device_type
The Kokkos device type.
static bool verbose()
Whether Tpetra is in verbose mode.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Base class for distributed Tpetra objects that support data redistribution.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Sets up and executes a communication plan for a Tpetra DistObject.
Implementation details of Tpetra.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static const bool compileTimeSize
Whether the number of bytes required to pack one instance of value_type is fixed at compile time.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.