Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_packCrsMatrix_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
44 
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
55 #include <memory>
56 #include <sstream>
57 #include <stdexcept>
58 #include <string>
59 
82 
83 namespace Tpetra {
84 
85 #ifndef DOXYGEN_SHOULD_SKIP_THIS
86 // Forward declaration of Distributor
87 class Distributor;
88 #endif // DOXYGEN_SHOULD_SKIP_THIS
89 
90 //
91 // Users must never rely on anything in the Details namespace.
92 //
93 namespace Details {
94 
95 namespace PackCrsMatrixImpl {
103 template<class OutputOffsetsViewType,
104  class CountsViewType,
105  class InputOffsetsViewType,
106  class InputLocalRowIndicesViewType,
107  class InputLocalRowPidsViewType,
108  const bool debug =
109 #ifdef HAVE_TPETRA_DEBUG
110  true
111 #else
112  false
113 #endif // HAVE_TPETRA_DEBUG
114  >
116 public:
117  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
118  typedef typename CountsViewType::non_const_value_type count_type;
119  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
120  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
121  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
122  // output Views drive where execution happens.
123  typedef typename OutputOffsetsViewType::device_type device_type;
124  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
125  typename device_type::execution_space>::value,
126  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
127  static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
128  "OutputOffsetsViewType must be a Kokkos::View.");
129  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
130  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
131  static_assert (std::is_integral<output_offset_type>::value,
132  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
133  static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
134  "CountsViewType must be a Kokkos::View.");
135  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
136  "CountsViewType must be a nonconst Kokkos::View.");
137  static_assert (std::is_integral<count_type>::value,
138  "The type of each entry of CountsViewType must be a built-in integer type.");
139  static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
140  "InputOffsetsViewType must be a Kokkos::View.");
141  static_assert (std::is_integral<input_offset_type>::value,
142  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
143  static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
144  "InputLocalRowIndicesViewType must be a Kokkos::View.");
145  static_assert (std::is_integral<local_row_index_type>::value,
146  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
147 
148  NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
149  const CountsViewType& counts,
150  const InputOffsetsViewType& rowOffsets,
151  const InputLocalRowIndicesViewType& lclRowInds,
152  const InputLocalRowPidsViewType& lclRowPids,
153  const count_type sizeOfLclCount,
154  const count_type sizeOfGblColInd,
155  const count_type sizeOfPid,
156  const count_type sizeOfValue) :
157  outputOffsets_ (outputOffsets),
158  counts_ (counts),
159  rowOffsets_ (rowOffsets),
160  lclRowInds_ (lclRowInds),
161  lclRowPids_ (lclRowPids),
162  sizeOfLclCount_ (sizeOfLclCount),
163  sizeOfGblColInd_ (sizeOfGblColInd),
164  sizeOfPid_ (sizeOfPid),
165  sizeOfValue_ (sizeOfValue),
166  error_ ("error") // don't forget this, or you'll get segfaults!
167  {
168  if (debug) {
169  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
170 
171  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
172  std::ostringstream os;
173  os << "lclRowInds.extent(0) = " << numRowsToPack
174  << " != counts.extent(0) = " << counts_.extent (0)
175  << ".";
176  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
177  }
178  if (static_cast<size_t> (numRowsToPack + 1) !=
179  static_cast<size_t> (outputOffsets_.extent (0))) {
180  std::ostringstream os;
181  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
182  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
183  << ".";
184  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
185  }
186  }
187  }
188 
189  KOKKOS_INLINE_FUNCTION void
190  operator() (const local_row_index_type& curInd,
191  output_offset_type& update,
192  const bool final) const
193  {
194  if (debug) {
195  if (curInd < static_cast<local_row_index_type> (0)) {
196  error_ () = 1;
197  return;
198  }
199  }
200 
201  if (final) {
202  if (debug) {
203  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
204  error_ () = 2;
205  return;
206  }
207  }
208  outputOffsets_(curInd) = update;
209  }
210 
211  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
212  const auto lclRow = lclRowInds_(curInd);
213  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
214  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
215  error_ () = 3;
216  return;
217  }
218  // count_type could differ from the type of each row offset.
219  // For example, row offsets might each be 64 bits, but if their
220  // difference always fits in 32 bits, we may then safely use a
221  // 32-bit count_type.
222  const count_type count =
223  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
224 
225  // We pack first the number of entries in the row, then that
226  // many global column indices, then that many pids (if any),
227  // then that many values. However, if the number of entries in
228  // the row is zero, we pack nothing.
229  const count_type numBytes = (count == 0) ?
230  static_cast<count_type> (0) :
231  sizeOfLclCount_ + count * (sizeOfGblColInd_ +
232  (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
233  sizeOfValue_);
234 
235  if (final) {
236  counts_(curInd) = numBytes;
237  }
238  update += numBytes;
239  }
240  }
241 
242  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
243  // have join both with and without volatile! Otherwise intrawarp
244  // joins are really slow on GPUs.
245 
247  int getError () const {
248  auto error_h = Kokkos::create_mirror_view (error_);
249  Kokkos::deep_copy (error_h, error_);
250  return error_h ();
251  }
252 
253 private:
254  OutputOffsetsViewType outputOffsets_;
255  CountsViewType counts_;
256  typename InputOffsetsViewType::const_type rowOffsets_;
257  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
258  typename InputLocalRowPidsViewType::const_type lclRowPids_;
259  count_type sizeOfLclCount_;
260  count_type sizeOfGblColInd_;
261  count_type sizeOfPid_;
262  count_type sizeOfValue_;
263  Kokkos::View<int, device_type> error_;
264 };
265 
275 template<class OutputOffsetsViewType,
276  class CountsViewType,
277  class InputOffsetsViewType,
278  class InputLocalRowIndicesViewType,
279  class InputLocalRowPidsViewType>
280 typename CountsViewType::non_const_value_type
281 computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
282  const CountsViewType& counts,
283  const InputOffsetsViewType& rowOffsets,
284  const InputLocalRowIndicesViewType& lclRowInds,
285  const InputLocalRowPidsViewType& lclRowPids,
286  const typename CountsViewType::non_const_value_type sizeOfLclCount,
287  const typename CountsViewType::non_const_value_type sizeOfGblColInd,
288  const typename CountsViewType::non_const_value_type sizeOfPid,
289  const typename CountsViewType::non_const_value_type sizeOfValue)
290 {
291  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
292  CountsViewType, typename InputOffsetsViewType::const_type,
293  typename InputLocalRowIndicesViewType::const_type,
294  typename InputLocalRowPidsViewType::const_type> functor_type;
295  typedef typename CountsViewType::non_const_value_type count_type;
296  typedef typename OutputOffsetsViewType::size_type size_type;
297  typedef typename OutputOffsetsViewType::execution_space execution_space;
298  typedef typename functor_type::local_row_index_type LO;
299  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
300  const char prefix[] = "computeNumPacketsAndOffsets: ";
301 
302  count_type count = 0;
303  const count_type numRowsToPack = lclRowInds.extent (0);
304 
305  if (numRowsToPack == 0) {
306  return count;
307  }
308  else {
309  TEUCHOS_TEST_FOR_EXCEPTION
310  (rowOffsets.extent (0) <= static_cast<size_type> (1),
311  std::invalid_argument, prefix << "There is at least one row to pack, "
312  "but the matrix has no rows. lclRowInds.extent(0) = " <<
313  numRowsToPack << ", but rowOffsets.extent(0) = " <<
314  rowOffsets.extent (0) << " <= 1.");
315  TEUCHOS_TEST_FOR_EXCEPTION
316  (outputOffsets.extent (0) !=
317  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
318  prefix << "Output dimension does not match number of rows to pack. "
319  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
320  << " != lclRowInds.extent(0) + 1 = "
321  << static_cast<size_type> (numRowsToPack + 1) << ".");
322  TEUCHOS_TEST_FOR_EXCEPTION
323  (counts.extent (0) != numRowsToPack, std::invalid_argument,
324  prefix << "counts.extent(0) = " << counts.extent (0)
325  << " != numRowsToPack = " << numRowsToPack << ".");
326 
327  functor_type f (outputOffsets, counts, rowOffsets,
328  lclRowInds, lclRowPids, sizeOfLclCount,
329  sizeOfGblColInd, sizeOfPid, sizeOfValue);
330  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
331 
332  // At least in debug mode, this functor checks for errors.
333  const int errCode = f.getError ();
334  TEUCHOS_TEST_FOR_EXCEPTION
335  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
336  << errCode << " != 0.");
337 
338 #if 0
339  size_t total = 0;
340  for (LO k = 0; k < numRowsToPack; ++k) {
341  total += counts[k];
342  }
343  if (outputOffsets(numRowsToPack) != total) {
344  if (errStr.get () == NULL) {
345  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
346  }
347  std::ostringstream& os = *errStr;
348  os << prefix
349  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
350  << outputOffsets(numRowsToPack) << " != sum of counts = "
351  << total << "." << std::endl;
352  if (numRowsToPack != 0) {
353  // Only print the array if it's not too long.
354  if (numRowsToPack < static_cast<LO> (10)) {
355  os << "outputOffsets: [";
356  for (LO i = 0; i <= numRowsToPack; ++i) {
357  os << outputOffsets(i);
358  if (static_cast<LO> (i + 1) <= numRowsToPack) {
359  os << ",";
360  }
361  }
362  os << "]" << std::endl;
363  os << "counts: [";
364  for (LO i = 0; i < numRowsToPack; ++i) {
365  os << counts(i);
366  if (static_cast<LO> (i + 1) < numRowsToPack) {
367  os << ",";
368  }
369  }
370  os << "]" << std::endl;
371  }
372  else {
373  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
374  << outputOffsets(numRowsToPack-1) << "." << std::endl;
375  }
376  }
377  count = outputOffsets(numRowsToPack);
378  return {false, errStr};
379  }
380 #endif // HAVE_TPETRA_DEBUG
381 
382  // Get last entry of outputOffsets, which is the sum of the entries
383  // of counts. Don't assume UVM.
384  using Tpetra::Details::getEntryOnHost;
385  return static_cast<count_type> (getEntryOnHost (outputOffsets,
386  numRowsToPack));
387  }
388 }
389 
// Pack a single matrix row into the byte buffer `exports`, starting at
// byte position `offset`.
//
// Layout written for a nonempty row, in this order: the number of
// entries (as LO), then one global column index per entry, then (only
// if pack_pids) one int per entry read from pids_in, then the values.
//
// Returns a pair (error code, number of bytes written):
//   (0, 0)   if num_ent == 0 (nothing is written);
//   (10, n)  if PackTraits<ST, BDT>::packArray reported a nonzero code;
//   (11, n)  if the bytes written differ from the expected total;
//   (0, n)   otherwise.
//
// NOTE(review): the body reads `lids_in`, `pids_in` and `vals_in`, but
// the three parameter declarations that introduce them (between
// `exports` and `offset`) are not present in this listing.  Restore
// them from the upstream file before compiling.
405 template<class ST, class ColumnMap, class BufferDeviceType>
406 KOKKOS_FUNCTION
407 Kokkos::pair<int, size_t>
408 packCrsMatrixRow (const ColumnMap& col_map,
409  const Kokkos::View<char*, BufferDeviceType>& exports,
413  const size_t offset,
414  const size_t num_ent,
415  const size_t num_bytes_per_value,
416  const bool pack_pids)
417 {
418  using Kokkos::subview;
419  typedef typename ColumnMap::local_ordinal_type LO;
420  typedef typename ColumnMap::global_ordinal_type GO;
421  typedef BufferDeviceType BDT;
422  typedef Kokkos::pair<int, size_t> return_type;
423 
424  if (num_ent == 0) {
425  // Empty rows always take zero bytes, to ensure sparsity.
426  return return_type (0, 0);
427  }
428 
// Byte ranges of the four sections, laid out back to back from `offset`.
429  const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
430  const size_t num_ent_beg = offset;
431  const size_t num_ent_len = PackTraits<LO, BDT>::packValueCount (num_ent_LO);
432 
433  const size_t gids_beg = num_ent_beg + num_ent_len;
434  const size_t gids_len = num_ent * PackTraits<GO, BDT>::packValueCount (GO (0));
435 
// The PID section has zero length when PIDs are not being packed.
436  const size_t pids_beg = gids_beg + gids_len;
437  const size_t pids_len = pack_pids ?
438  num_ent * PackTraits<int, BDT>::packValueCount (int (0)) :
439  static_cast<size_t> (0);
440 
441  const size_t vals_beg = gids_beg + gids_len + pids_len;
442  const size_t vals_len = num_ent * num_bytes_per_value;
443 
// Raw output pointers for each section; pids_out is NULL when unused.
444  char* const num_ent_out = exports.data () + num_ent_beg;
445  char* const gids_out = exports.data () + gids_beg;
446  char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
447  char* const vals_out = exports.data () + vals_beg;
448 
449  size_t num_bytes_out = 0;
450  int error_code = 0;
451  num_bytes_out += PackTraits<LO, BDT>::packValue (num_ent_out, num_ent_LO);
452 
453  {
454  // Copy column indices one at a time, so that we don't need
455  // temporary storage.
// Each local column index is converted to global through col_map.
456  for (size_t k = 0; k < num_ent; ++k) {
457  const LO lid = lids_in[k];
458  const GO gid = col_map.getGlobalElement (lid);
459  num_bytes_out += PackTraits<GO, BDT>::packValue (gids_out, k, gid);
460  }
461  // Copy PIDs one at a time, so that we don't need temporary storage.
// pids_in is indexed by the entry's local column index, not by k.
462  if (pack_pids) {
463  for (size_t k = 0; k < num_ent; ++k) {
464  const LO lid = lids_in[k];
465  const int pid = pids_in[lid];
466  num_bytes_out += PackTraits<int, BDT>::packValue (pids_out, k, pid);
467  }
468  }
// Values go in with one packArray call, which returns (error code, bytes).
469  const auto p =
470  PackTraits<ST, BDT>::packArray (vals_out, vals_in.data (), num_ent);
471  error_code += p.first;
472  num_bytes_out += p.second;
473  }
474 
475  if (error_code != 0) {
476  return return_type (10, num_bytes_out);
477  }
478 
// Cross-check the bytes actually written against the computed layout.
479  const size_t expected_num_bytes =
480  num_ent_len + gids_len + pids_len + vals_len;
481  if (num_bytes_out != expected_num_bytes) {
482  return return_type (11, num_bytes_out);
483  }
484  return return_type (0, num_bytes_out);
485 }
486 
487 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
488 struct PackCrsMatrixFunctor {
489  typedef LocalMatrix local_matrix_type;
490  typedef LocalMap local_map_type;
491  typedef typename local_matrix_type::value_type ST;
492  typedef typename local_map_type::local_ordinal_type LO;
493  typedef typename local_map_type::global_ordinal_type GO;
494  typedef typename local_matrix_type::device_type DT;
495 
496  typedef Kokkos::View<const size_t*, BufferDeviceType>
497  num_packets_per_lid_view_type;
498  typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
499  typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
501  export_lids_view_type;
503  source_pids_view_type;
504 
505  typedef typename num_packets_per_lid_view_type::non_const_value_type
506  count_type;
507  typedef typename offsets_view_type::non_const_value_type
508  offset_type;
509  typedef Kokkos::pair<int, LO> value_type;
510 
511  static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
512  "local_map_type::local_ordinal_type and "
513  "local_matrix_type::ordinal_type must be the same.");
514 
515  local_matrix_type local_matrix;
516  local_map_type local_col_map;
517  exports_view_type exports;
518  num_packets_per_lid_view_type num_packets_per_lid;
519  export_lids_view_type export_lids;
520  source_pids_view_type source_pids;
521  offsets_view_type offsets;
522  size_t num_bytes_per_value;
523  bool pack_pids;
524 
525  PackCrsMatrixFunctor (const local_matrix_type& local_matrix_in,
526  const local_map_type& local_col_map_in,
527  const exports_view_type& exports_in,
528  const num_packets_per_lid_view_type& num_packets_per_lid_in,
529  const export_lids_view_type& export_lids_in,
530  const source_pids_view_type& source_pids_in,
531  const offsets_view_type& offsets_in,
532  const size_t num_bytes_per_value_in,
533  const bool pack_pids_in) :
534  local_matrix (local_matrix_in),
535  local_col_map (local_col_map_in),
536  exports (exports_in),
537  num_packets_per_lid (num_packets_per_lid_in),
538  export_lids (export_lids_in),
539  source_pids (source_pids_in),
540  offsets (offsets_in),
541  num_bytes_per_value (num_bytes_per_value_in),
542  pack_pids (pack_pids_in)
543  {
544  const LO numRows = local_matrix_in.numRows ();
545  const LO rowMapDim =
546  static_cast<LO> (local_matrix.graph.row_map.extent (0));
547  TEUCHOS_TEST_FOR_EXCEPTION
548  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
549  std::logic_error, "local_matrix.graph.row_map.extent(0) = "
550  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
551  }
552 
553  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
554  {
555  using ::Tpetra::Details::OrdinalTraits;
556  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
557  }
558 
559  KOKKOS_INLINE_FUNCTION void
560  join (volatile value_type& dst, const volatile value_type& src) const
561  {
562  // `dst` should reflect the first (least) bad index and all other
563  // associated error codes and data, so prefer keeping it.
564  if (src.first != 0 && dst.first == 0) {
565  dst = src;
566  }
567  }
568 
569  KOKKOS_INLINE_FUNCTION
570  void operator() (const LO i, value_type& dst) const
571  {
572  const size_t offset = offsets[i];
573  const LO export_lid = export_lids[i];
574  const size_t buf_size = exports.size();
575  const size_t num_bytes = num_packets_per_lid(i);
576  const size_t num_ent =
577  static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
578  - local_matrix.graph.row_map[export_lid]);
579 
580  // Only pack this row's data if it has a nonzero number of
581  // entries. We can do this because receiving processes get the
582  // number of packets, and will know that zero packets means zero
583  // entries.
584  if (num_ent == 0) {
585  return;
586  }
587 
588  if (export_lid >= local_matrix.numRows ()) {
589  if (dst.first != 0) { // keep only the first error
590  dst = Kokkos::make_pair (1, i); // invalid row
591  }
592  return;
593  }
594  else if ((offset > buf_size || offset + num_bytes > buf_size)) {
595  if (dst.first != 0) { // keep only the first error
596  dst = Kokkos::make_pair (2, i); // out of bounds
597  }
598  return;
599  }
600 
601  // We can now pack this row
602 
603  // Since the matrix is locally indexed on the calling process, we
604  // have to use its column Map (which it _must_ have in this case)
605  // to convert to global indices.
606  const auto row_beg = local_matrix.graph.row_map[export_lid];
607  const auto row_end = local_matrix.graph.row_map[export_lid + 1];
608  auto vals_in = subview (local_matrix.values,
609  Kokkos::make_pair (row_beg, row_end));
610  auto lids_in = subview (local_matrix.graph.entries,
611  Kokkos::make_pair (row_beg, row_end));
612  typedef local_map_type LMT;
613  typedef BufferDeviceType BDT;
614  auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
615  source_pids, vals_in, offset,
616  num_ent, num_bytes_per_value,
617  pack_pids);
618  int error_code_this_row = p.first;
619  size_t num_bytes_packed_this_row = p.second;
620  if (error_code_this_row != 0) {
621  if (dst.first != 0) { // keep only the first error
622  dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
623  }
624  }
625  else if (num_bytes_packed_this_row != num_bytes) {
626  if (dst.first != 0) { // keep only the first error
627  dst = Kokkos::make_pair (3, i);
628  }
629  }
630  }
631 };
632 
640 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
641 void
642 do_pack (const LocalMatrix& local_matrix,
643  const LocalMap& local_map,
644  const Kokkos::View<char*, BufferDeviceType>& exports,
645  const typename PackTraits<
646  size_t,
647  BufferDeviceType
648  >::input_array_type& num_packets_per_lid,
649  const typename PackTraits<
650  typename LocalMap::local_ordinal_type,
651  BufferDeviceType
652  >::input_array_type& export_lids,
653  const typename PackTraits<
654  int,
655  typename LocalMatrix::device_type
656  >::input_array_type& source_pids,
657  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
658  const size_t num_bytes_per_value,
659  const bool pack_pids)
660 {
661  typedef typename LocalMap::local_ordinal_type LO;
662  typedef typename LocalMatrix::device_type DT;
663  typedef Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
664  const char prefix[] = "Tpetra::Details::do_pack: ";
665 
666  if (export_lids.extent (0) != 0) {
667  TEUCHOS_TEST_FOR_EXCEPTION
668  (static_cast<size_t> (offsets.extent (0)) !=
669  static_cast<size_t> (export_lids.extent (0) + 1),
670  std::invalid_argument, prefix << "offsets.extent(0) = "
671  << offsets.extent (0) << " != export_lids.extent(0) (= "
672  << export_lids.extent (0) << ") + 1.");
673  TEUCHOS_TEST_FOR_EXCEPTION
674  (export_lids.extent (0) != num_packets_per_lid.extent (0),
675  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
676  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
677  << num_packets_per_lid.extent (0) << ".");
678  // If exports has nonzero length at this point, then the matrix
679  // has at least one entry to pack. Thus, if packing process
680  // ranks, we had better have at least one process rank to pack.
681  TEUCHOS_TEST_FOR_EXCEPTION
682  (pack_pids && exports.extent (0) != 0 &&
683  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
684  "pack_pids is true, and exports.extent(0) = " <<
685  exports.extent (0) << " != 0, meaning that we need to pack at "
686  "least one matrix entry, but source_pids.extent(0) = 0.");
687  }
688 
689  typedef PackCrsMatrixFunctor<LocalMatrix, LocalMap,
690  BufferDeviceType> pack_functor_type;
691  pack_functor_type f (local_matrix, local_map, exports,
692  num_packets_per_lid, export_lids,
693  source_pids, offsets, num_bytes_per_value,
694  pack_pids);
695 
696  typename pack_functor_type::value_type result;
697  range_type range (0, num_packets_per_lid.extent (0));
698  Kokkos::parallel_reduce (range, f, result);
699 
700  if (result.first != 0) {
701  std::ostringstream os;
702 
703  if (result.first == 1) { // invalid local row index
704  auto export_lids_h = Kokkos::create_mirror_view (export_lids);
705  Kokkos::deep_copy (export_lids_h, export_lids);
706  const auto firstBadLid = export_lids_h(result.second);
707  os << "First bad export LID: export_lids(i=" << result.second << ") = "
708  << firstBadLid;
709  }
710  else if (result.first == 2) { // invalid offset
711  auto offsets_h = Kokkos::create_mirror_view (offsets);
712  Kokkos::deep_copy (offsets_h, offsets);
713  const auto firstBadOffset = offsets_h(result.second);
714 
715  auto num_packets_per_lid_h =
716  Kokkos::create_mirror_view (num_packets_per_lid);
717  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid);
718  os << "First bad offset: offsets(i=" << result.second << ") = "
719  << firstBadOffset << ", num_packets_per_lid(i) = "
720  << num_packets_per_lid_h(result.second) << ", buf_size = "
721  << exports.size ();
722  }
723 
724  TEUCHOS_TEST_FOR_EXCEPTION
725  (true, std::runtime_error, prefix << "PackCrsMatrixFunctor reported "
726  "error code " << result.first << " for the first bad row "
727  << result.second << ". " << os.str ());
728  }
729 }
730 
// Pack the rows of sourceMatrix listed in export_lids into `exports`.
//
// Steps, as implemented below:
//   1. Validate that export_lids and num_packets_per_lid agree in length.
//   2. Determine the number of bytes per matrix value.
//   3. Compute per-row byte counts (into num_packets_per_lid) and buffer
//      offsets with computeNumPacketsAndOffsets.
//   4. Reallocate `exports` if it is too small for the total.
//   5. Pack on device with do_pack.
//
// constant_num_packets is always set to 0.  The Distributor argument is
// unused.  Throws std::invalid_argument on inconsistent input; errors
// raised by computeNumPacketsAndOffsets and do_pack propagate.
760 template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
761 void
762 packCrsMatrix (const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
763  Kokkos::DualView<char*, BufferDeviceType>& exports,
764  const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
765  const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
766  const Kokkos::View<const int*, typename NT::device_type>& export_pids,
767  size_t& constant_num_packets,
768  const bool pack_pids,
769  Distributor& /* dist */)
770 {
771  using Kokkos::View;
772  typedef BufferDeviceType DT;
773  typedef typename DT::execution_space execution_space;
774  typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
775  const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch for the stderr diagnostics further down.
776  constexpr bool debug = false;
777 
778  auto local_matrix = sourceMatrix.getLocalMatrix ();
779  auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
780 
781  // Setting this to zero tells the caller to expect a possibly
782  // different ("nonconstant") number of packets per local index
783  // (i.e., a possibly different number of entries per row).
784  constant_num_packets = 0;
785 
786  const size_t num_export_lids =
787  static_cast<size_t> (export_lids.extent (0));
788  TEUCHOS_TEST_FOR_EXCEPTION
789  (num_export_lids !=
790  static_cast<size_t> (num_packets_per_lid.extent (0)),
791  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
792  << num_export_lids << " != num_packets_per_lid.extent(0) = "
793  << num_packets_per_lid.extent (0) << ".");
794  if (num_export_lids != 0) {
795  TEUCHOS_TEST_FOR_EXCEPTION
796  (num_packets_per_lid.data () == NULL, std::invalid_argument,
797  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
798  "num_packets_per_lid.data() = "
799  << num_packets_per_lid.data () << " == NULL.");
800  }
801 
// Per-item byte sizes; these are passed to computeNumPacketsAndOffsets.
802  const size_t num_bytes_per_lid = PackTraits<LO, DT>::packValueCount (LO (0));
803  const size_t num_bytes_per_gid = PackTraits<GO, DT>::packValueCount (GO (0));
804  const size_t num_bytes_per_pid = PackTraits<int, DT>::packValueCount (int (0));
805 
806  size_t num_bytes_per_value = 0;
// NOTE(review): the `if (...) {` line that opens the branch below is
// missing from this listing, so its condition cannot be determined
// here.  Restore it from the upstream file before compiling.
808  // Assume ST is default constructible; packValueCount wants an instance.
809  num_bytes_per_value = PackTraits<ST,DT>::packValueCount (ST ());
810  }
811  else {
812  // Since the packed data come from the source matrix, we can use
813  // the source matrix to get the number of bytes per Scalar value
814  // stored in the matrix. This assumes that all Scalar values in
815  // the source matrix require the same number of bytes. If the
816  // source matrix has no entries on the calling process, then we
817  // hope that some process does have some idea how big a Scalar
818  // value is. Of course, if no processes have any entries, then no
819  // values should be packed (though this does assume that in our
820  // packing scheme, rows with zero entries take zero bytes).
821  size_t num_bytes_per_value_l = 0;
822  if (local_matrix.values.extent(0) > 0) {
823  const ST& val = local_matrix.values(0);
824  num_bytes_per_value_l = PackTraits<ST, DT>::packValueCount (val);
825  }
// Take the maximum over the matrix's communicator, so that a process
// with no local entries still ends up with a usable size.
826  using Teuchos::reduceAll;
827  reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
828  Teuchos::REDUCE_MAX,
829  num_bytes_per_value_l,
830  Teuchos::outArg (num_bytes_per_value));
831  }
832 
// Nothing to export: replace the buffer with an empty one and return.
// This early return comes after the reduceAll above.
833  if (num_export_lids == 0) {
834  // FIXME (26 Apr 2016) Fences around (UVM) allocations only
835  // temporarily needed for #227 debugging. Should be able to
836  // remove them after that's fixed.
837  execution_space().fence ();
838  exports = exports_view_type ("exports", 0);
839  execution_space().fence ();
840  return;
841  }
842 
843  // Array of offsets into the pack buffer.
844  Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
845 
846  // Compute number of packets per LID (row to send), as well as
847  // corresponding offsets (the prefix sum of the packet counts).
// `count` is the total number of bytes needed in the pack buffer.
848  const size_t count =
849  computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
850  local_matrix.graph.row_map, export_lids,
851  export_pids,
852  num_bytes_per_lid, num_bytes_per_gid,
853  num_bytes_per_pid, num_bytes_per_value);
854 
855  // Resize the output pack buffer if needed.
// The buffer only grows here: an existing buffer with at least
// `count` bytes is left as is.
856  if (count > static_cast<size_t> (exports.extent (0))) {
857  // FIXME (26 Apr 2016) Fences around (UVM) allocations only
858  // temporarily needed for #227 debugging. Should be able to
859  // remove them after that's fixed.
860  execution_space().fence ();
861  exports = exports_view_type ("exports", count);
862  if (debug) {
863  std::ostringstream os;
864  os << "*** exports resized to " << count << std::endl;
865  std::cerr << os.str ();
866  }
867  execution_space().fence ();
868  }
869  if (debug) {
870  std::ostringstream os;
871  os << "*** count: " << count << ", exports.extent(0): "
872  << exports.extent (0) << std::endl;
873  std::cerr << os.str ();
874  }
875 
876  // If exports has nonzero length at this point, then the matrix has
877  // at least one entry to pack. Thus, if packing process ranks, we
878  // had better have at least one process rank to pack.
879  TEUCHOS_TEST_FOR_EXCEPTION
880  (pack_pids && exports.extent (0) != 0 &&
881  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
882  "pack_pids is true, and exports.extent(0) = " <<
883  exports.extent (0) << " != 0, meaning that we need to pack at least "
884  "one matrix entry, but export_pids.extent(0) = 0.");
885 
886  typedef typename std::decay<decltype (local_matrix)>::type
887  local_matrix_type;
888  typedef typename std::decay<decltype (local_col_map)>::type
889  local_map_type;
890 
// Mark the device side of the DualView as modified, then pack into
// its device view.
891  exports.modify_device ();
892  auto exports_d = exports.view_device ();
893  do_pack<local_matrix_type, local_map_type, DT>
894  (local_matrix, local_col_map, exports_d, num_packets_per_lid,
895  export_lids, export_pids, offsets, num_bytes_per_value,
896  pack_pids);
897  // If we got this far, we succeeded.
898 }
899 
900 } // namespace PackCrsMatrixImpl
901 
902 template<typename ST, typename LO, typename GO, typename NT>
903 void
905  Teuchos::Array<char>& exports,
906  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
907  const Teuchos::ArrayView<const LO>& exportLIDs,
908  size_t& constantNumPackets,
909  Distributor& distor)
910 {
911  using local_matrix_type = typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type;
912  using device_type = typename local_matrix_type::device_type;
913  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
914  using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
915  using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
916 
917  // Convert all Teuchos::Array to Kokkos::View
918 
919  // This is an output array, so we don't have to copy to device here.
920  // However, we'll have to remember to copy back to host when done.
921  Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
922  create_mirror_view_from_raw_host_array (buffer_device_type (),
923  numPacketsPerLID.getRawPtr (),
924  numPacketsPerLID.size (), false,
925  "num_packets_per_lid");
926  // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
927  // DualView through here, instead of recreating a device View from a
928  // host ArrayView that itself came from a DualView.
929  //
930  // This is an input array, so we have to copy to device here.
931  // However, we never need to copy it back to host.
932  Kokkos::View<const LO*, buffer_device_type> export_lids_d =
933  create_mirror_view_from_raw_host_array (buffer_device_type (),
934  exportLIDs.getRawPtr (),
935  exportLIDs.size (), true,
936  "export_lids");
937 
938  Kokkos::View<int*, device_type> export_pids_d; // output arg
939  Kokkos::DualView<char*, buffer_device_type> exports_dv; // output arg
940  constexpr bool pack_pids = false;
941  PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
942  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
943  export_pids_d, constantNumPackets, pack_pids, distor);
944 
945  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
946  // copy them back to host.
947  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
948  (numPacketsPerLID.getRawPtr (),
949  numPacketsPerLID.size ());
950  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
951 
952  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
953  // exports_dv above, then we have two host copies for exports_h.
954 
955  // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
956  // to copy them back to host.
957  if (static_cast<size_t> (exports.size ()) !=
958  static_cast<size_t> (exports_dv.extent (0))) {
959  exports.resize (exports_dv.extent (0));
960  }
961  Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
962  exports.size ());
963  Kokkos::deep_copy (exports_h, exports_dv.d_view);
964 }
965 
966 template<typename ST, typename LO, typename GO, typename NT>
967 void
969  Kokkos::DualView<char*,
971  const Kokkos::DualView<size_t*,
972  typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
973  const Kokkos::DualView<const LO*,
975  size_t& constantNumPackets,
976  Distributor& distor)
977 {
978  using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
979  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
980 
981  // Create an empty array of PIDs, since the interface needs it.
982  Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
983  constexpr bool pack_pids = false;
984 
985  // Write-only device access
986  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
987  numPacketsPerLID_nc.clear_sync_state ();
988  numPacketsPerLID_nc.modify_device ();
989  auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
990 
991  // Read-only device access
992  TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
993  auto exportLIDs_d = exportLIDs.view_device ();
994 
995  PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
996  sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
997  exportPIDs_d, constantNumPackets, pack_pids, distor);
998 }
999 
1000 template<typename ST, typename LO, typename GO, typename NT>
1001 void
1003  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
1004  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
1005  const Teuchos::ArrayView<const LO>& exportLIDs,
1006  const Teuchos::ArrayView<const int>& sourcePIDs,
1007  size_t& constantNumPackets,
1008  Distributor& distor)
1009 {
1010  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_type local_matrix_type;
1011  typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
1012  typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
1013  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
1014 
1015  typename local_matrix_type::device_type outputDevice;
1016 
1017  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
1018  std::unique_ptr<std::string> prefix;
1019  if (verbose) {
1020  const int myRank = [&] () {
1021  auto map = sourceMatrix.getMap ();
1022  if (map.get () == nullptr) {
1023  return -1;
1024  }
1025  auto comm = map->getComm ();
1026  if (comm.get () == nullptr) {
1027  return -2;
1028  }
1029  return comm->getRank ();
1030  } ();
1031  std::ostringstream os;
1032  os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
1033  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
1034 
1035  std::ostringstream os2;
1036  os2 << *prefix << "start" << std::endl;
1037  std::cerr << os2.str ();
1038  }
1039 
1040  // Convert all Teuchos::Array to Kokkos::View
1041 
1042  // This is an output array, so we don't have to copy to device here.
1043  // However, we'll have to remember to copy back to host when done.
1044  auto num_packets_per_lid_d =
1045  create_mirror_view_from_raw_host_array (buffer_device_type (),
1046  numPacketsPerLID.getRawPtr (),
1047  numPacketsPerLID.size (), false,
1048  "num_packets_per_lid");
1049 
1050  // This is an input array, so we have to copy to device here.
1051  // However, we never need to copy it back to host.
1052  auto export_lids_d =
1053  create_mirror_view_from_raw_host_array (buffer_device_type (),
1054  exportLIDs.getRawPtr (),
1055  exportLIDs.size (), true,
1056  "export_lids");
1057  // This is an input array, so we have to copy to device here.
1058  // However, we never need to copy it back to host.
1059  auto export_pids_d =
1061  sourcePIDs.getRawPtr (),
1062  sourcePIDs.size (), true,
1063  "export_pids");
1064  constexpr bool pack_pids = true;
1065  try {
1066  PackCrsMatrixImpl::packCrsMatrix
1067  (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1068  export_pids_d, constantNumPackets, pack_pids, distor);
1069  }
1070  catch (std::exception& e) {
1071  if (verbose) {
1072  std::ostringstream os;
1073  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1074  << e.what () << std::endl;
1075  std::cerr << os.str ();
1076  }
1077  throw;
1078  }
1079  catch (...) {
1080  if (verbose) {
1081  std::ostringstream os;
1082  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1083  "not a subclass of std::exception" << std::endl;
1084  std::cerr << os.str ();
1085  }
1086  throw;
1087  }
1088 
1089  if (numPacketsPerLID.size () != 0) {
1090  try {
1091  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1092  // so we have to copy them back to host.
1093  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1094  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1095  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1096  }
1097  catch (std::exception& e) {
1098  if (verbose) {
1099  std::ostringstream os;
1100  os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1101  std::cerr << os.str ();
1102  }
1103  throw;
1104  }
1105  catch (...) {
1106  if (verbose) {
1107  std::ostringstream os;
1108  os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1109  "of std::exception" << std::endl;
1110  std::cerr << os.str ();
1111  }
1112  throw;
1113  }
1114  }
1115 
1116  if (verbose) {
1117  std::ostringstream os;
1118  os << *prefix << "done" << std::endl;
1119  std::cerr << os.str ();
1120  }
1121 }
1122 
1123 } // namespace Details
1124 } // namespace Tpetra
1125 
/// \brief Explicit-instantiation macro for the packing entry points.
///
/// Expands to explicit instantiations of Details::packCrsMatrix,
/// Details::packCrsMatrixNew, and Details::packCrsMatrixWithOwningPIDs
/// for the template arguments (ST, LO, GO, NT).
///
/// The expansion names \c Details::, \c CrsMatrix, \c DistObject, and
/// \c Distributor without a \c Tpetra:: qualifier, so it must be
/// invoked where those names are visible.  It already ends with a
/// semicolon.
#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
  template void \
  Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Teuchos::Array<char>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    size_t&, \
    Distributor&); \
  template void \
  Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    size_t&, \
    Distributor&); \
  template void \
  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
    Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
    const Teuchos::ArrayView<size_t>&, \
    const Teuchos::ArrayView<const LO>&, \
    const Teuchos::ArrayView<const int>&, \
    size_t&, \
    Distributor&);
1149 
1150 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
typename Node::device_type device_type
The Kokkos device type.
static bool verbose()
Whether Tpetra is in verbose mode.
Compute the number of packets and offsets for the pack procedure.
Base class for distributed Tpetra objects that support data redistribution.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Sets up and executes a communication plan for a Tpetra DistObject.
Implementation details of Tpetra.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject inter...
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse matrix for communication.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of byte...
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static const bool compileTimeSize
Whether the number of bytes required to pack one instance of value_type is fixed at compile time.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.