Tpetra parallel linear algebra  Version of the Day
Tpetra_BlockMultiVector_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
43 #define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
44 
46 #include "Tpetra_BlockView.hpp"
47 #include "Teuchos_OrdinalTraits.hpp"
48 
49 namespace { // anonymous
50 
62  template<class MultiVectorType>
63  struct RawHostPtrFromMultiVector {
64  typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
65 
66  static impl_scalar_type* getRawPtr (MultiVectorType& X) {
67  // NOTE (mfh 09 Jun 2016) This does NOT sync to host, or mark
68  // host as modified. This is on purpose, because we don't want
69  // the BlockMultiVector sync'd to host unnecessarily.
70  // Otherwise, all the MultiVector and BlockMultiVector kernels
71  // would run on host instead of device. See Github Issue #428.
72  auto X_view_host = X.template getLocalView<typename MultiVectorType::dual_view_type::t_host::device_type> ();
73  impl_scalar_type* X_raw = X_view_host.data ();
74  return X_raw;
75  }
76  };
77 
90  template<class S, class LO, class GO, class N>
92  getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
94  return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
95  }
96 
97 } // namespace (anonymous)
98 
99 namespace Tpetra {
100 
101 template<class Scalar, class LO, class GO, class Node>
104 getMultiVectorView () const
105 {
106  return mv_;
107 }
108 
109 template<class Scalar, class LO, class GO, class Node>
110 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
113 {
115  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
116  TEUCHOS_TEST_FOR_EXCEPTION(
117  src_bmv == nullptr, std::invalid_argument, "Tpetra::"
118  "BlockMultiVector: The source object of an Import or Export to a "
119  "BlockMultiVector, must also be a BlockMultiVector.");
120  return Teuchos::rcp (src_bmv, false); // nonowning RCP
121 }
122 
123 template<class Scalar, class LO, class GO, class Node>
126  const Teuchos::DataAccess copyOrView) :
127  dist_object_type (in),
128  meshMap_ (in.meshMap_),
129  pointMap_ (in.pointMap_),
130  mv_ (in.mv_, copyOrView),
131  mvData_ (getRawHostPtrFromMultiVector (mv_)),
132  blockSize_ (in.blockSize_)
133 {}
134 
135 template<class Scalar, class LO, class GO, class Node>
137 BlockMultiVector (const map_type& meshMap,
138  const LO blockSize,
139  const LO numVecs) :
140  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
141  meshMap_ (meshMap),
142  pointMap_ (makePointMap (meshMap, blockSize)),
143  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
144  mvData_ (getRawHostPtrFromMultiVector (mv_)),
145  blockSize_ (blockSize)
146 {}
147 
148 template<class Scalar, class LO, class GO, class Node>
150 BlockMultiVector (const map_type& meshMap,
151  const map_type& pointMap,
152  const LO blockSize,
153  const LO numVecs) :
154  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
155  meshMap_ (meshMap),
156  pointMap_ (pointMap),
157  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
158  mvData_ (getRawHostPtrFromMultiVector (mv_)),
159  blockSize_ (blockSize)
160 {}
161 
162 template<class Scalar, class LO, class GO, class Node>
164 BlockMultiVector (const mv_type& X_mv,
165  const map_type& meshMap,
166  const LO blockSize) :
167  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
168  meshMap_ (meshMap),
169  mvData_ (nullptr), // just for now
170  blockSize_ (blockSize)
171 {
172  using Teuchos::RCP;
173 
174  if (X_mv.getCopyOrView () == Teuchos::View) {
175  // The input MultiVector has view semantics, so assignment just
176  // does a shallow copy.
177  mv_ = X_mv;
178  }
179  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
180  // The input MultiVector has copy semantics. We can't change
181  // that, but we can make a view of the input MultiVector and
182  // change the view to have view semantics. (That sounds silly;
183  // shouldn't views always have view semantics? but remember that
184  // "view semantics" just governs the default behavior of the copy
185  // constructor and assignment operator.)
186  RCP<const mv_type> X_view_const;
187  const size_t numCols = X_mv.getNumVectors ();
188  if (numCols == 0) {
189  Teuchos::Array<size_t> cols (0); // view will have zero columns
190  X_view_const = X_mv.subView (cols ());
191  } else { // Range1D is an inclusive range
192  X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
193  }
194  TEUCHOS_TEST_FOR_EXCEPTION(
195  X_view_const.is_null (), std::logic_error, "Tpetra::"
196  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
197  "should never happen. Please report this bug to the Tpetra developers.");
198 
199  // It's perfectly OK to cast away const here. Those view methods
200  // should be marked const anyway, because views can't change the
201  // allocation (just the entries themselves).
202  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
203  TEUCHOS_TEST_FOR_EXCEPTION(
204  X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
205  "BlockMultiVector constructor: We just set a MultiVector "
206  "to have view semantics, but it claims that it doesn't have view "
207  "semantics. This should never happen. "
208  "Please report this bug to the Tpetra developers.");
209  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
210  }
211 
212  // At this point, mv_ has been assigned, so we can ignore X_mv.
213  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
214  if (! pointMap.is_null ()) {
215  pointMap_ = *pointMap; // Map::operator= also does a shallow copy
216  }
217  mvData_ = getRawHostPtrFromMultiVector (mv_);
218 }
219 
220 template<class Scalar, class LO, class GO, class Node>
223  const map_type& newMeshMap,
224  const map_type& newPointMap,
225  const size_t offset) :
226  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
227  meshMap_ (newMeshMap),
228  pointMap_ (newPointMap),
229  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
230  mvData_ (getRawHostPtrFromMultiVector (mv_)),
231  blockSize_ (X.getBlockSize ())
232 {}
233 
234 template<class Scalar, class LO, class GO, class Node>
237  const map_type& newMeshMap,
238  const size_t offset) :
239  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
240  meshMap_ (newMeshMap),
241  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
242  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
243  mvData_ (getRawHostPtrFromMultiVector (mv_)),
244  blockSize_ (X.getBlockSize ())
245 {}
246 
247 template<class Scalar, class LO, class GO, class Node>
249 BlockMultiVector () :
250  dist_object_type (Teuchos::null),
251  mvData_ (nullptr),
252  blockSize_ (0)
253 {}
254 
255 template<class Scalar, class LO, class GO, class Node>
258 makePointMap (const map_type& meshMap, const LO blockSize)
259 {
260  typedef Tpetra::global_size_t GST;
261  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
262 
263  const GST gblNumMeshMapInds =
264  static_cast<GST> (meshMap.getGlobalNumElements ());
265  const size_t lclNumMeshMapIndices =
266  static_cast<size_t> (meshMap.getNodeNumElements ());
267  const GST gblNumPointMapInds =
268  gblNumMeshMapInds * static_cast<GST> (blockSize);
269  const size_t lclNumPointMapInds =
270  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
271  const GO indexBase = meshMap.getIndexBase ();
272 
273  if (meshMap.isContiguous ()) {
274  return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
275  meshMap.getComm ());
276  }
277  else {
278  // "Hilbert's Hotel" trick: multiply each process' GIDs by
279  // blockSize, and fill in. That ensures correctness even if the
280  // mesh Map is overlapping.
281  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
282  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
283  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
284  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
285  const GO meshGid = lclMeshGblInds[g];
286  const GO pointGidStart = indexBase +
287  (meshGid - indexBase) * static_cast<GO> (blockSize);
288  const size_type offset = g * static_cast<size_type> (blockSize);
289  for (LO k = 0; k < blockSize; ++k) {
290  const GO pointGid = pointGidStart + static_cast<GO> (k);
291  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
292  }
293  }
294  return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
295  meshMap.getComm ());
296  }
297 }
298 
299 
300 template<class Scalar, class LO, class GO, class Node>
301 void
303 replaceLocalValuesImpl (const LO localRowIndex,
304  const LO colIndex,
305  const Scalar vals[]) const
306 {
307  auto X_dst = getLocalBlock (localRowIndex, colIndex);
308  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
309  getBlockSize ());
310  Kokkos::deep_copy (X_dst, X_src);
311 }
312 
313 
314 template<class Scalar, class LO, class GO, class Node>
315 bool
317 replaceLocalValues (const LO localRowIndex,
318  const LO colIndex,
319  const Scalar vals[]) const
320 {
321  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
322  return false;
323  } else {
324  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
325  return true;
326  }
327 }
328 
329 template<class Scalar, class LO, class GO, class Node>
330 bool
332 replaceGlobalValues (const GO globalRowIndex,
333  const LO colIndex,
334  const Scalar vals[]) const
335 {
336  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
337  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
338  return false;
339  } else {
340  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
341  return true;
342  }
343 }
344 
345 template<class Scalar, class LO, class GO, class Node>
346 void
348 sumIntoLocalValuesImpl (const LO localRowIndex,
349  const LO colIndex,
350  const Scalar vals[]) const
351 {
352  auto X_dst = getLocalBlock (localRowIndex, colIndex);
353  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
354  getBlockSize ());
355  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
356 }
357 
358 template<class Scalar, class LO, class GO, class Node>
359 bool
361 sumIntoLocalValues (const LO localRowIndex,
362  const LO colIndex,
363  const Scalar vals[]) const
364 {
365  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
366  return false;
367  } else {
368  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
369  return true;
370  }
371 }
372 
373 template<class Scalar, class LO, class GO, class Node>
374 bool
376 sumIntoGlobalValues (const GO globalRowIndex,
377  const LO colIndex,
378  const Scalar vals[]) const
379 {
380  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
381  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
382  return false;
383  } else {
384  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
385  return true;
386  }
387 }
388 
389 template<class Scalar, class LO, class GO, class Node>
390 bool
392 getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals) const
393 {
394  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
395  return false;
396  } else {
397  auto X_ij = getLocalBlock (localRowIndex, colIndex);
398  vals = reinterpret_cast<Scalar*> (X_ij.data ());
399  return true;
400  }
401 }
402 
403 template<class Scalar, class LO, class GO, class Node>
404 bool
406 getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals) const
407 {
408  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
409  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
410  return false;
411  } else {
412  auto X_ij = getLocalBlock (localRowIndex, colIndex);
413  vals = reinterpret_cast<Scalar*> (X_ij.data ());
414  return true;
415  }
416 }
417 
418 template<class Scalar, class LO, class GO, class Node>
421 getLocalBlock (const LO localRowIndex,
422  const LO colIndex) const
423 {
424  // NOTE (mfh 07 Jul 2016) It should be correct to add the
425  // commented-out test below. However, I've conservatively commented
426  // it out, since users might not realize that they need to have
427  // things sync'd correctly.
428 
429 // #ifdef HAVE_TPETRA_DEBUG
430 // TEUCHOS_TEST_FOR_EXCEPTION
431 // (mv_.need_sync_host (), std::runtime_error,
432 // "Tpetra::BlockMultiVector::getLocalBlock: This method "
433 // "accesses host data, but the object is not in sync on host." );
434 // #endif // HAVE_TPETRA_DEBUG
435 
436  if (! isValidLocalMeshIndex (localRowIndex)) {
437  return typename little_vec_type::HostMirror ();
438  } else {
439  const size_t blockSize = getBlockSize ();
440  const size_t offset = colIndex * this->getStrideY () +
441  localRowIndex * blockSize;
442  impl_scalar_type* blockRaw = this->getRawPtr () + offset;
443  return typename little_vec_type::HostMirror (blockRaw, blockSize);
444  }
445 }
446 
447 template<class Scalar, class LO, class GO, class Node>
448 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
451 {
452  using Teuchos::rcp;
453  using Teuchos::rcpFromRef;
454 
455  // The source object of an Import or Export must be a
456  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
457  // them in that order; one must succeed. Note that the target of
458  // the Import or Export calls checkSizes in DistObject's doTransfer.
459  typedef BlockMultiVector<Scalar, LO, GO, Node> this_type;
460  const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
461  if (srcBlkVec == nullptr) {
462  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
463  if (srcMultiVec == nullptr) {
464  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
465  // currently does a shallow copy by default. This is why we
466  // return by RCP, rather than by value.
467  return rcp (new mv_type ());
468  } else { // src is a MultiVector
469  return rcp (srcMultiVec, false); // nonowning RCP
470  }
471  } else { // src is a BlockMultiVector
472  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
473  }
474 }
475 
476 template<class Scalar, class LO, class GO, class Node>
479 {
480  return ! getMultiVectorFromSrcDistObject (src).is_null ();
481 }
482 
483 template<class Scalar, class LO, class GO, class Node>
485 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
486 copyAndPermuteNew
487 #else // TPETRA_ENABLE_DEPRECATED_CODE
488 copyAndPermute
489 #endif // TPETRA_ENABLE_DEPRECATED_CODE
490 (const SrcDistObject& src,
491  const size_t numSameIDs,
492  const Kokkos::DualView<const local_ordinal_type*,
493  buffer_device_type>& permuteToLIDs,
494  const Kokkos::DualView<const local_ordinal_type*,
495  buffer_device_type>& permuteFromLIDs)
496 {
497  TEUCHOS_TEST_FOR_EXCEPTION
498  (true, std::logic_error,
499  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
500  "instead, create a point importer using makePointMap function.");
501 }
502 
503 template<class Scalar, class LO, class GO, class Node>
504 void BlockMultiVector<Scalar, LO, GO, Node>::
505 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
506 packAndPrepareNew
507 #else // TPETRA_ENABLE_DEPRECATED_CODE
508 packAndPrepare
509 #endif // TPETRA_ENABLE_DEPRECATED_CODE
510 (const SrcDistObject& src,
511  const Kokkos::DualView<const local_ordinal_type*,
512  buffer_device_type>& exportLIDs,
513  Kokkos::DualView<packet_type*,
514  buffer_device_type>& exports,
515  Kokkos::DualView<size_t*,
516  buffer_device_type> numPacketsPerLID,
517  size_t& constantNumPackets,
518  Distributor& distor)
519 {
520  TEUCHOS_TEST_FOR_EXCEPTION
521  (true, std::logic_error,
522  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
523  "instead, create a point importer using makePointMap function.");
524 }
525 
526 template<class Scalar, class LO, class GO, class Node>
527 void BlockMultiVector<Scalar, LO, GO, Node>::
528 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
529 unpackAndCombineNew
530 #else // TPETRA_ENABLE_DEPRECATED_CODE
532 #endif // TPETRA_ENABLE_DEPRECATED_CODE
533 (const Kokkos::DualView<const local_ordinal_type*,
534  buffer_device_type>& importLIDs,
535  Kokkos::DualView<packet_type*,
536  buffer_device_type> imports,
537  Kokkos::DualView<size_t*,
538  buffer_device_type> numPacketsPerLID,
539  const size_t constantNumPackets,
540  Distributor& distor,
541  const CombineMode combineMode)
542 {
543  TEUCHOS_TEST_FOR_EXCEPTION
544  (true, std::logic_error,
545  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
546  "instead, create a point importer using makePointMap function.");
547 }
548 
549 template<class Scalar, class LO, class GO, class Node>
551 isValidLocalMeshIndex (const LO meshLocalIndex) const
552 {
553  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
554  meshMap_.isNodeLocalElement (meshLocalIndex);
555 }
556 
557 template<class Scalar, class LO, class GO, class Node>
559 putScalar (const Scalar& val)
560 {
561  mv_.putScalar (val);
562 }
563 
564 template<class Scalar, class LO, class GO, class Node>
566 scale (const Scalar& val)
567 {
568  mv_.scale (val);
569 }
570 
571 template<class Scalar, class LO, class GO, class Node>
573 update (const Scalar& alpha,
575  const Scalar& beta)
576 {
577  mv_.update (alpha, X.mv_, beta);
578 }
579 
580 namespace Impl {
581 // Y := alpha * D * X
582 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
583 struct BlockWiseMultiply {
584  typedef typename ViewD::size_type Size;
585 
586 private:
587  typedef typename ViewD::device_type Device;
588  typedef typename ViewD::non_const_value_type ImplScalar;
589  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
590 
591  template <typename View>
592  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
593  typename View::device_type, Unmanaged>;
594  typedef UnmanagedView<ViewY> UnMViewY;
595  typedef UnmanagedView<ViewD> UnMViewD;
596  typedef UnmanagedView<ViewX> UnMViewX;
597 
598  const Size block_size_;
599  Scalar alpha_;
600  UnMViewY Y_;
601  UnMViewD D_;
602  UnMViewX X_;
603 
604 public:
605  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
606  const ViewY& Y, const ViewD& D, const ViewX& X)
607  : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
608  {}
609 
610  KOKKOS_INLINE_FUNCTION
611  void operator() (const Size k) const {
612  const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
613  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
614  const auto num_vecs = X_.extent(1);
615  for (Size i = 0; i < num_vecs; ++i) {
616  Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
617  auto X_curBlk = Kokkos::subview(X_, kslice, i);
618  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
619  // Y_curBlk := alpha * D_curBlk * X_curBlk.
620  // Recall that GEMV does an update (+=) of the last argument.
621  Tpetra::FILL(Y_curBlk, zero);
622  Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
623  }
624  }
625 };
626 
627 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
628 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
629 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
630  const ViewY& Y, const ViewD& D, const ViewX& X) {
631  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
632 }
633 
634 template <typename ViewY,
635  typename Scalar,
636  typename ViewD,
637  typename ViewZ,
638  typename LO = typename ViewY::size_type>
639 class BlockJacobiUpdate {
640 private:
641  typedef typename ViewD::device_type Device;
642  typedef typename ViewD::non_const_value_type ImplScalar;
643  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
644 
645  template <typename ViewType>
646  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
647  typename ViewType::array_layout,
648  typename ViewType::device_type,
649  Unmanaged>;
650  typedef UnmanagedView<ViewY> UnMViewY;
651  typedef UnmanagedView<ViewD> UnMViewD;
652  typedef UnmanagedView<ViewZ> UnMViewZ;
653 
654  const LO blockSize_;
655  UnMViewY Y_;
656  const Scalar alpha_;
657  UnMViewD D_;
658  UnMViewZ Z_;
659  const Scalar beta_;
660 
661 public:
662  BlockJacobiUpdate (const ViewY& Y,
663  const Scalar& alpha,
664  const ViewD& D,
665  const ViewZ& Z,
666  const Scalar& beta) :
667  blockSize_ (D.extent (1)),
668  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
669  Y_ (Y),
670  alpha_ (alpha),
671  D_ (D),
672  Z_ (Z),
673  beta_ (beta)
674  {
675  static_assert (static_cast<int> (ViewY::rank) == 1,
676  "Y must have rank 1.");
677  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
678  static_assert (static_cast<int> (ViewZ::rank) == 1,
679  "Z must have rank 1.");
680  // static_assert (static_cast<int> (ViewZ::rank) ==
681  // static_cast<int> (ViewY::rank),
682  // "Z must have the same rank as Y.");
683  }
684 
685  KOKKOS_INLINE_FUNCTION void
686  operator() (const LO& k) const
687  {
688  using Kokkos::ALL;
689  using Kokkos::subview;
690  typedef Kokkos::pair<LO, LO> range_type;
691  typedef Kokkos::Details::ArithTraits<Scalar> KAT;
692 
693  // We only have to implement the alpha != 0 case.
694 
695  auto D_curBlk = subview (D_, k, ALL (), ALL ());
696  const range_type kslice (k*blockSize_, (k+1)*blockSize_);
697 
698  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
699 
700  auto Z_curBlk = subview (Z_, kslice);
701  auto Y_curBlk = subview (Y_, kslice);
702  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
703  if (beta_ == KAT::zero ()) {
704  Tpetra::FILL (Y_curBlk, KAT::zero ());
705  }
706  else if (beta_ != KAT::one ()) {
707  Tpetra::SCAL (beta_, Y_curBlk);
708  }
709  Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
710  }
711 };
712 
713 template<class ViewY,
714  class Scalar,
715  class ViewD,
716  class ViewZ,
717  class LO = typename ViewD::size_type>
718 void
719 blockJacobiUpdate (const ViewY& Y,
720  const Scalar& alpha,
721  const ViewD& D,
722  const ViewZ& Z,
723  const Scalar& beta)
724 {
725  static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
726  static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
727  static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
728  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
729  "Y and Z must have the same rank.");
730  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
731 
732  const auto lclNumMeshRows = D.extent (0);
733 
734 #ifdef HAVE_TPETRA_DEBUG
735  // D.extent(0) is the (local) number of mesh rows.
736  // D.extent(1) is the block size. Thus, their product should be
737  // the local number of point rows, that is, the number of rows in Y.
738  const auto blkSize = D.extent (1);
739  const auto lclNumPtRows = lclNumMeshRows * blkSize;
740  TEUCHOS_TEST_FOR_EXCEPTION
741  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
742  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
743  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
744  << " = " << lclNumPtRows << ".");
745  TEUCHOS_TEST_FOR_EXCEPTION
746  (Y.extent (0) != Z.extent (0), std::invalid_argument,
747  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
748  "Z.extent(0) = " << Z.extent (0) << ".");
749  TEUCHOS_TEST_FOR_EXCEPTION
750  (Y.extent (1) != Z.extent (1), std::invalid_argument,
751  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
752  "Z.extent(1) = " << Z.extent (1) << ".");
753 #endif // HAVE_TPETRA_DEBUG
754 
755  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
756  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
757  // lclNumMeshRows must fit in LO, else the Map would not be correct.
758  range_type range (0, static_cast<LO> (lclNumMeshRows));
759  Kokkos::parallel_for (range, functor);
760 }
761 
762 } // namespace Impl
763 
764 template<class Scalar, class LO, class GO, class Node>
766 blockWiseMultiply (const Scalar& alpha,
767  const Kokkos::View<const impl_scalar_type***,
768  device_type, Kokkos::MemoryUnmanaged>& D,
770 {
771  using Kokkos::ALL;
772  typedef typename device_type::execution_space execution_space;
773  typedef typename device_type::memory_space memory_space;
774  const LO lclNumMeshRows = meshMap_.getNodeNumElements ();
775 
776  if (alpha == STS::zero ()) {
777  this->putScalar (STS::zero ());
778  }
779  else { // alpha != 0
780  const LO blockSize = this->getBlockSize ();
781  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
782  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
783  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
784  auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
785 
786  // Use an explicit RangePolicy with the desired execution space.
787  // Otherwise, if you just use a number, it runs on the default
788  // execution space. For example, if the default execution space
789  // is Cuda but the current execution space is Serial, using just a
790  // number would incorrectly run with Cuda.
791  Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
792  Kokkos::parallel_for (range, bwm);
793  }
794 }
795 
796 template<class Scalar, class LO, class GO, class Node>
798 blockJacobiUpdate (const Scalar& alpha,
799  const Kokkos::View<const impl_scalar_type***,
800  device_type, Kokkos::MemoryUnmanaged>& D,
803  const Scalar& beta)
804 {
805  using Kokkos::ALL;
806  using Kokkos::subview;
807  typedef typename device_type::memory_space memory_space;
808  typedef impl_scalar_type IST;
809 
810  const IST alphaImpl = static_cast<IST> (alpha);
811  const IST betaImpl = static_cast<IST> (beta);
812  const LO numVecs = mv_.getNumVectors ();
813 
814  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
815  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
816  auto Z_lcl = Z.mv_.template getLocalView<memory_space> ();
817 
818  if (alpha == STS::zero ()) { // Y := beta * Y
819  this->scale (beta);
820  }
821  else { // alpha != 0
822  Z.update (STS::one (), X, -STS::one ());
823  for (LO j = 0; j < numVecs; ++j) {
824  auto X_lcl_j = subview (X_lcl, ALL (), j);
825  auto Y_lcl_j = subview (Y_lcl, ALL (), j);
826  auto Z_lcl_j = subview (Z_lcl, ALL (), j);
827  Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
828  }
829  }
830 }
831 
832 } // namespace Tpetra
833 
834 //
835 // Explicit instantiation macro
836 //
837 // Must be expanded from within the Tpetra namespace!
838 //
839 #define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
840  template class BlockMultiVector< S, LO, GO, NODE >;
841 
842 #endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const Packet *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const bool unpack_pids)
Perform the unpack operation for the graph.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using a global index.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool getGlobalRowView(const GO globalRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a global index.
bool getLocalRowView(const LO localRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a local index.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a local index.
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using local row and column indices.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a global index.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
typename device_type::execution_space execution_space
The Kokkos execution space.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
GlobalOrdinal getIndexBase() const
The index base for this Map.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
One or more distributed dense vectors.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t getNumVectors() const
Number of columns in the multivector.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
Abstract base class for objects that can be the source of an Import or Export operation.
int local_ordinal_type
Default value of Scalar template parameter.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
size_t global_size_t
Global size_t object.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
CombineMode
Rule for combining data in an Import or Export.