// Tpetra parallel linear algebra ("Version of the Day" snapshot)
// Tpetra_MultiVector_def.hpp: definitions for Tpetra::MultiVector.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
43 #define TPETRA_MULTIVECTOR_DEF_HPP
44 
52 
53 #include "Tpetra_Util.hpp"
54 #include "Tpetra_Vector.hpp"
55 #include "Tpetra_Details_MultiVectorDistObjectKernels.hpp"
56 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
57 
58 #include "KokkosCompat_View.hpp"
59 #include "Kokkos_MV_GEMM.hpp"
60 #include "Kokkos_Blas1_MV.hpp"
61 #include "Kokkos_Random.hpp"
62 
#ifdef HAVE_TPETRA_INST_FLOAT128
namespace Kokkos {
  // FIXME (mfh 04 Sep 2015) Just a stub for now!
  //
  // Specialization of Kokkos' random-number interface for GCC's
  // __float128 type.  Each draw combines two double-precision draws:
  // a high-order term, plus a second draw scaled down far below the
  // smallest normalized double so it only fills in low-order bits.
  template<class Generator>
  struct rand<Generator, __float128> {
    static KOKKOS_INLINE_FUNCTION __float128 max ()
    {
      return static_cast<__float128> (1.0);
    }

    // Half the smallest normalized double; this is the scaling factor
    // of the lower-order term in the double-double representation.
    static KOKKOS_INLINE_FUNCTION __float128 lowOrderScale ()
    {
      return static_cast<__float128> (std::numeric_limits<double>::min ()) /
        static_cast<__float128> (2.0);
    }

    static KOKKOS_INLINE_FUNCTION __float128
    draw (Generator& gen)
    {
      const __float128 hi = static_cast<__float128> (gen.drand ());
      const __float128 lo =
        static_cast<__float128> (gen.drand ()) * lowOrderScale ();
      return hi + lo;
    }

    static KOKKOS_INLINE_FUNCTION __float128
    draw (Generator& gen, const __float128& range)
    {
      // FIXME (mfh 05 Sep 2015) Not sure if this is right.
      const __float128 hi = static_cast<__float128> (gen.drand (range));
      const __float128 lo =
        static_cast<__float128> (gen.drand (range)) * lowOrderScale ();
      return hi + lo;
    }

    static KOKKOS_INLINE_FUNCTION __float128
    draw (Generator& gen, const __float128& start, const __float128& end)
    {
      // FIXME (mfh 05 Sep 2015) Not sure if this is right.
      const __float128 hi = static_cast<__float128> (gen.drand (start, end));
      const __float128 lo =
        static_cast<__float128> (gen.drand (start, end)) * lowOrderScale ();
      return hi + lo;
    }
  };
} // namespace Kokkos
#endif // HAVE_TPETRA_INST_FLOAT128
114 
115 namespace { // (anonymous)
116 
129  template<class ST, class LO, class GO, class NT>
131  allocDualView (const size_t lclNumRows, const size_t numCols, const bool zeroOut = true)
132  {
133  typedef typename Tpetra::MultiVector<ST, LO, GO, NT>::dual_view_type dual_view_type;
134  const char* label = "MV::DualView";
135 
136 #if 0
137  (void) zeroOut;
138  return dual_view_type (label, lclNumRows, numCols);
139 #else
140  if (zeroOut) {
141  return dual_view_type (label, lclNumRows, numCols);
142  }
143  else {
144  // FIXME (mfh 18 Feb 2015, 12 Apr 2015) This is just a hack,
145  // until Kokkos::DualView accepts an AllocationProperties
146  // initial argument, just like Kokkos::View. However, the hack
147  // is harmless, since it does what the (currently nonexistent)
148  // equivalent DualView constructor would have done anyway.
149  typename dual_view_type::t_dev d_view (Kokkos::ViewAllocateWithoutInitializing (label), lclNumRows, numCols);
150 #ifdef HAVE_TPETRA_DEBUG
151  // Filling with NaN is a cheap and effective way to tell if
152  // downstream code is trying to use a MultiVector's data without
153  // them having been initialized. ArithTraits lets us call nan()
154  // even if the scalar type doesn't define it; it just returns some
155  // undefined value in the latter case. This won't hurt anything
156  // because by setting zeroOut=false, users already agreed that
157  // they don't care about the contents of the MultiVector.
158  const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
159  KokkosBlas::fill (d_view, nan);
160 #endif // HAVE_TPETRA_DEBUG
161  typename dual_view_type::t_host h_view = Kokkos::create_mirror_view (d_view);
162  // Even though the user doesn't care about the contents of the
163  // MultiVector, the device and host views are still out of sync.
164  // We prefer to work in device memory. The way to ensure this
165  // happens is to mark the device view as modified.
166  dual_view_type dv (d_view, h_view);
167  dv.template modify<typename dual_view_type::t_dev::memory_space> ();
168 
169  return dual_view_type (d_view, h_view);
170  }
171 #endif // 0
172  }
173 
174  // Convert 1-D Teuchos::ArrayView to an unmanaged 1-D host Kokkos::View.
175  //
176  // T: The type of the entries of the View.
177  // ExecSpace: The Kokkos execution space.
178  template<class T, class ExecSpace>
179  struct MakeUnmanagedView {
180  // The 'false' part of the branch carefully ensures that this
181  // won't attempt to use a host execution space that hasn't been
182  // initialized. For example, if Kokkos::OpenMP is disabled and
183  // Kokkos::Threads is enabled, the latter is always the default
184  // execution space of Kokkos::HostSpace, even when ExecSpace is
185  // Kokkos::Serial. That's why we go through the trouble of asking
186  // Kokkos::DualView what _its_ space is. That seems to work
187  // around this default execution space issue.
188  typedef typename Kokkos::Impl::if_c<
189  Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<ExecSpace, Kokkos::HostSpace>::value,
190  typename ExecSpace::device_type,
191  typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
192  typedef Kokkos::LayoutLeft array_layout;
193  typedef Kokkos::View<T*, array_layout, host_exec_space,
194  Kokkos::MemoryUnmanaged> view_type;
195 
196  static view_type getView (const Teuchos::ArrayView<T>& x_in)
197  {
198  const size_t numEnt = static_cast<size_t> (x_in.size ());
199  if (numEnt == 0) {
200  return view_type ();
201  } else {
202  return view_type (x_in.getRawPtr (), numEnt);
203  }
204  }
205  };
206 
207  // mfh 14 Apr 2015: Work-around for bug in Kokkos::subview, where
208  // taking a subview of a 0 x N DualView incorrectly always results
209  // in a 0 x 0 DualView.
210  template<class DualViewType>
211  DualViewType
212  takeSubview (const DualViewType& X,
213  const Kokkos::ALL&,
214  const std::pair<size_t, size_t>& colRng)
215  {
216  if (X.dimension_0 () == 0 && X.dimension_1 () != 0) {
217  return DualViewType ("MV::DualView", 0, colRng.second - colRng.first);
218  }
219  else {
220  return subview (X, Kokkos::ALL (), colRng);
221  }
222  }
223 
224  // mfh 14 Apr 2015: Work-around for bug in Kokkos::subview, where
225  // taking a subview of a 0 x N DualView incorrectly always results
226  // in a 0 x 0 DualView.
227  template<class DualViewType>
228  DualViewType
229  takeSubview (const DualViewType& X,
230  const std::pair<size_t, size_t>& rowRng,
231  const std::pair<size_t, size_t>& colRng)
232  {
233  if (X.dimension_0 () == 0 && X.dimension_1 () != 0) {
234  return DualViewType ("MV::DualView", 0, colRng.second - colRng.first);
235  }
236  else {
237  return subview (X, rowRng, colRng);
238  }
239  }
240 } // namespace (anonymous)
241 
242 
243 namespace Tpetra {
244 
245  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
246  bool
247  MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
248  vectorIndexOutOfRange (const size_t VectorIndex) const {
249  return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
250  }
251 
  // Default constructor: a MultiVector over a default-constructed
  // (empty) Map, with no locally owned rows and no columns allocated.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
  MultiVector () :
    base_type (Teuchos::rcp (new map_type ()))
  {}
257 
258  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
260  MultiVector (const Teuchos::RCP<const map_type>& map,
261  const size_t numVecs,
262  const bool zeroOut) : /* default is true */
263  base_type (map)
264  {
265  TEUCHOS_TEST_FOR_EXCEPTION(
266  numVecs < 1, std::invalid_argument, "Tpetra::MultiVector::MultiVector"
267  "(map,numVecs,zeroOut): numVecs = " << numVecs << " < 1.");
268  const size_t lclNumRows = this->getLocalLength ();
269  view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
270  origView_ = view_;
271  }
272 
273  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
276  base_type (source),
277  view_ (source.view_),
278  origView_ (source.origView_),
280  {}
281 
282  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
285  const Teuchos::DataAccess copyOrView) :
286  base_type (source),
287  view_ (source.view_),
288  origView_ (source.origView_),
290  {
292  const char tfecfFuncName[] = "MultiVector(const MultiVector&, "
293  "const Teuchos::DataAccess): ";
294 
295  if (copyOrView == Teuchos::Copy) {
296  // Reuse the conveniently already existing function that creates
297  // a deep copy.
298  MV cpy = createCopy (source);
299  this->view_ = cpy.view_;
300  this->origView_ = cpy.origView_;
301  this->whichVectors_ = cpy.whichVectors_;
302  }
303  else if (copyOrView == Teuchos::View) {
304  }
305  else {
306  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
307  true, std::invalid_argument, "Second argument 'copyOrView' has an "
308  "invalid value " << copyOrView << ". Valid values include "
309  "Teuchos::Copy = " << Teuchos::Copy << " and Teuchos::View = "
310  << Teuchos::View << ".");
311  }
312  }
313 
314  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
316  MultiVector (const Teuchos::RCP<const map_type>& map,
317  const dual_view_type& view) :
318  base_type (map),
319  view_ (view),
320  origView_ (view)
321  {
322  const char tfecfFuncName[] = "MultiVector(map,view): ";
323 
324  // Get stride of view: if second dimension is 0, the
325  // stride might be 0, so take view_dimension instead.
326  size_t stride[8];
327  origView_.stride (stride);
328  const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
329  origView_.dimension_0 ();
330  const size_t lclNumRows = this->getLocalLength (); // comes from the Map
331  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
332  LDA < lclNumRows, std::invalid_argument, "The input Kokkos::DualView's "
333  "column stride LDA = " << LDA << " < getLocalLength() = " << lclNumRows
334  << ". This may also mean that the input view's first dimension (number "
335  "of rows = " << view.dimension_0 () << ") does not not match the number "
336  "of entries in the Map on the calling process.");
337  }
338 
339 
340  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
342  MultiVector (const Teuchos::RCP<const map_type>& map,
343  const typename dual_view_type::t_dev& d_view) :
344  base_type (map)
345  {
346  using Teuchos::ArrayRCP;
347  using Teuchos::RCP;
348  const char tfecfFuncName[] = "MultiVector(map,d_view): ";
349 
350  // Get stride of view: if second dimension is 0, the stride might
351  // be 0, so take view_dimension instead.
352  size_t stride[8];
353  d_view.stride (stride);
354  const size_t LDA = (d_view.dimension_1 () > 1) ? stride[1] :
355  d_view.dimension_0 ();
356  const size_t lclNumRows = this->getLocalLength (); // comes from the Map
357  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
358  LDA < lclNumRows, std::invalid_argument, "The input Kokkos::View's "
359  "column stride LDA = " << LDA << " < getLocalLength() = " << lclNumRows
360  << ". This may also mean that the input view's first dimension (number "
361  "of rows = " << d_view.dimension_0 () << ") does not not match the "
362  "number of entries in the Map on the calling process.");
363 
364  // The difference between create_mirror and create_mirror_view, is
365  // that the latter copies to host. We don't necessarily want to
366  // do that; we just want to allocate the memory.
367  view_ = dual_view_type (d_view, Kokkos::create_mirror (d_view));
368  // The user gave us a device view. We take it as canonical, which
369  // means we mark it as "modified," so that the next sync will
370  // synchronize it with the host view.
371  view_.template modify<execution_space> ();
372  origView_ = view_;
373  }
374 
375  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
377  MultiVector (const Teuchos::RCP<const map_type>& map,
378  const dual_view_type& view,
379  const dual_view_type& origView) :
380  base_type (map),
381  view_ (view),
382  origView_ (origView)
383  {
384  const char tfecfFuncName[] = "MultiVector(map,view,origView): ";
385 
386  // Get stride of view: if second dimension is 0, the
387  // stride might be 0, so take view_dimension instead.
388  size_t stride[8];
389  origView_.stride (stride);
390  const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
391  origView_.dimension_0 ();
392  const size_t lclNumRows = this->getLocalLength (); // comes from the Map
393  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
394  LDA < lclNumRows, std::invalid_argument, "The input Kokkos::DualView's "
395  "column stride LDA = " << LDA << " < getLocalLength() = " << lclNumRows
396  << ". This may also mean that the input origView's first dimension (number "
397  "of rows = " << origView.dimension_0 () << ") does not not match the number "
398  "of entries in the Map on the calling process.");
399  }
400 
401 
402  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
404  MultiVector (const Teuchos::RCP<const map_type>& map,
405  const dual_view_type& view,
406  const Teuchos::ArrayView<const size_t>& whichVectors) :
407  base_type (map),
408  view_ (view),
409  origView_ (view),
410  whichVectors_ (whichVectors.begin (), whichVectors.end ())
411  {
412  using Kokkos::ALL;
413  using Kokkos::subview;
414  const char tfecfFuncName[] = "MultiVector(map,view,whichVectors): ";
415 
416  const size_t lclNumRows = map.is_null () ? size_t (0) :
417  map->getNodeNumElements ();
418  // Check dimensions of the input DualView. We accept that Kokkos
419  // might not allow construction of a 0 x m (Dual)View with m > 0,
420  // so we only require the number of rows to match if the
421  // (Dual)View has more than zero columns. Likewise, we only
422  // require the number of columns to match if the (Dual)View has
423  // more than zero rows.
424  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
425  view.dimension_1 () != 0 && static_cast<size_t> (view.dimension_0 ()) < lclNumRows,
426  std::invalid_argument, "view.dimension_0() = " << view.dimension_0 ()
427  << " < map->getNodeNumElements() = " << lclNumRows << ".");
428  if (whichVectors.size () != 0) {
429  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
430  view.dimension_1 () != 0 && view.dimension_1 () == 0,
431  std::invalid_argument, "view.dimension_1() = 0, but whichVectors.size()"
432  " = " << whichVectors.size () << " > 0.");
433  size_t maxColInd = 0;
434  typedef Teuchos::ArrayView<const size_t>::size_type size_type;
435  for (size_type k = 0; k < whichVectors.size (); ++k) {
436  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
437  whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
438  std::invalid_argument, "whichVectors[" << k << "] = "
439  "Teuchos::OrdinalTraits<size_t>::invalid().");
440  maxColInd = std::max (maxColInd, whichVectors[k]);
441  }
442  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
443  view.dimension_1 () != 0 && static_cast<size_t> (view.dimension_1 ()) <= maxColInd,
444  std::invalid_argument, "view.dimension_1() = " << view.dimension_1 ()
445  << " <= max(whichVectors) = " << maxColInd << ".");
446  }
447 
448  // Get stride of view: if second dimension is 0, the
449  // stride might be 0, so take view_dimension instead.
450  size_t stride[8];
451  origView_.stride (stride);
452  const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
453  origView_.dimension_0 ();
454  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
455  LDA < lclNumRows, std::invalid_argument,
456  "LDA = " << LDA << " < this->getLocalLength() = " << lclNumRows << ".");
457 
458  if (whichVectors.size () == 1) {
459  // If whichVectors has only one entry, we don't need to bother
460  // with nonconstant stride. Just shift the view over so it
461  // points to the desired column.
462  //
463  // NOTE (mfh 10 May 2014) This is a special case where we set
464  // origView_ just to view that one column, not all of the
465  // original columns. This ensures that the use of origView_ in
466  // offsetView works correctly.
467  const std::pair<size_t, size_t> colRng (whichVectors[0],
468  whichVectors[0] + 1);
469  view_ = takeSubview (view_, ALL (), colRng);
470  origView_ = takeSubview (origView_, ALL (), colRng);
471  // whichVectors_.size() == 0 means "constant stride."
472  whichVectors_.clear ();
473  }
474  }
475 
476  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
478  MultiVector (const Teuchos::RCP<const map_type>& map,
479  const dual_view_type& view,
480  const dual_view_type& origView,
481  const Teuchos::ArrayView<const size_t>& whichVectors) :
482  base_type (map),
483  view_ (view),
484  origView_ (origView),
485  whichVectors_ (whichVectors.begin (), whichVectors.end ())
486  {
487  using Kokkos::ALL;
488  using Kokkos::subview;
489  const char tfecfFuncName[] = "MultiVector(map,view,origView,whichVectors): ";
490 
491  const size_t lclNumRows = this->getLocalLength ();
492  // Check dimensions of the input DualView. We accept that Kokkos
493  // might not allow construction of a 0 x m (Dual)View with m > 0,
494  // so we only require the number of rows to match if the
495  // (Dual)View has more than zero columns. Likewise, we only
496  // require the number of columns to match if the (Dual)View has
497  // more than zero rows.
498  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
499  view.dimension_1 () != 0 && static_cast<size_t> (view.dimension_0 ()) < lclNumRows,
500  std::invalid_argument, "view.dimension_0() = " << view.dimension_0 ()
501  << " < map->getNodeNumElements() = " << lclNumRows << ".");
502  if (whichVectors.size () != 0) {
503  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
504  view.dimension_1 () != 0 && view.dimension_1 () == 0,
505  std::invalid_argument, "view.dimension_1() = 0, but whichVectors.size()"
506  " = " << whichVectors.size () << " > 0.");
507  size_t maxColInd = 0;
508  typedef Teuchos::ArrayView<const size_t>::size_type size_type;
509  for (size_type k = 0; k < whichVectors.size (); ++k) {
510  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
511  whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
512  std::invalid_argument, "whichVectors[" << k << "] = "
513  "Teuchos::OrdinalTraits<size_t>::invalid().");
514  maxColInd = std::max (maxColInd, whichVectors[k]);
515  }
516  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
517  view.dimension_1 () != 0 && static_cast<size_t> (view.dimension_1 ()) <= maxColInd,
518  std::invalid_argument, "view.dimension_1() = " << view.dimension_1 ()
519  << " <= max(whichVectors) = " << maxColInd << ".");
520  }
521  // Get stride of view: if second dimension is 0, the
522  // stride might be 0, so take view_dimension instead.
523  size_t stride[8];
524  origView_.stride (stride);
525  const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
526  origView_.dimension_0 ();
527  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
528  LDA < lclNumRows, std::invalid_argument, "Input DualView's column stride"
529  " = " << LDA << " < this->getLocalLength() = " << lclNumRows << ".");
530 
531  if (whichVectors.size () == 1) {
532  // If whichVectors has only one entry, we don't need to bother
533  // with nonconstant stride. Just shift the view over so it
534  // points to the desired column.
535  //
536  // NOTE (mfh 10 May 2014) This is a special case where we set
537  // origView_ just to view that one column, not all of the
538  // original columns. This ensures that the use of origView_ in
539  // offsetView works correctly.
540  const std::pair<size_t, size_t> colRng (whichVectors[0],
541  whichVectors[0] + 1);
542  view_ = takeSubview (view_, ALL (), colRng);
543  origView_ = takeSubview (origView_, ALL (), colRng);
544  // whichVectors_.size() == 0 means "constant stride."
545  whichVectors_.clear ();
546  }
547  }
548 
549  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
551  MultiVector (const Teuchos::RCP<const map_type>& map,
552  const Teuchos::ArrayView<const Scalar>& data,
553  const size_t LDA,
554  const size_t numVecs) :
555  base_type (map)
556  {
557  using Kokkos::subview;
558  using Teuchos::ArrayView;
559  using Teuchos::av_reinterpret_cast;
560  typedef impl_scalar_type IST;
561  typedef LocalOrdinal LO;
562  typedef GlobalOrdinal GO;
563  typedef typename dual_view_type::host_mirror_space HMS;
564  typedef MakeUnmanagedView<const IST, device_type> view_getter_type;
565  typedef typename view_getter_type::view_type in_view_type;
566  typedef Kokkos::View<IST*, Kokkos::LayoutLeft, HMS> out_view_type;
567  const char tfecfFuncName[] = "MultiVector(map,data,LDA,numVecs): ";
568 
569  // Deep copy constructor, constant stride (NO whichVectors_).
570  // There is no need for a deep copy constructor with nonconstant stride.
571 
572  const size_t lclNumRows =
573  map.is_null () ? size_t (0) : map->getNodeNumElements ();
574  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
575  (LDA < lclNumRows, std::invalid_argument, "LDA = " << LDA << " < "
576  "map->getNodeNumElements() = " << lclNumRows << ".");
577  if (numVecs != 0) {
578  const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
579  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
580  (static_cast<size_t> (data.size ()) < minNumEntries,
581  std::invalid_argument, "Input Teuchos::ArrayView does not have enough "
582  "entries, given the input Map and number of vectors in the MultiVector."
583  " data.size() = " << data.size () << " < (LDA*(numVecs-1)) + "
584  "map->getNodeNumElements () = " << minNumEntries << ".");
585  }
586  view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
587  view_.template modify<HMS> ();
588 
589  ArrayView<const IST> X_in_av = av_reinterpret_cast<const IST> (data);
590  in_view_type X_in = view_getter_type::getView (X_in_av);
591  const std::pair<size_t, size_t> rowRng (0, lclNumRows);
592  for (size_t j = 0; j < numVecs; ++j) {
593  const std::pair<size_t, size_t> rng (j*LDA, j*LDA + lclNumRows);
594  in_view_type X_j_in = subview (X_in, rng);
595  out_view_type X_j_out = subview (view_.h_view, rowRng, j);
596  Kokkos::deep_copy (X_j_out, X_j_in);
597  }
598  origView_ = view_;
599  }
600 
601  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
603  MultiVector (const Teuchos::RCP<const map_type>& map,
604  const Teuchos::ArrayView<const ArrayView<const Scalar> >& ArrayOfPtrs,
605  const size_t numVecs) :
606  base_type (map)
607  {
608  using Kokkos::subview;
609  using Teuchos::ArrayView;
610  using Teuchos::av_reinterpret_cast;
611  typedef impl_scalar_type IST;
612  typedef LocalOrdinal LO;
613  typedef GlobalOrdinal GO;
614  typedef typename dual_view_type::host_mirror_space HMS;
615  typedef MakeUnmanagedView<const IST, device_type> view_getter_type;
616  typedef typename view_getter_type::view_type in_view_type;
617  typedef Kokkos::View<IST*, Kokkos::LayoutLeft, HMS> out_view_type;
618  const char tfecfFuncName[] = "MultiVector(map,ArrayOfPtrs,numVecs): ";
619 
620  const size_t lclNumRows =
621  map.is_null () ? size_t (0) : map->getNodeNumElements ();
622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
623  numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
624  std::runtime_error,
625  "ArrayOfPtrs.size() must be strictly positive and as large as ArrayOfPtrs.");
626  for (size_t j = 0; j < numVecs; ++j) {
627  ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
629  static_cast<size_t> (X_j_av.size ()) < lclNumRows,
630  std::invalid_argument, "ArrayOfPtrs[" << j << "].size() = "
631  << X_j_av.size () << " < map->getNodeNumElements() = " << lclNumRows
632  << ".");
633  }
634 
635  view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
636  view_.template modify<HMS> ();
637 
638  const std::pair<size_t, size_t> rowRng (0, lclNumRows);
639  for (size_t j = 0; j < numVecs; ++j) {
640  ArrayView<const IST> X_j_av =
641  av_reinterpret_cast<const IST> (ArrayOfPtrs[j]);
642  in_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
643  out_view_type X_j_out = subview (view_.h_view, rowRng, j);
644  Kokkos::deep_copy (X_j_out, X_j_in);
645  }
646  origView_ = view_;
647  }
648 
649  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
652 
653  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
656  return whichVectors_.empty ();
657  }
658 
659  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
660  size_t
663  {
664  if (this->getMap ().is_null ()) { // possible, due to replaceMap().
665  return static_cast<size_t> (0);
666  } else {
667  return this->getMap ()->getNodeNumElements ();
668  }
669  }
670 
671  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
675  {
676  if (this->getMap ().is_null ()) { // possible, due to replaceMap().
677  return static_cast<size_t> (0);
678  } else {
679  return this->getMap ()->getGlobalNumElements ();
680  }
681  }
682 
683  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
684  size_t
686  getStride () const
687  {
688  if (isConstantStride ()) {
689  // Get stride of view: if second dimension is 0, the
690  // stride might be 0, so take view_dimension instead.
691  size_t stride[8];
692  origView_.stride (stride);
693  const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] : origView_.dimension_0 ();
694  return LDA;
695  }
696  else {
697  return static_cast<size_t> (0);
698  }
699  }
700 
701  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
702  bool
704  checkSizes (const SrcDistObject& sourceObj)
705  {
706  // Check whether the source object is a MultiVector. If not, then
707  // we can't even compare sizes, so it's definitely not OK to
708  // Import or Export from it.
710  const this_type* src = dynamic_cast<const this_type*> (&sourceObj);
711  if (src == NULL) {
712  return false;
713  } else {
714  // The target of the Import or Export calls checkSizes() in
715  // DistObject::doTransfer(). By that point, we've already
716  // constructed an Import or Export object using the two
717  // multivectors' Maps, which means that (hopefully) we've
718  // already checked other attributes of the multivectors. Thus,
719  // all we need to do here is check the number of columns.
720  return src->getNumVectors () == this->getNumVectors ();
721  }
722  }
723 
724  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
725  size_t
728  return this->getNumVectors ();
729  }
730 
731  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
732  void
734  copyAndPermuteNew (const SrcDistObject& sourceObj,
735  size_t numSameIDs,
736  const Kokkos::View<const LocalOrdinal*, execution_space>& permuteToLIDs,
737  const Kokkos::View<const LocalOrdinal*, execution_space>& permuteFromLIDs)
738  {
739  using Teuchos::ArrayRCP;
740  using Teuchos::ArrayView;
741  using Teuchos::RCP;
742  using Kokkos::Compat::getKokkosViewDeepCopy;
743  using Kokkos::subview;
744  typedef Kokkos::DualView<impl_scalar_type*,
745  typename dual_view_type::array_layout,
746  execution_space> col_dual_view_type;
748  //typedef typename ArrayView<const LocalOrdinal>::size_type size_type; // unused
749  const char tfecfFuncName[] = "copyAndPermute";
750 
751  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
752  permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
753  ": permuteToLIDs and permuteFromLIDs must have the same size."
754  << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
755  << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
756 
757  // We've already called checkSizes(), so this cast must succeed.
758  const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);
759  const size_t numCols = this->getNumVectors ();
760 
761  // TODO (mfh 15 Sep 2013) When we replace
762  // KokkosClassic::MultiVector with a Kokkos::View, there are two
763  // ways to copy the data:
764  //
765  // 1. Get a (sub)view of each column and call deep_copy on that.
766  // 2. Write a custom kernel to copy the data.
767  //
768  // The first is easier, but the second might be more performant in
769  // case we decide to use layouts other than LayoutLeft. It might
770  // even make sense to hide whichVectors_ in an entirely new layout
771  // for Kokkos Views.
772 
773  // Copy rows [0, numSameIDs-1] of the local multivectors.
774  //
775  // For GPU Nodes: All of this happens using device pointers; this
776  // does not require host views of either source or destination.
777  //
778  // Note (ETP 2 Jul 2014) We need to always copy one column at a
779  // time, even when both multivectors are constant-stride, since
780  // deep_copy between strided subviews with more than one column
781  // doesn't currently work.
782  if (numSameIDs > 0) {
783  const std::pair<size_t, size_t> rows (0, numSameIDs);
784  for (size_t j = 0; j < numCols; ++j) {
785  const size_t dstCol = isConstantStride () ? j : whichVectors_[j];
786  const size_t srcCol =
787  sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
788  col_dual_view_type dst_j =
789  subview (view_, rows, dstCol);
790  col_dual_view_type src_j =
791  subview (sourceMV.view_, rows, srcCol);
792  Kokkos::deep_copy (dst_j, src_j); // Copy src_j into dst_j
793  }
794  }
795 
796  // For the remaining GIDs, execute the permutations. This may
797  // involve noncontiguous access of both source and destination
798  // vectors, depending on the LID lists.
799  //
800  // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on
801  // the same process, this merges their values by replacement of
802  // the last encountered GID, not by the specified merge rule
803  // (such as ADD).
804 
805  // If there are no permutations, we are done
806  if (permuteFromLIDs.size() == 0 || permuteToLIDs.size() == 0)
807  return;
808 
809  if (this->isConstantStride ()) {
810  KokkosRefactor::Details::permute_array_multi_column(
811  getKokkosView(),
812  sourceMV.getKokkosView(),
813  permuteToLIDs,
814  permuteFromLIDs,
815  numCols);
816  }
817  else {
818  KokkosRefactor::Details::permute_array_multi_column_variable_stride(
819  getKokkosView(),
820  sourceMV.getKokkosView(),
821  permuteToLIDs,
822  permuteFromLIDs,
823  getKokkosViewDeepCopy<execution_space> (whichVectors_ ()),
824  getKokkosViewDeepCopy<execution_space> (sourceMV.whichVectors_ ()),
825  numCols);
826  }
827  }
828 
  /// \brief Pack this MultiVector's data for an Import or Export
  ///   (Kokkos::View interface).
  ///
  /// NOTE(review): the "MultiVector<...>::" scope-qualifier line of this
  /// definition appears to have been dropped from this source extract;
  /// all code tokens below are preserved exactly as found.
  ///
  /// \param sourceObj [in] Source of the Import/Export.  It must really
  ///   be a MultiVector; checkSizes() has already been called, so the
  ///   dynamic_cast below must succeed.
  /// \param exportLIDs [in] Local row indices to pack.  If empty, this
  ///   method returns immediately.
  /// \param exports [in/out] Flat pack buffer, reallocated here (if its
  ///   size differs) to numCols * exportLIDs.size().  All data for a
  ///   given exported LID (one value per column) are contiguous.
  /// \param numExportPacketsPerLID [out] Unused: every LID has the same
  ///   number of packets, signaled via constantNumPackets instead.
  /// \param constantNumPackets [out] Set to the number of columns; a
  ///   nonzero value tells DistObject all packets have equal size.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  packAndPrepareNew (const SrcDistObject& sourceObj,
                     const Kokkos::View<const local_ordinal_type*, execution_space> &exportLIDs,
                     Kokkos::View<impl_scalar_type*, execution_space> &exports,
                     const Kokkos::View<size_t*, execution_space> &numExportPacketsPerLID,
                     size_t& constantNumPackets,
                     Distributor & /* distor */ )
  {
    using Teuchos::Array;
    using Teuchos::ArrayView;
    using Kokkos::Compat::getKokkosViewDeepCopy;
    //typedef Array<size_t>::size_type size_type; // unused

    // If we have no exports, there is nothing to do
    if (exportLIDs.size () == 0) {
      return;
    }

    // We've already called checkSizes(), so this cast must succeed.
    const MV& sourceMV = dynamic_cast<const MV&> (sourceObj);

    // We don't need numExportPacketsPerLID; forestall "unused
    // variable" compile warnings.
    (void) numExportPacketsPerLID;

    /* The layout in the export for MultiVectors is as follows:
       exports = { all of the data from row exportLIDs.front() ;
                   ....
                   all of the data from row exportLIDs.back() }
      This doesn't have the best locality, but is necessary because
      the data for a Packet (all data associated with an LID) is
      required to be contiguous. */

    // FIXME (mfh 15 Sep 2013) Would it make sense to rethink the
    // packing scheme in the above comment? The data going to a
    // particular process must be contiguous, of course, but those
    // data could include entries from multiple LIDs. DistObject just
    // needs to know how to index into that data. Kokkos is good at
    // decoupling storage intent from data layout choice.

    const size_t numCols = sourceMV.getNumVectors ();

    // This spares us from needing to fill numExportPacketsPerLID.
    // Setting constantNumPackets to a nonzero value signals that
    // all packets have the same number of entries.
    constantNumPackets = numCols;

    // Resize the pack buffer to exactly the required size, if needed.
    const size_t numExportLIDs = exportLIDs.size ();
    const size_t newExportsSize = numCols * numExportLIDs;
    if (exports.size () != newExportsSize) {
      Kokkos::Compat::realloc (exports, newExportsSize);
    }

    if (numCols == 1) { // special case for one column only
      // MultiVector always represents a single column with constant
      // stride, but it doesn't hurt to implement both cases anyway.
      //
      // ETP: I'm not sure I agree with the above statement. Can't a single-
      // column multivector be a subview of another multi-vector, in which case
      // sourceMV.whichVectors_[0] != 0 ? I think we have to handle that case
      // separately.
      if (sourceMV.isConstantStride ()) {
        KokkosRefactor::Details::pack_array_single_column(
          exports,
          sourceMV.getKokkosView (),
          exportLIDs,
          0);
      }
      else {
        // Non-constant stride: whichVectors_[0] names the actual column.
        KokkosRefactor::Details::pack_array_single_column(
          exports,
          sourceMV.getKokkosView (),
          exportLIDs,
          sourceMV.whichVectors_[0]);
      }
    }
    else { // the source MultiVector has multiple columns
      if (sourceMV.isConstantStride ()) {
        KokkosRefactor::Details::pack_array_multi_column(
          exports,
          sourceMV.getKokkosView (),
          exportLIDs,
          numCols);
      }
      else {
        // Noncontiguous columns: also give the kernel a device copy of
        // the source's column-index list (whichVectors_).
        KokkosRefactor::Details::pack_array_multi_column_variable_stride(
          exports,
          sourceMV.getKokkosView (),
          exportLIDs,
          getKokkosViewDeepCopy<execution_space> (sourceMV.whichVectors_ ()),
          numCols);
      }
    }
  }
926 
927 
  /// \brief Unpack received data and combine it into this MultiVector
  ///   (Kokkos::View interface).
  ///
  /// NOTE(review): the "MultiVector<...>::" scope-qualifier line of this
  /// definition appears to have been dropped from this source extract;
  /// all code tokens below are preserved exactly as found.
  ///
  /// \param importLIDs [in] Local row indices receiving unpacked data.
  ///   If empty, this method returns immediately.
  /// \param imports [in] Flat receive buffer; data for each imported LID
  ///   (one value per column) are contiguous, matching the pack layout.
  /// \param numPacketsPerLID [in] Unused: the packet count per LID is
  ///   constant (constantNumPackets).
  /// \param constantNumPackets [in] Number of packets (columns) per LID;
  ///   must be nonzero and equal to getNumVectors() (checked in debug
  ///   builds only).
  /// \param CM [in] How to merge incoming values with existing ones:
  ///   INSERT/REPLACE overwrite, ADD sums, ABSMAX keeps the larger
  ///   magnitude.  Any other value throws std::invalid_argument.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  unpackAndCombineNew (const Kokkos::View<const local_ordinal_type*, execution_space> &importLIDs,
                       const Kokkos::View<const impl_scalar_type*, execution_space> &imports,
                       const Kokkos::View<size_t*, execution_space> &numPacketsPerLID,
                       size_t constantNumPackets,
                       Distributor & /* distor */,
                       CombineMode CM)
  {
    using Teuchos::ArrayView;
    using Kokkos::Compat::getKokkosViewDeepCopy;
    const char tfecfFuncName[] = "unpackAndCombine";

    // If we have no imports, there is nothing to do
    if (importLIDs.size () == 0) {
      return;
    }

    // We don't need numPacketsPerLID; forestall "unused variable"
    // compile warnings.
    (void) numPacketsPerLID;

    /* The layout in the export for MultiVectors is as follows:
       imports = { all of the data from row exportLIDs.front() ;
                   ....
                   all of the data from row exportLIDs.back() }
      This doesn't have the best locality, but is necessary because
      the data for a Packet (all data associated with an LID) is
      required to be contiguous. */

    const size_t numVecs = getNumVectors ();

#ifdef HAVE_TPETRA_DEBUG
    // Sanity checks on buffer size and packet count (debug builds only).
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      static_cast<size_t> (imports.size()) != getNumVectors()*importLIDs.size(),
      std::runtime_error,
      ": 'imports' buffer size must be consistent with the amount of data to "
      "be sent. " << std::endl << "imports.size() = " << imports.size()
      << " != getNumVectors()*importLIDs.size() = " << getNumVectors() << "*"
      << importLIDs.size() << " = " << getNumVectors() * importLIDs.size()
      << ".");

    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      constantNumPackets == static_cast<size_t> (0), std::runtime_error,
      ": constantNumPackets input argument must be nonzero.");

    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      static_cast<size_t> (numVecs) != static_cast<size_t> (constantNumPackets),
      std::runtime_error, ": constantNumPackets must equal numVecs.");
#endif // HAVE_TPETRA_DEBUG

    if (numVecs > 0 && importLIDs.size () > 0) {

      // NOTE (mfh 10 Mar 2012, 24 Mar 2014) If you want to implement
      // custom combine modes, start editing here. Also, if you trust
      // inlining, it would be nice to condense this code by using a
      // binary function object f in the pack functors.
      //
      // Each branch below picks the combine operation (Insert/Add/AbsMax)
      // and then the constant- vs. variable-stride unpack kernel.
      if (CM == INSERT || CM == REPLACE) {
        if (isConstantStride()) {
          KokkosRefactor::Details::unpack_array_multi_column(
            getKokkosView(),
            imports,
            importLIDs,
            KokkosRefactor::Details::InsertOp(),
            numVecs);
        }
        else {
          KokkosRefactor::Details::unpack_array_multi_column_variable_stride(
            getKokkosView(),
            imports,
            importLIDs,
            getKokkosViewDeepCopy<execution_space>(whichVectors_ ()),
            KokkosRefactor::Details::InsertOp(),
            numVecs);
        }
      }
      else if (CM == ADD) {
        if (isConstantStride()) {
          KokkosRefactor::Details::unpack_array_multi_column(
            getKokkosView(),
            imports,
            importLIDs,
            KokkosRefactor::Details::AddOp(),
            numVecs);
        }
        else {
          KokkosRefactor::Details::unpack_array_multi_column_variable_stride(
            getKokkosView(),
            imports,
            importLIDs,
            getKokkosViewDeepCopy<execution_space>(whichVectors_ ()),
            KokkosRefactor::Details::AddOp(),
            numVecs);
        }
      }
      else if (CM == ABSMAX) {
        if (isConstantStride()) {
          KokkosRefactor::Details::unpack_array_multi_column(
            getKokkosView(),
            imports,
            importLIDs,
            KokkosRefactor::Details::AbsMaxOp(),
            numVecs);
        }
        else {
          KokkosRefactor::Details::unpack_array_multi_column_variable_stride(
            getKokkosView(),
            imports,
            importLIDs,
            getKokkosViewDeepCopy<execution_space>(whichVectors_ ()),
            KokkosRefactor::Details::AbsMaxOp(),
            numVecs);
        }
      }
      else {
        // Unknown CombineMode: report the invalid value to the caller.
        TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
          CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX,
          std::invalid_argument, ": Invalid CombineMode: " << CM << ". Valid "
          "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
      }
    }
  }
1051 
  /// \brief Number of columns (vectors) this MultiVector represents.
  ///
  /// NOTE(review): the signature line of this definition (presumably
  /// "MultiVector<...>::getNumVectors () const") appears to have been
  /// dropped from this source extract; code tokens are left unchanged.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  size_t
  {
    if (isConstantStride ()) {
      // Constant stride: every column of the underlying 2-D view
      // belongs to this MultiVector.
      return static_cast<size_t> (view_.dimension_1 ());
    } else {
      // Noncontiguous column view: whichVectors_ lists exactly the
      // columns this MultiVector represents.
      return static_cast<size_t> (whichVectors_.size ());
    }
  }
1063 
  namespace { // (anonymous)

    /// \brief Compute the local (per-process) dot products of
    ///   corresponding columns of X_lcl and Y_lcl into dotsOut.
    ///
    /// \param dotsOut [out] Rank-1 view; entry k gets the local dot of
    ///   column k of X with column k of Y (zero-filled if no local rows).
    /// \param X_lcl, Y_lcl [in] Rank-2 local views of the two operands.
    /// \param lclNumRows [in] Number of valid local rows.
    /// \param numVecs [in] Number of columns (vectors) to process.
    /// \param whichVecsX, whichVecsY [in] Column index maps; consulted
    ///   only when the corresponding operand is not constant stride.
    /// \param constantStrideX, constantStrideY [in] Whether X resp. Y
    ///   occupy contiguous columns of their views.
    template<class RV, class XMV>
    void
    lclDotImpl (const RV& dotsOut,
                const XMV& X_lcl,
                const XMV& Y_lcl,
                const size_t lclNumRows,
                const size_t numVecs,
                const Teuchos::ArrayView<const size_t>& whichVecsX,
                const Teuchos::ArrayView<const size_t>& whichVecsY,
                const bool constantStrideX,
                const bool constantStrideY)
    {
      using Kokkos::ALL;
      using Kokkos::subview;
      typedef typename RV::non_const_value_type dot_type;
#ifdef HAVE_TPETRA_DEBUG
      const char prefix[] = "Tpetra::MultiVector::lclDotImpl: ";
#endif // HAVE_TPETRA_DEBUG

      static_assert (Kokkos::Impl::is_view<RV>::value,
                     "Tpetra::MultiVector::lclDotImpl: "
                     "The first argument dotsOut is not a Kokkos::View.");
      static_assert (RV::rank == 1, "Tpetra::MultiVector::lclDotImpl: "
                     "The first argument dotsOut must have rank 1.");
      static_assert (Kokkos::Impl::is_view<XMV>::value,
                     "Tpetra::MultiVector::lclDotImpl: The type of the 2nd and "
                     "3rd arguments (X_lcl and Y_lcl) is not a Kokkos::View.");
      static_assert (XMV::rank == 2, "Tpetra::MultiVector::lclDotImpl: "
                     "X_lcl and Y_lcl must have rank 2.");

      // In case the input dimensions don't match, make sure that we
      // don't overwrite memory that doesn't belong to us, by using
      // subset views with the minimum dimensions over all input.
      const std::pair<size_t, size_t> rowRng (0, lclNumRows);
      const std::pair<size_t, size_t> colRng (0, numVecs);
      RV theDots = subview (dotsOut, colRng);
      XMV X = subview (X_lcl, rowRng, colRng);
      XMV Y = subview (Y_lcl, rowRng, colRng);

#ifdef HAVE_TPETRA_DEBUG
      TEUCHOS_TEST_FOR_EXCEPTION(
        lclNumRows != 0 &&
        (X.dimension_0 () != lclNumRows || X.dimension_1 () != numVecs),
        std::logic_error, prefix << "X's dimensions are " << X.dimension_0 ()
        << " x " << X.dimension_1 () << ", which differ from the local "
        "dimensions " << lclNumRows << " x " << numVecs << ". "
        "Please report this bug to the Tpetra developers.");
      TEUCHOS_TEST_FOR_EXCEPTION(
        lclNumRows != 0 &&
        (Y.dimension_0 () != lclNumRows || Y.dimension_1 () != numVecs),
        std::logic_error, prefix << "Y's dimensions are " << Y.dimension_0 ()
        << " x " << Y.dimension_1 () << ", which differ from the local "
        "dimensions " << lclNumRows << " x " << numVecs << ". "
        "Please report this bug to the Tpetra developers.");
#endif // HAVE_TPETRA_DEBUG

      if (lclNumRows == 0) {
        // No local rows: the local contribution is all zeros.
        const dot_type zero = Kokkos::Details::ArithTraits<dot_type>::zero ();
        Kokkos::Impl::ViewFill<RV> (theDots, zero);
      }
      else { // lclNumRows != 0
        if (constantStrideX && constantStrideY) {
          if(X.dimension_1() == 1) {
            // Single column: use the rank-1 dot, which returns a scalar,
            // then copy that scalar into the output view.
            typename RV::non_const_value_type result =
              KokkosBlas::dot (Kokkos::subview(X,Kokkos::ALL(),0),
                               Kokkos::subview(Y,Kokkos::ALL(),0));
            Kokkos::deep_copy(theDots,result);
          } else
            KokkosBlas::dot (theDots, X, Y);
        }
        else { // not constant stride
          // NOTE (mfh 15 Jul 2014) This does a kernel launch for
          // every column. It might be better to have a kernel that
          // does the work all at once. On the other hand, we don't
          // prioritize performance of MultiVector views of
          // noncontiguous columns.
          for (size_t k = 0; k < numVecs; ++k) {
            const size_t X_col = constantStrideX ? k : whichVecsX[k];
            const size_t Y_col = constantStrideY ? k : whichVecsY[k];
            KokkosBlas::dot (subview (theDots, k), subview (X, ALL (), X_col),
                             subview (Y, ALL (), Y_col));
          } // for each column
        } // constantStride
      } // lclNumRows != 0
    }

    /// \brief Finish the dot products over all processes.
    ///
    /// On entry dotsOut holds this process' local results; on exit (when
    /// distributed and comm is nonnull) it holds the global sums.
    template<class RV>
    void
    gblDotImpl (const RV& dotsOut,
                const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
                const bool distributed)
    {
      using Teuchos::REDUCE_MAX;
      using Teuchos::REDUCE_SUM;
      using Teuchos::reduceAll;
      typedef typename RV::non_const_value_type dot_type;

      const size_t numVecs = dotsOut.dimension_0 ();

      // If the MultiVector is distributed over multiple processes, do
      // the distributed (interprocess) part of the dot product. We
      // assume that the MPI implementation can read from and write to
      // device memory.
      //
      // replaceMap() may have removed some processes. Those
      // processes have a null Map. They must not participate in any
      // collective operations. We ask first whether the Map is null,
      // because isDistributed() defers that question to the Map. We
      // still compute and return local dots for processes not
      // participating in collective operations; those probably don't
      // make any sense, but it doesn't hurt to do them, since it's
      // illegal to call dot() on those processes anyway.
      if (distributed && ! comm.is_null ()) {
        // The calling process only participates in the collective if
        // both the Map and its Comm on that process are nonnull.
        //
        // MPI doesn't allow aliasing of arguments, so we have to make
        // a copy of the local sum.
        typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing ("tmp"), numVecs);
        Kokkos::deep_copy (lclDots, dotsOut);
        const dot_type* const lclSum = lclDots.ptr_on_device ();
        dot_type* const gblSum = dotsOut.ptr_on_device ();
        const int nv = static_cast<int> (numVecs);
        reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
      }
    }
  } // namespace (anonymous)
1193 
  /// \brief Dot products of this MultiVector's columns with A's columns,
  ///   results written to a device Kokkos::View.
  ///
  /// NOTE(review): the signature lines of this definition (presumably
  /// "MultiVector<...>::dot (const MultiVector<...>& A," naming the
  /// method and its first parameter) appear to have been dropped from
  /// this source extract; code tokens below are left unchanged, and the
  /// body references a parameter named A accordingly.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
       const Kokkos::View<dot_type*, device_type>& dots) const
  {
    using Kokkos::create_mirror_view;
    using Kokkos::subview;
    using Teuchos::Comm;
    using Teuchos::null;
    using Teuchos::RCP;
    // View of all the dot product results.
    typedef Kokkos::View<dot_type*, device_type> RV;
    const char tfecfFuncName[] = "Tpetra::MultiVector::dot: ";

    const size_t numVecs = this->getNumVectors ();
    if (numVecs == 0) {
      return; // nothing to do
    }
    const size_t lclNumRows = this->getLocalLength ();
    const size_t numDots = static_cast<size_t> (dots.dimension_0 ());

#ifdef HAVE_TPETRA_DEBUG
    {
      const bool compat = this->getMap ()->isCompatible (* (A.getMap ()));
      TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
        ! compat, std::invalid_argument, "Tpetra::MultiVector::dot: *this is "
        "not compatible with the input MultiVector A. We only test for this "
        "in a debug build.");
    }
#endif // HAVE_TPETRA_DEBUG

    // FIXME (mfh 11 Jul 2014) These exception tests may not
    // necessarily be thrown on all processes consistently. We should
    // instead pass along error state with the inner product. We
    // could do this by setting an extra slot to
    // Kokkos::Details::ArithTraits<dot_type>::one() on error. The
    // final sum should be
    // Kokkos::Details::ArithTraits<dot_type>::zero() if not error.
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      lclNumRows != A.getLocalLength (), std::runtime_error,
      "MultiVectors do not have the same local length. "
      "this->getLocalLength() = " << lclNumRows << " != "
      "A.getLocalLength() = " << A.getLocalLength () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      numVecs != A.getNumVectors (), std::runtime_error,
      "MultiVectors must have the same number of columns (vectors). "
      "this->getNumVectors() = " << numVecs << " != "
      "A.getNumVectors() = " << A.getNumVectors () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      numDots != numVecs, std::runtime_error,
      "The output array 'dots' must have the same number of entries as the "
      "number of columns (vectors) in *this and A. dots.dimension_0() = " <<
      numDots << " != this->getNumVectors() = " << numVecs << ".");

    const std::pair<size_t, size_t> colRng (0, numVecs);
    RV dotsOut = subview (dots, colRng);
    RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
      this->getMap ()->getComm ();

    // FIXME (mfh 05 Mar 2015) DualView flags are not indicative when
    // the two memory spaces are the same, so we check the latter.
    const bool oneMemorySpace =
      Kokkos::Impl::is_same<typename dual_view_type::t_dev::memory_space,
                            typename dual_view_type::t_host::memory_space>::value;
    if (! oneMemorySpace && A.view_.modified_host >= A.view_.modified_device) {
      // A was last modified on host, so run the local kernel there.
      // This means we need a host mirror of the array of norms too.
      typedef typename dual_view_type::t_host XMV;
      // I consider it more polite to sync *this, then to sync A.
      // A is a "guest" of this method, and is passed in const.
      this->view_.template sync<typename XMV::memory_space> ();
      lclDotImpl<RV, XMV> (dotsOut, view_.h_view, A.view_.h_view,
                           lclNumRows, numVecs,
                           this->whichVectors_, A.whichVectors_,
                           this->isConstantStride (), A.isConstantStride ());
      // Do the interprocess reduction on a host mirror, then copy the
      // final (global) result back into the device output view.
      typename RV::HostMirror dotsOutHost = create_mirror_view (dotsOut);
      Kokkos::deep_copy (dotsOutHost, dotsOut);
      gblDotImpl<typename RV::HostMirror> (dotsOutHost, comm,
                                           this->isDistributed ());
      Kokkos::deep_copy (dotsOut, dotsOutHost);
    }
    else {
      // A was last modified on device, so run the local kernel there.
      typedef typename dual_view_type::t_dev XMV;
      // I consider it more polite to sync *this, then to sync A.
      // A is a "guest" of this method, and is passed in const.
      this->view_.template sync<typename XMV::memory_space> ();
      lclDotImpl<RV, XMV> (dotsOut, view_.d_view, A.view_.d_view,
                           lclNumRows, numVecs,
                           this->whichVectors_, A.whichVectors_,
                           this->isConstantStride (), A.isConstantStride ());
      gblDotImpl<RV> (dotsOut, comm, this->isDistributed ());
    }
  }
1289 
1290 
  /// \brief Dot products of this MultiVector's columns with A's columns,
  ///   results written to a caller-provided host Teuchos::ArrayView.
  ///
  /// NOTE(review): the signature lines of this definition (presumably
  /// "MultiVector<...>::dot (const MultiVector<...>& A," naming the
  /// method and its first parameter) appear to have been dropped from
  /// this source extract; code tokens below are left unchanged.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
       const Teuchos::ArrayView<dot_type>& dots) const
  {
    // Delegate to the Kokkos::View overload: allocate a device buffer,
    // compute there, then copy the results into the caller's array.
    typedef Kokkos::View<dot_type*, device_type> dev_dots_view_type;
    typedef MakeUnmanagedView<dot_type, device_type> view_getter_type;
    typedef typename view_getter_type::view_type host_dots_view_type;

    const size_t numDots = static_cast<size_t> (dots.size ());
    // Unmanaged host view wrapping the caller's output buffer.
    host_dots_view_type dotsHostView (dots.getRawPtr (), numDots);
    dev_dots_view_type dotsDevView ("MV::dot tmp", numDots);
    this->dot (A, dotsDevView); // Do the computation on the device.
    Kokkos::deep_copy (dotsHostView, dotsDevView); // Bring back result to host
  }
1307 
1308 
1309  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1310  void
1312  norm2 (const Teuchos::ArrayView<mag_type>& norms) const
1313  {
1314  typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
1315  typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
1316  typedef typename view_getter_type::view_type host_norms_view_type;
1317 
1318  const size_t numNorms = static_cast<size_t> (norms.size ());
1319  host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
1320  dev_norms_view_type normsDevView ("MV::norm2 tmp", numNorms);
1321  this->norm2 (normsDevView); // Do the computation on the device.
1322  Kokkos::deep_copy (normsHostView, normsDevView); // Bring back result to host
1323  }
1324 
1325 
  /// \brief Two-norm of each column, written into the device view
  ///   \c norms.  Delegates to the shared normImpl with NORM_TWO.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  norm2 (const Kokkos::View<mag_type*, device_type>& norms) const
  {
    // All norm kernels share one implementation, parameterized by
    // which norm to compute.
    this->normImpl (norms, NORM_TWO);
  }
1333 
1334 
  /// \brief Weighted norm of each column: sqrt( sum_i |x_i / w_i|^2 / N ),
  ///   where N is the global length, with results written to \c norms.
  ///
  /// NOTE(review): the signature lines of this definition (presumably
  /// "MultiVector<...>::normWeighted (const MultiVector<...>& weights,"
  /// naming the method and its first parameter) appear to have been
  /// dropped from this source extract; code tokens are left unchanged,
  /// and the body references a parameter named weights accordingly.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
            const Teuchos::ArrayView<mag_type> &norms) const
  {
    using Kokkos::ALL;
    using Kokkos::subview;
    using Teuchos::Comm;
    using Teuchos::null;
    using Teuchos::RCP;
    using Teuchos::reduceAll;
    using Teuchos::REDUCE_SUM;
    typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
    typedef Kokkos::Details::ArithTraits<mag_type> ATM;
    typedef Kokkos::View<mag_type*, device_type> norms_view_type;
    const char tfecfFuncName[] = "normWeighted: ";

    const size_t numVecs = this->getNumVectors ();
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      static_cast<size_t> (norms.size ()) != numVecs, std::runtime_error,
      "norms.size() = " << norms.size () << " != this->getNumVectors() = "
      << numVecs << ".");

    // One-column weight vector: the same weights apply to every column.
    const bool OneW = (weights.getNumVectors () == 1);
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      ! OneW && weights.getNumVectors () != numVecs, std::runtime_error,
      "The input MultiVector of weights must contain either one column, "
      "or must have the same number of columns as *this. "
      "weights.getNumVectors() = " << weights.getNumVectors ()
      << " and this->getNumVectors() = " << numVecs << ".");

    // NOTE(review): as written, debug builds check Map compatibility
    // while NON-debug builds instead check only matching local lengths;
    // the two checks are mutually exclusive (#ifdef/#else).  Confirm
    // this asymmetry is intentional.
#ifdef HAVE_TPETRA_DEBUG
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      ! this->getMap ()->isCompatible (*weights.getMap ()), std::runtime_error,
      "MultiVectors do not have compatible Maps:" << std::endl
      << "this->getMap(): " << std::endl << *this->getMap()
      << "weights.getMap(): " << std::endl << *weights.getMap() << std::endl);
#else
    const size_t lclNumRows = this->getLocalLength ();
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      lclNumRows != weights.getLocalLength (), std::runtime_error,
      "MultiVectors do not have the same local length.");
#endif // HAVE_TPETRA_DEBUG

    norms_view_type lclNrms ("lclNrms", numVecs);

    // Make sure both operands' device data are up to date before the
    // kernels below read them.
    view_.template sync<device_type> ();
    weights.view_.template sync<device_type> ();

    typename dual_view_type::t_dev X_lcl = this->view_.d_view;
    typename dual_view_type::t_dev W_lcl = weights.view_.d_view;

    if (isConstantStride () && ! OneW) {
      // Fast path: one kernel over all columns at once.
      KokkosBlas::nrm2w_squared (lclNrms, X_lcl, W_lcl);
    }
    else {
      // One kernel launch per column, selecting the actual source
      // column of X and of the weights for each logical column j.
      for (size_t j = 0; j < numVecs; ++j) {
        const size_t X_col = this->isConstantStride () ? j :
          this->whichVectors_[j];
        const size_t W_col = OneW ? static_cast<size_t> (0) :
          (weights.isConstantStride () ? j : weights.whichVectors_[j]);
        KokkosBlas::nrm2w_squared (subview (lclNrms, j),
                                   subview (X_lcl, ALL (), X_col),
                                   subview (W_lcl, ALL (), W_col));
      }
    }

    const mag_type OneOverN =
      ATM::one () / static_cast<mag_type> (this->getGlobalLength ());
    RCP<const Comm<int> > comm = this->getMap ().is_null () ?
      Teuchos::null : this->getMap ()->getComm ();

    if (! comm.is_null () && this->isDistributed ()) {
      // Assume that MPI can access device memory.
      reduceAll<int, mag_type> (*comm, REDUCE_SUM, static_cast<int> (numVecs),
                                lclNrms.ptr_on_device (), norms.getRawPtr ());
      for (size_t k = 0; k < numVecs; ++k) {
        norms[k] = ATM::sqrt (norms[k] * OneOverN);
      }
    }
    else {
      // Not distributed (or no communicator): copy the local results to
      // host and finish the scaling/sqrt there.
      typename norms_view_type::HostMirror lclNrms_h =
        Kokkos::create_mirror_view (lclNrms);
      Kokkos::deep_copy (lclNrms_h, lclNrms);
      for (size_t k = 0; k < numVecs; ++k) {
        norms[k] = ATM::sqrt (ATS::magnitude (lclNrms_h(k)) * OneOverN);
      }
    }
  }
1425 
1426 
1427  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1428  void
1430  norm1 (const Teuchos::ArrayView<mag_type>& norms) const
1431  {
1432  typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
1433  typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
1434  typedef typename view_getter_type::view_type host_norms_view_type;
1435 
1436  const size_t numNorms = static_cast<size_t> (norms.size ());
1437  host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
1438  dev_norms_view_type normsDevView ("MV::norm1 tmp", numNorms);
1439  this->norm1 (normsDevView); // Do the computation on the device.
1440  Kokkos::deep_copy (normsHostView, normsDevView); // Bring back result to host
1441  }
1442 
1443 
  /// \brief One-norm of each column, written into the device view
  ///   \c norms.  Delegates to the shared normImpl with NORM_ONE.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  norm1 (const Kokkos::View<mag_type*, device_type>& norms) const
  {
    // All norm kernels share one implementation, parameterized by
    // which norm to compute.
    this->normImpl (norms, NORM_ONE);
  }
1451 
1452 
1453  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1454  void
1456  normInf (const Teuchos::ArrayView<mag_type>& norms) const
1457  {
1458  typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
1459  typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
1460  typedef typename view_getter_type::view_type host_norms_view_type;
1461 
1462  const size_t numNorms = static_cast<size_t> (norms.size ());
1463  host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
1464  dev_norms_view_type normsDevView ("MV::normInf tmp", numNorms);
1465  this->normInf (normsDevView); // Do the computation on the device.
1466  Kokkos::deep_copy (normsHostView, normsDevView); // Bring back result to host
1467  }
1468 
1469 
  /// \brief Infinity-norm of each column, written into the device view
  ///   \c norms.  Delegates to the shared normImpl with NORM_INF.
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  normInf (const Kokkos::View<mag_type*, device_type>& norms) const
  {
    // All norm kernels share one implementation, parameterized by
    // which norm to compute.
    this->normImpl (norms, NORM_INF);
  }
1477 
  namespace { // (anonymous)

    // NOTE(review): the opening line of this enum (presumably
    // "enum EWhichNormImpl {") appears to have been dropped from this
    // source extract; the enumerators below select which norm the
    // lclNormImpl / gblNormImpl helpers compute.
      IMPL_NORM_ONE, //<! Use the one-norm
      IMPL_NORM_TWO, //<! Use the two-norm
      IMPL_NORM_INF //<! Use the infinity-norm
    };
1486 
    /// \brief Compute the local (per-process) norms of the columns of
    ///   X_lcl into normsOut.
    ///
    /// For IMPL_NORM_TWO this computes the SQUARED two-norm (the square
    /// root is taken later, in gblNormImpl, after the global reduction).
    ///
    /// \param normsOut [out] Rank-1 view; entry k receives the local
    ///   norm (or squared two-norm) of column k (zero if no local rows).
    /// \param X_lcl [in] Rank-2 local view of the multivector's data.
    /// \param lclNumRows [in] Number of valid local rows.
    /// \param numVecs [in] Number of columns (vectors) to process.
    /// \param whichVecs [in] Column index map; consulted only when
    ///   constantStride is false.
    /// \param constantStride [in] Whether the multivector occupies
    ///   contiguous columns of X_lcl.
    /// \param whichNorm [in] Which norm to compute.
    template<class RV, class XMV>
    void
    lclNormImpl (const RV& normsOut,
                 const XMV& X_lcl,
                 const size_t lclNumRows,
                 const size_t numVecs,
                 const Teuchos::ArrayView<const size_t>& whichVecs,
                 const bool constantStride,
                 const EWhichNormImpl whichNorm)
    {
      using Kokkos::ALL;
      using Kokkos::subview;
      typedef typename RV::non_const_value_type mag_type;

      static_assert (Kokkos::Impl::is_view<RV>::value,
                     "Tpetra::MultiVector::lclNormImpl: "
                     "The first argument RV is not a Kokkos::View.");
      static_assert (RV::rank == 1, "Tpetra::MultiVector::lclNormImpl: "
                     "The first argument normsOut must have rank 1.");
      static_assert (Kokkos::Impl::is_view<XMV>::value,
                     "Tpetra::MultiVector::lclNormImpl: "
                     "The second argument X_lcl is not a Kokkos::View.");
      static_assert (XMV::rank == 2, "Tpetra::MultiVector::lclNormImpl: "
                     "The second argument X_lcl must have rank 2.");

      // In case the input dimensions don't match, make sure that we
      // don't overwrite memory that doesn't belong to us, by using
      // subset views with the minimum dimensions over all input.
      const std::pair<size_t, size_t> rowRng (0, lclNumRows);
      const std::pair<size_t, size_t> colRng (0, numVecs);
      RV theNorms = subview (normsOut, colRng);
      XMV X = subview (X_lcl, rowRng, colRng);

      // mfh 10 Mar 2015: Kokkos::(Dual)View subviews don't quite
      // behave how you think when they have zero rows. In that case,
      // it returns a 0 x 0 (Dual)View.
      TEUCHOS_TEST_FOR_EXCEPTION(
        lclNumRows != 0 && (X.dimension_0 () != lclNumRows || X.dimension_1 () != numVecs),
        std::logic_error, "X's dimensions are " << X.dimension_0 () << " x "
        << X.dimension_1 () << ", which differ from the local dimensions "
        << lclNumRows << " x " << numVecs << ". Please report this bug to "
        "the Tpetra developers.");

      if (lclNumRows == 0) {
        // No local rows: the local contribution is all zeros.
        const mag_type zeroMag = Kokkos::Details::ArithTraits<mag_type>::zero ();
        Kokkos::Impl::ViewFill<RV> (theNorms, zeroMag);
      }
      else { // lclNumRows != 0
        if (constantStride) {
          // Fast path: one multi-column KokkosBlas kernel.
          if (whichNorm == IMPL_NORM_INF) {
            KokkosBlas::nrmInf (theNorms, X);
          }
          else if (whichNorm == IMPL_NORM_ONE) {
            KokkosBlas::nrm1 (theNorms, X);
          }
          else if (whichNorm == IMPL_NORM_TWO) {
            KokkosBlas::nrm2_squared (theNorms, X);
          }
          else {
            TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Should never get here!");
          }
        }
        else { // not constant stride
          // NOTE (mfh 15 Jul 2014) This does a kernel launch for
          // every column. It might be better to have a kernel that
          // does the work all at once. On the other hand, we don't
          // prioritize performance of MultiVector views of
          // noncontiguous columns.
          for (size_t k = 0; k < numVecs; ++k) {
            const size_t X_col = constantStride ? k : whichVecs[k];
            if (whichNorm == IMPL_NORM_INF) {
              KokkosBlas::nrmInf (subview (theNorms, k),
                                  subview (X, ALL (), X_col));
            }
            else if (whichNorm == IMPL_NORM_ONE) {
              KokkosBlas::nrm1 (subview (theNorms, k),
                                subview (X, ALL (), X_col));
            }
            else if (whichNorm == IMPL_NORM_TWO) {
              KokkosBlas::nrm2_squared (subview (theNorms, k),
                                        subview (X, ALL (), X_col));
            }
            else {
              TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Should never get here!");
            }
          } // for each column
        } // constantStride
      } // lclNumRows != 0
    }
1576 
1577  template<class RV>
1578  void
1579  gblNormImpl (const RV& normsOut,
1580  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
1581  const bool distributed,
1582  const EWhichNormImpl whichNorm)
1583  {
1584  using Teuchos::REDUCE_MAX;
1585  using Teuchos::REDUCE_SUM;
1586  using Teuchos::reduceAll;
1587  typedef typename RV::non_const_value_type mag_type;
1588 
1589  const size_t numVecs = normsOut.dimension_0 ();
1590 
1591  // If the MultiVector is distributed over multiple processes, do
1592  // the distributed (interprocess) part of the norm. We assume
1593  // that the MPI implementation can read from and write to device
1594  // memory.
1595  //
1596  // replaceMap() may have removed some processes. Those processes
1597  // have a null Map. They must not participate in any collective
1598  // operations. We ask first whether the Map is null, because
1599  // isDistributed() defers that question to the Map. We still
1600  // compute and return local norms for processes not participating
1601  // in collective operations; those probably don't make any sense,
1602  // but it doesn't hurt to do them, since it's illegal to call
1603  // norm*() on those processes anyway.
1604  if (distributed && ! comm.is_null ()) {
1605  // The calling process only participates in the collective if
1606  // both the Map and its Comm on that process are nonnull.
1607  //
1608  // MPI doesn't allow aliasing of arguments, so we have to make
1609  // a copy of the local sum.
1610  RV lclNorms ("MV::normImpl lcl", numVecs);
1611  Kokkos::deep_copy (lclNorms, normsOut);
1612  const mag_type* const lclSum = lclNorms.ptr_on_device ();
1613  mag_type* const gblSum = normsOut.ptr_on_device ();
1614  const int nv = static_cast<int> (numVecs);
1615  if (whichNorm == IMPL_NORM_INF) {
1616  reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
1617  } else {
1618  reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
1619  }
1620  }
1621 
1622  if (whichNorm == IMPL_NORM_TWO) {
1623  // Replace the norm-squared results with their square roots in
1624  // place, to get the final output. If the device memory and
1625  // the host memory are the same, it probably doesn't pay to
1626  // launch a parallel kernel for that, since there isn't enough
1627  // parallelism for the typical MultiVector case.
1628  const bool inHostMemory =
1629  Kokkos::Impl::is_same<typename RV::memory_space,
1630  typename RV::host_mirror_space::memory_space>::value;
1631  if (inHostMemory) {
1632  for (size_t j = 0; j < numVecs; ++j) {
1633  normsOut(j) = Kokkos::Details::ArithTraits<mag_type>::sqrt (normsOut(j));
1634  }
1635  }
1636  else {
1637  // There's not as much parallelism now, but that's OK. The
1638  // point of doing parallel dispatch here is to keep the norm
1639  // results on the device, thus avoiding a copy to the host and
1640  // back again.
1641  KokkosBlas::Impl::SquareRootFunctor<RV> f (normsOut);
1642  Kokkos::parallel_for (numVecs, f);
1643  }
1644  }
1645  }
1646 
1647  } // namespace (anonymous)
1648 
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  // NOTE(review): the qualified-name line ("MultiVector<...>::") was
  // dropped by the documentation extractor; this is the definition of
  // MultiVector::normImpl.
  //
  // Compute the one-, two-, or infinity-norm of each column, writing
  // the results into the first getNumVectors() entries of 'norms'.
  // The local part runs in whichever memory space (host or device)
  // holds the most recently modified data; the global part reduces
  // across processes.  For the two-norm, gblNormImpl applies the
  // final square root.
  normImpl (const Kokkos::View<mag_type*, device_type>& norms,
            const EWhichNorm whichNorm) const
  {
    using Kokkos::create_mirror_view;
    using Kokkos::subview;
    using Teuchos::Comm;
    using Teuchos::null;
    using Teuchos::RCP;
    // View of all the norm results.
    typedef Kokkos::View<mag_type*, device_type> RV;

    const size_t numVecs = this->getNumVectors ();
    if (numVecs == 0) {
      return; // nothing to do
    }
    const size_t lclNumRows = this->getLocalLength ();
    const size_t numNorms = static_cast<size_t> (norms.dimension_0 ());
    // 'norms' may be longer than needed; only its first numVecs
    // entries are written (via the subview below).
    TEUCHOS_TEST_FOR_EXCEPTION(
      numNorms < numVecs, std::runtime_error, "Tpetra::MultiVector::normImpl: "
      "'norms' must have at least as many entries as the number of vectors in "
      "*this. norms.dimension_0() = " << numNorms << " < this->getNumVectors()"
      " = " << numVecs << ".");

    const std::pair<size_t, size_t> colRng (0, numVecs);
    RV normsOut = subview (norms, colRng);

    // Translate the public norm enum into the implementation enum
    // used by the helpers in the anonymous namespace above.
    EWhichNormImpl lclNormType;
    if (whichNorm == NORM_ONE) {
      lclNormType = IMPL_NORM_ONE;
    } else if (whichNorm == NORM_TWO) {
      lclNormType = IMPL_NORM_TWO;
    } else {
      lclNormType = IMPL_NORM_INF;
    }

    // The communicator may be null if replaceMap() excluded this
    // process; gblNormImpl skips the collective in that case.
    RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
      this->getMap ()->getComm ();

    // FIXME (mfh 05 Mar 2015) DualView flags are not indicative when
    // the two memory spaces are the same, so we check the latter.
    const bool oneMemorySpace =
      Kokkos::Impl::is_same<typename dual_view_type::t_dev::memory_space,
                            typename dual_view_type::t_host::memory_space>::value;
    if (! oneMemorySpace && view_.modified_host >= view_.modified_device) {
      // DualView was last modified on host, so run the local kernel there.
      // This means we need a host mirror of the array of norms too.
      typedef typename dual_view_type::t_host XMV;
      lclNormImpl<RV, XMV> (normsOut, view_.h_view, lclNumRows, numVecs,
                            this->whichVectors_, this->isConstantStride (),
                            lclNormType);
      // Stage the global reduction through a host mirror of the
      // result View, then copy the combined results back.
      typename RV::HostMirror normsOutHost = create_mirror_view (normsOut);
      Kokkos::deep_copy (normsOutHost, normsOut);
      gblNormImpl<typename RV::HostMirror> (normsOutHost, comm,
                                            this->isDistributed (),
                                            lclNormType);
      Kokkos::deep_copy (normsOut, normsOutHost);
    }
    else {
      // DualView was last modified on device, so run the local kernel there.
      typedef typename dual_view_type::t_dev XMV;
      lclNormImpl<RV, XMV> (normsOut, view_.d_view, lclNumRows, numVecs,
                            this->whichVectors_, this->isConstantStride (),
                            lclNormType);
      gblNormImpl<RV> (normsOut, comm, this->isDistributed (), lclNormType);
    }
  }
1718 
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  // NOTE(review): the qualified-name line ("MultiVector<...>::") was
  // dropped by the documentation extractor; this is the definition of
  // MultiVector::meanValue.
  //
  // Compute the global arithmetic mean of each column, storing column
  // j's mean in means[j].  'means' must have exactly getNumVectors()
  // entries (std::runtime_error otherwise).
  meanValue (const Teuchos::ArrayView<impl_scalar_type>& means) const
  {
    // KR FIXME Overload this method to take a View.
    using Kokkos::ALL;
    using Kokkos::subview;
    using Teuchos::Comm;
    using Teuchos::RCP;
    using Teuchos::reduceAll;
    using Teuchos::REDUCE_SUM;
    typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;

    const size_t lclNumRows = this->getLocalLength ();
    const size_t numVecs = this->getNumVectors ();
    const size_t numMeans = static_cast<size_t> (means.size ());

    TEUCHOS_TEST_FOR_EXCEPTION(
      numMeans != numVecs, std::runtime_error,
      "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
      << " != this->getNumVectors() = " << numVecs << ".");

    const std::pair<size_t, size_t> rowRng (0, lclNumRows);
    const std::pair<size_t, size_t> colRng (0, numVecs);

    // Make sure that the final output view has the same layout as the
    // temporary view's HostMirror.  Left or Right doesn't matter for
    // a 1-D array anyway; this is just to placate the compiler.
    typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
    typedef Kokkos::View<impl_scalar_type*,
      typename local_view_type::HostMirror::array_layout,
      Kokkos::HostSpace,
      Kokkos::MemoryTraits<Kokkos::Unmanaged>,
      typename local_view_type::HostMirror::specialize> host_local_view_type;
    // Unmanaged view wrapping the caller's output buffer; writes to
    // meansOut land directly in 'means'.
    host_local_view_type meansOut (means.getRawPtr (), numMeans);

    // Null if replaceMap() excluded this process from the Map.
    RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
      this->getMap ()->getComm ();

    // FIXME (mfh 05 Mar 2015) DualView flags are not indicative when
    // the two memory spaces are the same, so we check the latter.
    const bool oneMemorySpace =
      Kokkos::Impl::is_same<typename dual_view_type::t_dev::memory_space,
                            typename dual_view_type::t_host::memory_space>::value;
    if (! oneMemorySpace && view_.modified_host >= view_.modified_device) {
      // DualView was last modified on host, so run the local kernel there.
      typename dual_view_type::t_host X_lcl =
        subview (this->view_.h_view, rowRng, colRng);

      // Compute the local sum of each column.
      typename local_view_type::HostMirror lclSums ("MV::meanValue tmp", numVecs);
      if (isConstantStride ()) {
        KokkosBlas::sum (lclSums, X_lcl);
      }
      else {
        // Noncontiguous columns: one column-sum call per column.
        for (size_t j = 0; j < numVecs; ++j) {
          const size_t col = whichVectors_[j];
          KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
        }
      }

      // If there are multiple MPI processes, the all-reduce reads
      // from lclSums, and writes to meansOut.  Otherwise, we just
      // copy lclSums into meansOut.
      if (! comm.is_null () && this->isDistributed ()) {
        reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
                   lclSums.ptr_on_device (), meansOut.ptr_on_device ());
      }
      else {
        Kokkos::deep_copy (meansOut, lclSums);
      }
    }
    else {
      // DualView was last modified on device, so run the local kernel there.
      typename dual_view_type::t_dev X_lcl =
        subview (this->view_.d_view, rowRng, colRng);

      // Compute the local sum of each column.
      local_view_type lclSums ("MV::meanValue tmp", numVecs);
      if (isConstantStride ()) {
        KokkosBlas::sum (lclSums, X_lcl);
      }
      else {
        // Noncontiguous columns: one column-sum call per column.
        for (size_t j = 0; j < numVecs; ++j) {
          const size_t col = whichVectors_[j];
          KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
        }
      }

      // If there are multiple MPI processes, the all-reduce reads
      // from lclSums, and writes to meansOut.  (We assume that MPI
      // can read device memory.)  Otherwise, we just copy lclSums
      // into meansOut.
      if (! comm.is_null () && this->isDistributed ()) {
        reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
                   lclSums.ptr_on_device (), meansOut.ptr_on_device ());
      }
      else {
        Kokkos::deep_copy (meansOut, lclSums);
      }
    }

    // mfh 12 Apr 2012: Don't take out the cast from the ordinal type
    // to the magnitude type, since operator/ (std::complex<T>, int)
    // isn't necessarily defined.
    //
    // NOTE(review): if getGlobalLength() == 0 this divides by zero,
    // which yields Inf/NaN under IEEE 754 — confirm callers never ask
    // for the mean of a globally empty vector.
    const impl_scalar_type OneOverN =
      ATS::one () / static_cast<mag_type> (this->getGlobalLength ());
    // Turn each global column sum into a mean.
    for (size_t k = 0; k < numMeans; ++k) {
      meansOut(k) = meansOut(k) * OneOverN;
    }
  }
1831 
1832 
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  // NOTE(review): the qualified-name and declarator lines were dropped
  // by the documentation extractor; from the body (XorShift64 pool and
  // fill_random) this is MultiVector::randomize().
  //
  // Fill every entry of the MultiVector with a pseudorandom value in
  // [min, max], where max comes from Kokkos::rand for the scalar type
  // and min is -max for signed types, zero otherwise.
  {
    using Kokkos::ALL;
    using Kokkos::subview;
    typedef impl_scalar_type IST;
    typedef Kokkos::Details::ArithTraits<IST> ATS;
    typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
    typedef typename pool_type::generator_type generator_type;

    // Seed the pseudorandom number generator using the calling
    // process' rank.  This helps decorrelate different process'
    // pseudorandom streams.  It's not perfect but it's effective and
    // doesn't require MPI communication.  The seed also includes bits
    // from the standard library's rand().
    //
    // FIXME (mfh 07 Jan 2015) Should we save the seed for later use?
    // The code below just makes a new seed each time.

    const uint64_t myRank =
      static_cast<uint64_t> (this->getMap ()->getComm ()->getRank ());
    uint64_t seed64 = static_cast<uint64_t> (std::rand ()) + myRank + 17311uLL;
    // The pool takes a 32-bit seed; keep only the low 32 bits.
    unsigned int seed = static_cast<unsigned int> (seed64&0xffffffff);

    pool_type rand_pool (seed);
    const IST max = Kokkos::rand<generator_type, IST>::max ();
    const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();

    if (isConstantStride ()) {
      // Constant stride: every entry of the device view is
      // overwritten, so mark the device side as modified.
      Kokkos::fill_random (view_.d_view, rand_pool, min, max);
      view_.template modify<device_type> ();
    }
    else {
      const size_t numVecs = getNumVectors ();
      // Only some columns get overwritten, so sync to device first to
      // avoid losing up-to-date host data in the untouched columns.
      view_.template sync<device_type> ();
      typedef Kokkos::View<IST*, device_type> view_type;
      // One fill per (noncontiguous) column.
      for (size_t k = 0; k < numVecs; ++k) {
        const size_t col = whichVectors_[k];
        view_type X_k = subview (view_.d_view, ALL (), col);
        Kokkos::fill_random (X_k, rand_pool, min, max);
      }
      view_.template modify<device_type> ();
    }
  }
1879 
1880 
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  // NOTE(review): the qualified-name line ("MultiVector<...>::") was
  // dropped by the documentation extractor; this is the definition of
  // MultiVector::putScalar.
  //
  // Set every entry of the MultiVector to alpha.  The fill runs in
  // whichever memory space (host or device) holds the most recently
  // modified data, so no sync is needed beforehand.
  putScalar (const Scalar& alpha)
  {
    using Kokkos::ALL;
    using Kokkos::Impl::ViewFill;
    using Kokkos::subview;
    typedef typename dual_view_type::t_dev::device_type DMS;
    typedef typename dual_view_type::t_host::device_type HMS;

    const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
    const size_t lclNumRows = getLocalLength ();
    const size_t numVecs = getNumVectors ();
    const std::pair<size_t, size_t> rowRng (0, lclNumRows);
    const std::pair<size_t, size_t> colRng (0, numVecs);

    // Modify the most recently updated version of the data.  This
    // avoids sync'ing, which could violate users' expectations.
    if (view_.modified_device >= view_.modified_host) {
      //
      // Last modified in device memory, so modify data there.
      //
      // Type of the device memory View of the MultiVector's data.
      typedef typename dual_view_type::t_dev mv_view_type;
      // Type of a View of a single column of the MultiVector's data.
      typedef Kokkos::View<impl_scalar_type*,
        typename mv_view_type::array_layout, DMS> vec_view_type;

      this->template modify<DMS> (); // we are about to modify on the device
      mv_view_type X =
        subview (this->getDualView ().template view<DMS> (),
                 rowRng, colRng);
      if (numVecs == 1) {
        vec_view_type X_0 =
          subview (X, ALL (), static_cast<size_t> (0));
        // The constructor of ViewFill invokes the functor in
        // parallel, so we don't have to call parallel_for ourselves.
        ViewFill<vec_view_type> vf (X_0, theAlpha);
      }
      else if (isConstantStride ()) {
        // Contiguous columns: one 2-D fill covers everything.
        ViewFill<mv_view_type> vf (X, theAlpha);
      }
      else {
        // Noncontiguous columns: fill one column at a time.
        for (size_t k = 0; k < numVecs; ++k) {
          const size_t col = whichVectors_[k];
          vec_view_type X_k = subview (X, ALL (), col);
          ViewFill<vec_view_type> vf (X_k, theAlpha);
        }
      }
    }
    else { // last modified in host memory, so modify data there.
      typedef typename dual_view_type::t_host mv_view_type;
      typedef Kokkos::View<impl_scalar_type*,
        typename mv_view_type::array_layout, HMS> vec_view_type;

      this->template modify<HMS> (); // we are about to modify on the host
      mv_view_type X =
        subview (this->getDualView ().template view<HMS> (),
                 rowRng, colRng);
      if (numVecs == 1) {
        vec_view_type X_0 =
          subview (X, ALL (), static_cast<size_t> (0));
        // The constructor of ViewFill invokes the functor in
        // parallel, so we don't have to call parallel_for ourselves.
        ViewFill<vec_view_type> vf (X_0, theAlpha);
      }
      else if (isConstantStride ()) {
        // Contiguous columns: one 2-D fill covers everything.
        ViewFill<mv_view_type> vf (X, theAlpha);
      }
      else {
        // Noncontiguous columns: fill one column at a time.
        for (size_t k = 0; k < numVecs; ++k) {
          const size_t col = whichVectors_[k];
          vec_view_type X_k = subview (X, ALL (), col);
          ViewFill<vec_view_type> vf (X_k, theAlpha);
        }
      }
    }
  }
1960 
1961 
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  // NOTE(review): the qualified-name line ("MultiVector<...>::") was
  // dropped by the documentation extractor; this is the definition of
  // MultiVector::replaceMap.
  //
  // Replace the MultiVector's Map, reallocating local storage only
  // when the calling process moves from "excluded" (null Map) to
  // "included" (nonnull Map), or vice versa.  Not supported for
  // MultiVectors that are noncontiguous column views (throws
  // std::logic_error).
  replaceMap (const Teuchos::RCP<const map_type>& newMap)
  {
    using Teuchos::ArrayRCP;
    using Teuchos::Comm;
    using Teuchos::RCP;

    // mfh 28 Mar 2013: This method doesn't forget whichVectors_, so
    // it might work if the MV is a column view of another MV.
    // However, things might go wrong when restoring the original
    // Map, so we don't allow this case for now.
    TEUCHOS_TEST_FOR_EXCEPTION(
      ! this->isConstantStride (), std::logic_error,
      "Tpetra::MultiVector::replaceMap: This method does not currently work "
      "if the MultiVector is a column view of another MultiVector (that is, if "
      "isConstantStride() == false).");

    // Case 1: current Map and new Map are both nonnull on this process.
    // Case 2: current Map is nonnull, new Map is null.
    // Case 3: current Map is null, new Map is nonnull.
    // Case 4: both Maps are null: forbidden.
    //
    // Case 1 means that we don't have to do anything on this process,
    // other than assign the new Map.  (We always have to do that.)
    // It's an error for the user to supply a Map that requires
    // resizing in this case.
    //
    // Case 2 means that the calling process is in the current Map's
    // communicator, but will be excluded from the new Map's
    // communicator.  We don't have to do anything on the calling
    // process; just leave whatever data it may have alone.
    //
    // Case 3 means that the calling process is excluded from the
    // current Map's communicator, but will be included in the new
    // Map's communicator.  This means we need to (re)allocate the
    // local DualView if it does not have the right number of rows.
    // If the new number of rows is nonzero, we'll fill the newly
    // allocated local data with zeros, as befits a projection
    // operation.
    //
    // The typical use case for Case 3 is that the MultiVector was
    // first created with the Map with more processes, then that Map
    // was replaced with a Map with fewer processes, and finally the
    // original Map was restored on this call to replaceMap.

#ifdef HAVE_TEUCHOS_DEBUG
    // mfh 28 Mar 2013: We can't check for compatibility across the
    // whole communicator, unless we know that the current and new
    // Maps are nonnull on _all_ participating processes.
    // TEUCHOS_TEST_FOR_EXCEPTION(
    //   origNumProcs == newNumProcs && ! this->getMap ()->isCompatible (*map),
    //   std::invalid_argument, "Tpetra::MultiVector::project: "
    //   "If the input Map's communicator is compatible (has the same number of "
    //   "processes as) the current Map's communicator, then the two Maps must be "
    //   "compatible.  The replaceMap() method is not for data redistribution; "
    //   "use Import or Export for that purpose.");

    // TODO (mfh 28 Mar 2013) Add compatibility checks for projections
    // of the Map, in case the process counts don't match.
#endif // HAVE_TEUCHOS_DEBUG

    if (this->getMap ().is_null ()) { // current Map is null
      // If this->getMap() is null, that means that this MultiVector
      // has already had replaceMap happen to it.  In that case, just
      // reallocate the DualView with the right size.

      TEUCHOS_TEST_FOR_EXCEPTION(
        newMap.is_null (), std::invalid_argument,
        "Tpetra::MultiVector::replaceMap: both current and new Maps are null.  "
        "This probably means that the input Map is incorrect.");

      // Case 3: current Map is null, new Map is nonnull.
      // Reallocate the DualView with the right dimensions.
      const size_t newNumRows = newMap->getNodeNumElements ();
      const size_t origNumRows = view_.dimension_0 ();
      const size_t numCols = this->getNumVectors ();

      // Only reallocate if the dimensions actually changed.
      if (origNumRows != newNumRows || view_.dimension_1 () != numCols) {
        view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (newNumRows, numCols);
      }
    }
    else if (newMap.is_null ()) { // Case 2: current Map is nonnull, new Map is null
      // I am an excluded process.  Reinitialize my data so that I
      // have 0 rows.  Keep the number of columns as before.
      const size_t newNumRows = static_cast<size_t> (0);
      const size_t numCols = this->getNumVectors ();
      view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (newNumRows, numCols);
    }

    // Case 1 (both nonnull) falls through to here: just adopt the Map.
    this->map_ = newMap;
  }
2055 
2056  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2057  void
2059  scale (const Scalar& alpha)
2060  {
2061  using Kokkos::ALL;
2062  using Kokkos::subview;
2063 
2064  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
2065  if (theAlpha == Kokkos::Details::ArithTraits<impl_scalar_type>::one ()) {
2066  return; // do nothing
2067  }
2068  const size_t lclNumRows = this->getLocalLength ();
2069  const size_t numVecs = this->getNumVectors ();
2070  const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2071  const std::pair<size_t, size_t> colRng (0, numVecs);
2072 
2073  typedef typename dual_view_type::t_dev dev_view_type;
2074  typedef typename dual_view_type::t_host host_view_type;
2075 
2076  // We can't substitute putScalar(0.0) for scale(0.0), because the
2077  // former will overwrite NaNs present in the MultiVector. The
2078  // semantics of this call require multiplying them by 0, which
2079  // IEEE 754 requires to be NaN.
2080 
2081  // FIXME (mfh 05 Mar 2015) DualView flags are not indicative when
2082  // the two memory spaces are the same, so we check the latter.
2083  const bool oneMemorySpace =
2084  Kokkos::Impl::is_same<typename dev_view_type::memory_space,
2085  typename host_view_type::memory_space>::value;
2086  if (! oneMemorySpace && view_.modified_host >= view_.modified_device) {
2087  auto Y_lcl = subview (this->view_.h_view, rowRng, colRng);
2088 
2089  if (isConstantStride ()) {
2090  KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2091  }
2092  else {
2093  for (size_t k = 0; k < numVecs; ++k) {
2094  const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2095  auto Y_k = subview (Y_lcl, ALL (), Y_col);
2096  KokkosBlas::scal (Y_k, theAlpha, Y_k);
2097  }
2098  }
2099  }
2100  else { // work on device
2101  auto Y_lcl = subview (this->view_.d_view, rowRng, colRng);
2102 
2103  if (isConstantStride ()) {
2104  KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2105  }
2106  else {
2107  for (size_t k = 0; k < numVecs; ++k) {
2108  const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2109  auto Y_k = subview (Y_lcl, ALL (), Y_col);
2110  KokkosBlas::scal (Y_k, theAlpha, Y_k);
2111  }
2112  }
2113  }
2114  }
2115 
2116 
2117  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2118  void
2120  scale (const Teuchos::ArrayView<const Scalar>& alphas)
2121  {
2122  const size_t numVecs = this->getNumVectors ();
2123  const size_t numAlphas = static_cast<size_t> (alphas.size ());
2124  TEUCHOS_TEST_FOR_EXCEPTION(
2125  numAlphas != numVecs, std::invalid_argument, "Tpetra::MultiVector::"
2126  "scale: alphas.size() = " << numAlphas << " != this->getNumVectors() = "
2127  << numVecs << ".");
2128 
2129  // Use a DualView to copy the scaling constants onto the device.
2130  typedef Kokkos::DualView<impl_scalar_type*, device_type> k_alphas_type ;
2131  k_alphas_type k_alphas ("alphas::tmp", numAlphas);
2132  k_alphas.template modify<typename k_alphas_type::host_mirror_space> ();
2133  for (size_t i = 0; i < numAlphas; ++i) {
2134  k_alphas.h_view(i) = static_cast<impl_scalar_type> (alphas[i]);
2135  }
2136  k_alphas.template sync<typename k_alphas_type::memory_space> ();
2137  // Invoke the scale() overload that takes a device View of coefficients.
2138  this->scale (k_alphas.d_view);
2139  }
2140 
2141  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2142  void
2144  scale (const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2145  {
2146  using Kokkos::ALL;
2147  using Kokkos::subview;
2148 
2149  const size_t lclNumRows = this->getLocalLength ();
2150  const size_t numVecs = this->getNumVectors ();
2151  TEUCHOS_TEST_FOR_EXCEPTION(
2152  static_cast<size_t> (alphas.dimension_0 ()) != numVecs,
2153  std::invalid_argument, "Tpetra::MultiVector::scale(alphas): "
2154  "alphas.dimension_0() = " << alphas.dimension_0 ()
2155  << " != this->getNumVectors () = " << numVecs << ".");
2156  const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2157  const std::pair<size_t, size_t> colRng (0, numVecs);
2158 
2159  typedef typename dual_view_type::t_dev dev_view_type;
2160  typedef typename dual_view_type::t_host host_view_type;
2161  // NOTE (mfh 08 Apr 2015) We prefer to let the compiler deduce the
2162  // type of the return value of subview. This is because if we
2163  // switch the array layout from LayoutLeft to LayoutRight
2164  // (preferred for performance of block operations), the types
2165  // below won't be valid. (A view of a column of a LayoutRight
2166  // multivector has LayoutStride, not LayoutLeft.)
2167 
2168  const bool oneMemorySpace =
2169  Kokkos::Impl::is_same<typename dev_view_type::memory_space,
2170  typename host_view_type::memory_space>::value;
2171  if (! oneMemorySpace &&
2172  this->view_.modified_host >= this->view_.modified_device) {
2173  // Work in host memory. This means we need to create a host
2174  // mirror of the input View of coefficients.
2175  typedef Kokkos::View<const impl_scalar_type*,
2176  execution_space> input_view_type;
2177  typename input_view_type::HostMirror alphas_h =
2178  Kokkos::create_mirror_view (alphas);
2179  Kokkos::deep_copy (alphas_h, alphas);
2180 
2181  auto Y_lcl = subview (this->view_.h_view, rowRng, colRng);
2182 
2183  if (isConstantStride ()) {
2184  KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2185  }
2186  else {
2187  for (size_t k = 0; k < numVecs; ++k) {
2188  const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2189  auto Y_k = subview (Y_lcl, ALL (), Y_col);
2190  // We don't have to use the entire 1-D View here; we can use
2191  // the version that takes a scalar coefficient.
2192  KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2193  }
2194  }
2195  }
2196  else { // Work in device memory, using the input View 'alphas' directly.
2197  auto Y_lcl = subview (this->view_.d_view, rowRng, colRng);
2198 
2199  if (isConstantStride ()) {
2200  KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2201  }
2202  else {
2203  for (size_t k = 0; k < numVecs; ++k) {
2204  const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2205  auto Y_k = subview (Y_lcl, ALL (), Y_col);
2206  //
2207  // FIXME (mfh 08 Apr 2015) This assumes UVM. It would be
2208  // better to fix scal() so that it takes a 0-D View as the
2209  // second argument.
2210  //
2211  KokkosBlas::scal (Y_k, alphas(k), Y_k);
2212  }
2213  }
2214  }
2215  }
2216 
  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
  void
  // NOTE(review): the qualified-name line and the second parameter line
  // ("const MultiVector<...>& A") were dropped by the documentation
  // extractor; this is MultiVector::scale(alpha, A), which assigns
  // *this = alpha * A entrywise.
  scale (const Scalar& alpha,
  {
    using Kokkos::ALL;
    using Kokkos::subview;
    const char tfecfFuncName[] = "scale: ";

    const size_t lclNumRows = getLocalLength ();
    const size_t numVecs = getNumVectors ();

    // *this and A must agree in both dimensions.
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      lclNumRows != A.getLocalLength (), std::invalid_argument,
      "this->getLocalLength() = " << lclNumRows << " != A.getLocalLength() = "
      << A.getLocalLength () << ".");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      numVecs != A.getNumVectors (), std::invalid_argument,
      "this->getNumVectors() = " << numVecs << " != A.getNumVectors() = "
      << A.getNumVectors () << ".");

    const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
    const std::pair<size_t, size_t> rowRng (0, lclNumRows);
    const std::pair<size_t, size_t> colRng (0, numVecs);

    typedef typename dual_view_type::t_dev dev_view_type;
    typedef typename dual_view_type::t_host host_view_type;

    // FIXME (mfh 05 Mar 2015) DualView flags are not indicative when
    // the two memory spaces are the same, so we check the latter.
    const bool oneMemorySpace =
      Kokkos::Impl::is_same<typename dev_view_type::memory_space,
                            typename host_view_type::memory_space>::value;
    if (! oneMemorySpace && A.view_.modified_host >= A.view_.modified_device) {
      // Work on host, where A's data were most recently modified.  A
      // is a "guest" of this method, so it's more polite to sync
      // *this, than to sync A.
      this->view_.template sync<typename host_view_type::memory_space> ();
      this->view_.template modify<typename host_view_type::memory_space> ();
      auto Y_lcl = subview (this->view_.h_view, rowRng, colRng);
      auto X_lcl = subview (A.view_.h_view, rowRng, colRng);

      if (isConstantStride () && A.isConstantStride ()) {
        KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
      }
      else {
        // Make sure that Kokkos only uses the local length for add.
        for (size_t k = 0; k < numVecs; ++k) {
          const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
          const size_t X_col = A.isConstantStride () ? k : A.whichVectors_[k];
          auto Y_k = subview (Y_lcl, ALL (), Y_col);
          auto X_k = subview (X_lcl, ALL (), X_col);

          KokkosBlas::scal (Y_k, theAlpha, X_k);
        }
      }
    }
    else { // work on device
      // A is a "guest" of this method, so it's more polite to sync
      // *this, than to sync A.
      this->view_.template sync<typename dev_view_type::memory_space> ();
      this->view_.template modify<typename dev_view_type::memory_space> ();
      auto Y_lcl = subview (this->view_.d_view, rowRng, colRng);
      auto X_lcl = subview (A.view_.d_view, rowRng, colRng);

      if (isConstantStride () && A.isConstantStride ()) {
        KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
      }
      else {
        // Make sure that Kokkos only uses the local length for add.
        for (size_t k = 0; k < numVecs; ++k) {
          const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
          const size_t X_col = A.isConstantStride () ? k : A.whichVectors_[k];
          auto Y_k = subview (Y_lcl, ALL (), Y_col);
          auto X_k = subview (X_lcl, ALL (), X_col);

          KokkosBlas::scal (Y_k, theAlpha, X_k);
        }
      }
    }
  }
2299 
2300 
// reciprocal(A): overwrite each entry of *this with the elementwise
// reciprocal of the corresponding entry of A.
// NOTE (review): this listing was extracted from rendered documentation;
// the qualified signature line (between "void" and "{") is missing, so
// the code is left byte-identical rather than restyled.
2301  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2302  void
2305  {
2306  const char tfecfFuncName[] = "reciprocal";
2307 
// Precondition checks: *this and A must match in local length and
// number of columns; violations throw std::runtime_error.
2308  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2309  getLocalLength () != A.getLocalLength (), std::runtime_error,
2310  ": MultiVectors do not have the same local length. "
2311  "this->getLocalLength() = " << getLocalLength ()
2312  << " != A.getLocalLength() = " << A.getLocalLength () << ".");
2313  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2314  A.getNumVectors () != this->getNumVectors (), std::runtime_error,
2315  ": MultiVectors do not have the same number of columns (vectors). "
2316  "this->getNumVectors() = " << getNumVectors ()
2317  << " != A.getNumVectors() = " << A.getNumVectors () << ".");
2318 
2319  // FIXME (mfh 07 Jan 2015) See note on two-argument scale() above.
2320 
2321  const size_t numVecs = getNumVectors ();
2322  try {
// Fast path: both MultiVectors have constant stride, so a single
// whole-view KokkosBlas call on the device view suffices.
2323  if (isConstantStride () && A.isConstantStride ()) {
2324  view_.template sync<device_type> ();
2325  view_.template modify<device_type> ();
2326  KokkosBlas::reciprocal (view_.d_view, A.view_.d_view);
2327  }
// Slow path: at least one operand is a noncontiguous column view, so
// apply the kernel one column at a time via whichVectors_ indirection.
2328  else {
2329  using Kokkos::ALL;
2330  using Kokkos::subview;
2331  typedef Kokkos::View<impl_scalar_type*, device_type> view_type;
2332 
2333  view_.template sync<device_type> ();
2334  view_.template modify<device_type> ();
2335 
2336  // FIXME (mfh 23 Jul 2014) I'm not sure if it should be our
2337  // responsibility to sync A.
2338  A.view_.template sync<device_type> ();
2339  A.view_.template modify<device_type> ();
2340 
2341  for (size_t k = 0; k < numVecs; ++k) {
2342  const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2343  view_type vector_k = subview (view_.d_view, ALL (), this_col);
2344  const size_t A_col = isConstantStride () ? k : A.whichVectors_[k];
2345  view_type vector_Ak = subview (A.view_.d_view, ALL (), A_col);
2346  KokkosBlas::reciprocal(vector_k, vector_Ak);
2347  }
2348  }
2349  }
// Rewrap any Kokkos runtime_error with this method's context string.
2350  catch (std::runtime_error &e) {
2351  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error,
2352  ": Caught exception from Kokkos: " << e.what () << std::endl);
2353  }
2354  }
2355 
2356 
// abs(A): overwrite each entry of *this with the magnitude (absolute
// value) of the corresponding entry of A.
// NOTE (review): extracted listing; the qualified signature line is
// missing between "void" and "{". Code left byte-identical.
2357  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2358  void
2361  {
2362  const char tfecfFuncName[] = "abs";
// Precondition checks: dimensions of *this and A must agree.
2363  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2364  getLocalLength () != A.getLocalLength (), std::runtime_error,
2365  ": MultiVectors do not have the same local length. "
2366  "this->getLocalLength() = " << getLocalLength ()
2367  << " != A.getLocalLength() = " << A.getLocalLength () << ".");
2368  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2369  A.getNumVectors () != this->getNumVectors (), std::runtime_error,
2370  ": MultiVectors do not have the same number of columns (vectors). "
2371  "this->getNumVectors() = " << getNumVectors ()
2372  << " != A.getNumVectors() = " << A.getNumVectors () << ".");
2373 
2374  // FIXME (mfh 07 Jan 2015) See note on two-argument scale() above.
2375 
2376  const size_t numVecs = getNumVectors ();
// Fast path: both constant stride -> one whole-view kernel call.
2377  if (isConstantStride () && A.isConstantStride ()) {
2378  view_.template sync<device_type>();
2379  view_.template modify<device_type>();
2380  KokkosBlas::abs (view_.d_view, A.view_.d_view);
2381  }
// Slow path: column-by-column, resolving actual column indices
// through whichVectors_ for noncontiguous views.
2382  else {
2383  using Kokkos::ALL;
2384  using Kokkos::subview;
2385  typedef Kokkos::View<impl_scalar_type*, device_type> view_type;
2386 
2387  view_.template sync<device_type> ();
2388  view_.template modify<device_type> ();
2389  A.view_.template sync<device_type> ();
2390  A.view_.template modify<device_type> ();
2391 
2392  for (size_t k=0; k < numVecs; ++k) {
2393  const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2394  view_type vector_k = subview (view_.d_view, ALL (), this_col);
2395  const size_t A_col = isConstantStride () ? k : A.whichVectors_[k];
2396  view_type vector_Ak = subview (A.view_.d_view, ALL (), A_col);
2397  KokkosBlas::abs (vector_k, vector_Ak);
2398  }
2399  }
2400  }
2401 
2402 
// update(alpha, A, beta): compute *this = beta*(*this) + alpha*A,
// via KokkosBlas::axpby, on whichever memory space (host or device)
// holds A's most recently modified data.
// NOTE (review): extracted listing; the qualified name line and the
// "const MultiVector& A" parameter line are missing from this chunk.
2403  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2404  void
2406  update (const Scalar& alpha,
2408  const Scalar& beta)
2409  {
2410  using Kokkos::ALL;
2411  using Kokkos::subview;
2412  const char tfecfFuncName[] = "update: ";
2413 
2414  const size_t lclNumRows = getLocalLength ();
2415  const size_t numVecs = getNumVectors ();
2416 
// Precondition checks: *this and A must have the same local number of
// rows and the same number of columns.
2417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2418  lclNumRows != A.getLocalLength (), std::invalid_argument,
2419  "this->getLocalLength() = " << lclNumRows << " != A.getLocalLength() = "
2420  << A.getLocalLength () << ".");
2421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2422  numVecs != A.getNumVectors (), std::invalid_argument,
2423  "this->getNumVectors() = " << numVecs << " != A.getNumVectors() = "
2424  << A.getNumVectors () << ".");
2425 
2426  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
2427  const impl_scalar_type theBeta = static_cast<impl_scalar_type> (beta);
2428  const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2429  const std::pair<size_t, size_t> colRng (0, numVecs);
2430 
2431  typedef typename dual_view_type::t_dev dev_view_type;
2432  typedef typename dual_view_type::t_host host_view_type;
2433 
2434  // FIXME (mfh 05 Mar 2015) DualView flags are not indicative when
2435  // the two memory spaces are the same, so we check the latter.
2436  const bool oneMemorySpace =
2437  Kokkos::Impl::is_same<typename dev_view_type::memory_space,
2438  typename host_view_type::memory_space>::value;
2439  if (! oneMemorySpace && A.view_.modified_host >= A.view_.modified_device) {
2440  // Work on host, where A's data were most recently modified. A
2441  // is a "guest" of this method, so it's more polite to sync
2442  // *this, than to sync A.
2443  this->view_.template sync<typename host_view_type::memory_space> ();
2444  this->view_.template modify<typename host_view_type::memory_space> ();
2445  auto Y_lcl = subview (this->view_.h_view, rowRng, colRng);
2446  auto X_lcl = subview (A.view_.h_view, rowRng, colRng);
2447 
2448  if (isConstantStride () && A.isConstantStride ()) {
2449  KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2450  }
2451  else {
2452  // Make sure that Kokkos only uses the local length for add.
2453  for (size_t k = 0; k < numVecs; ++k) {
2454  const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2455  const size_t X_col = A.isConstantStride () ? k : A.whichVectors_[k];
2456  auto Y_k = subview (Y_lcl, ALL (), Y_col);
2457  auto X_k = subview (X_lcl, ALL (), X_col);
2458 
2459  KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2460  }
2461  }
2462  }
2463  else { // work on device
2464  // A is a "guest" of this method, so it's more polite to sync
2465  // *this, than to sync A.
2466  this->view_.template sync<typename dev_view_type::memory_space> ();
2467  this->view_.template modify<typename dev_view_type::memory_space> ();
2468  auto Y_lcl = subview (this->view_.d_view, rowRng, colRng);
2469  auto X_lcl = subview (A.view_.d_view, rowRng, colRng);
2470 
2471  if (isConstantStride () && A.isConstantStride ()) {
2472  KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2473  }
2474  else {
2475  // Make sure that Kokkos only uses the local length for add.
2476  for (size_t k = 0; k < numVecs; ++k) {
2477  const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2478  const size_t X_col = A.isConstantStride () ? k : A.whichVectors_[k];
2479  auto Y_k = subview (Y_lcl, ALL (), Y_col);
2480  auto X_k = subview (X_lcl, ALL (), X_col);
2481 
2482  KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2483  }
2484  }
2485  }
2486  }
2487 
// update(alpha, A, beta, B, gamma): compute
// *this = gamma*(*this) + alpha*A + beta*B via KokkosBlas::update.
// Unlike the three-argument overload, this always syncs everything to
// the device memory space before computing.
// NOTE (review): extracted listing; the qualified name line and the
// "const MultiVector& A" / "const MultiVector& B" parameter lines are
// missing from this chunk.
2488  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2489  void
2491  update (const Scalar& alpha,
2493  const Scalar& beta,
2495  const Scalar& gamma)
2496  {
2497  using Kokkos::ALL;
2498  using Kokkos::subview;
2499  const char tfecfFuncName[] = "update(alpha,A,beta,B,gamma): ";
2500 
// Precondition checks: A and B must both match *this in local row
// count and number of columns.
2501  const size_t lclNumRows = this->getLocalLength ();
2502  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2503  lclNumRows != A.getLocalLength (), std::invalid_argument,
2504  "The input MultiVector A has " << A.getLocalLength () << " local "
2505  "row(s), but this MultiVector has " << lclNumRows << " local "
2506  "row(s).");
2507  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2508  lclNumRows != B.getLocalLength (), std::invalid_argument,
2509  "The input MultiVector B has " << B.getLocalLength () << " local "
2510  "row(s), but this MultiVector has " << lclNumRows << " local "
2511  "row(s).");
2512  const size_t numVecs = getNumVectors ();
2513  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2514  A.getNumVectors () != numVecs, std::invalid_argument,
2515  "The input MultiVector A has " << A.getNumVectors () << " column(s), "
2516  "but this MultiVector has " << numVecs << " column(s).");
2517  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2518  B.getNumVectors () != numVecs, std::invalid_argument,
2519  "The input MultiVector B has " << B.getNumVectors () << " column(s), "
2520  "but this MultiVector has " << numVecs << " column(s).");
2521 
2522  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
2523  const impl_scalar_type theBeta = static_cast<impl_scalar_type> (beta);
2524  const impl_scalar_type theGamma = static_cast<impl_scalar_type> (gamma);
2525 
2526  // We're lucky if *this, A, and B are all sync'd to the same
2527  // memory space. If not, we have to sync _something_. Unlike
2528  // three-argument update() or (say) dot(), we may have to sync one
2529  // of the inputs. For now, we just sync _everything_ to device.
2530  this->view_.template sync<typename dual_view_type::t_dev::memory_space> ();
2531  A.view_.template sync<typename dual_view_type::t_dev::memory_space> ();
2532  B.view_.template sync<typename dual_view_type::t_dev::memory_space> ();
2533 
2534  // This method modifies *this.
2535  this->template modify<typename dual_view_type::t_dev::memory_space> ();
2536 
2537  const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2538  const std::pair<size_t, size_t> colRng (0, numVecs);
2539 
2540  // Prefer 'auto' over specifying the type explicitly. This avoids
2541  // issues with a subview possibly having a different type than the
2542  // original view.
2543  auto C_lcl = subview (this->view_.d_view, rowRng, colRng);
2544  auto A_lcl = subview (A.view_.d_view, rowRng, colRng);
2545  auto B_lcl = subview (B.view_.d_view, rowRng, colRng);
2546 
// Fast path: all three operands constant stride -> one kernel call.
2547  if (isConstantStride () && A.isConstantStride () && B.isConstantStride ()) {
2548  KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
2549  }
2550  else {
2551  // Some input (or *this) is not constant stride,
2552  // so perform the update one column at a time.
2553  for (size_t k = 0; k < numVecs; ++k) {
2554  const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2555  const size_t A_col = A.isConstantStride () ? k : A.whichVectors_[k];
2556  const size_t B_col = B.isConstantStride () ? k : B.whichVectors_[k];
2557  KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
2558  theBeta, subview (B_lcl, rowRng, B_col),
2559  theGamma, subview (C_lcl, rowRng, this_col));
2560  }
2561  }
2562  }
2563 
// getData(j): return a const, host-side, persisting view (ArrayRCP) of
// column j's data, syncing device -> host first.
// NOTE (review): extracted listing; the qualified name line before
// "getData (size_t j) const" is missing from this chunk.
2564  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2565  Teuchos::ArrayRCP<const Scalar>
2567  getData (size_t j) const
2568  {
2569  using Kokkos::ALL;
2570  using Kokkos::subview;
2571  typedef typename dual_view_type::host_mirror_space host_type;
2572  typedef typename dual_view_type::t_host host_view_type;
2573 
2574  // Any MultiVector method that called the (classic) Kokkos Node's
2575  // viewBuffer or viewBufferNonConst methods always implied a
2576  // device->host synchronization. Thus, we synchronize here as
2577  // well.
2578  view_.template sync<host_type> ();
2579 
2580  // Get a host view of the entire MultiVector's data.
2581  host_view_type hostView = view_.template view<host_type> ();
2582  // Get a subview of column j.
2583  host_view_type hostView_j;
2584 
// Resolve j to the actual stored column for noncontiguous views.
2585  const size_t colStart = isConstantStride () ? j : whichVectors_[j];
2586  const std::pair<size_t, size_t> colRng (colStart, colStart+1);
2587  hostView_j = subview (hostView, ALL (), colRng);
2588 
2589  // Wrap up the subview of column j in an ArrayRCP<const impl_scalar_type>.
2590  Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
2591  Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
2592 
2593 #ifdef HAVE_TPETRA_DEBUG
2594  TEUCHOS_TEST_FOR_EXCEPTION(
2595  static_cast<size_t>(hostView_j.dimension_0 ()) < static_cast<size_t>(dataAsArcp.size ()), std::logic_error,
2596  "Tpetra::MultiVector::getData: hostView_j.dimension_0() = "
2597  << hostView_j.dimension_0 () << " < dataAsArcp.size() = "
2598  << dataAsArcp.size () << ". "
2599  "Please report this bug to the Tpetra developers.");
2600 #endif // HAVE_TPETRA_DEBUG
2601 
// Reinterpret impl_scalar_type as the user-facing Scalar type.
2602  return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
2603  }
2604 
// getDataNonConst(j): like getData(j), but returns a non-const view and
// marks the host data as modified, since the caller may write to it.
// NOTE (review): extracted listing; the qualified name line and the
// "(size_t j)" parameter line are missing from this chunk.
2605  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2606  Teuchos::ArrayRCP<Scalar>
2609  {
2610  using Kokkos::ALL;
2611  using Kokkos::subview;
2612  typedef typename dual_view_type::host_mirror_space host_type;
2613  typedef typename dual_view_type::t_host host_view_type;
2614 
2615  // Any MultiVector method that called the (classic) Kokkos Node's
2616  // viewBuffer or viewBufferNonConst methods always implied a
2617  // device->host synchronization. Thus, we synchronize here as
2618  // well.
2619  view_.template sync<host_type> ();
2620 
2621  // Get a host view of the entire MultiVector's data.
2622  host_view_type hostView = view_.template view<host_type> ();
2623  // Get a subview of column j.
2624  host_view_type hostView_j;
// For noncontiguous views, whichVectors_[j] gives the stored column.
2625  if (isConstantStride ()) {
2626  hostView_j = subview (hostView, ALL (), Kokkos::pair<int,int>(j,j+1));
2627  } else {
2628  hostView_j = subview (hostView, ALL (), Kokkos::pair<int,int>(whichVectors_[j],whichVectors_[j]+1));
2629  }
2630 
2631  // Calling getDataNonConst() implies that the user plans to modify
2632  // the values in the MultiVector, so we call modify on the view
2633  // here.
2634  view_.template modify<host_type> ();
2635 
2636  // Wrap up the subview of column j in an ArrayRCP<const impl_scalar_type>.
2637  Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
2638  Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
2639 
2640 #ifdef HAVE_TPETRA_DEBUG
2641  TEUCHOS_TEST_FOR_EXCEPTION(
2642  static_cast<size_t>(hostView_j.dimension_0 ()) < static_cast<size_t>(dataAsArcp.size ()), std::logic_error,
2643  "Tpetra::MultiVector::getDataNonConst: hostView_j.dimension_0() = "
2644  << hostView_j.dimension_0 () << " < dataAsArcp.size() = "
2645  << dataAsArcp.size () << ". "
2646  "Please report this bug to the Tpetra developers.");
2647 #endif // HAVE_TPETRA_DEBUG
2648 
2649  return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
2650  }
2651 
// operator=: shallow-copy (view-semantics) assignment. The target
// shares the source's Kokkos views; no vector data are copied.
// NOTE (review): extracted listing; the qualified signature lines
// (return type and "operator= (const MultiVector& source)") are
// missing from this chunk.
2652  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2656  {
// Self-assignment guard.
2657  if (this != &source) {
2658  base_type::operator= (source);
2659  //
2660  // operator= implements view semantics (shallow copy).
2661  //
2662 
2663  // Kokkos::View operator= also implements view semantics.
2664  view_ = source.view_;
2665  origView_ = source.origView_;
2666 
2667  // NOTE (mfh 24 Mar 2014) Christian wrote here that assigning
2668  // whichVectors_ is "probably not ok" (probably constitutes deep
2669  // copy). I would say that it's OK, because whichVectors_ is
2670  // immutable (from the user's perspective); it's analogous to
2671  // the dimensions or stride. Once we make whichVectors_ a
2672  // Kokkos::View instead of a Teuchos::Array, all debate will go
2673  // away and we will unquestionably have view semantics.
2674  whichVectors_ = source.whichVectors_;
2675  }
2676  return *this;
2677  }
2678 
2679 
// subCopy(cols): deep-copy the columns listed in cols into a new
// MultiVector. If cols is a contiguous ascending run, delegate to the
// more efficient Range1D overload.
// NOTE (review): extracted listing; the "typedef ... MV;" line
// (rendered line 2686) is missing from this chunk.
2680  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2681  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
2683  subCopy (const Teuchos::ArrayView<const size_t>& cols) const
2684  {
2685  using Teuchos::RCP;
2687 
2688  // Check whether the index set in cols is contiguous. If it is,
2689  // use the more efficient Range1D version of subCopy.
2690  bool contiguous = true;
2691  const size_t numCopyVecs = static_cast<size_t> (cols.size ());
2692  for (size_t j = 1; j < numCopyVecs; ++j) {
2693  if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
2694  contiguous = false;
2695  break;
2696  }
2697  }
2698  if (contiguous && numCopyVecs > 0) {
2699  return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
2700  }
2701  else {
// General case: take a (shallow) subview of the requested columns,
// then deep-copy it into a freshly allocated MultiVector.
2702  RCP<const MV> X_sub = this->subView (cols);
2703  RCP<MV> Y (new MV (this->getMap (), numCopyVecs, false));
2704  Y->assign (*X_sub);
2705  return Y;
2706  }
2707  }
2708 
// subCopy(colRng): deep-copy the contiguous column range colRng into a
// new MultiVector (subview + assign).
// NOTE (review): extracted listing; the "typedef ... MV;" line
// (rendered line 2716) is missing from this chunk.
2709  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2710  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
2712  subCopy (const Teuchos::Range1D &colRng) const
2713  {
2714  using Teuchos::RCP;
2716 
2717  RCP<const MV> X_sub = this->subView (colRng);
2718  RCP<MV> Y (new MV (this->getMap (), static_cast<size_t> (colRng.size ()), false));
2719  Y->assign (*X_sub);
2720  return Y;
2721  }
2722 
// getOrigNumLocalRows: number of rows of the ORIGINAL (pre-offsetView)
// allocation, i.e. dimension 0 of origView_.
// NOTE (review): extracted listing; the qualified signature line and
// opening brace are missing before the return statement.
2723  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2724  size_t
2727  return origView_.dimension_0 ();
2728  }
2729 
// getOrigNumLocalCols: number of columns of the ORIGINAL allocation,
// i.e. dimension 1 of origView_.
// NOTE (review): extracted listing; the qualified signature line and
// opening brace are missing before the return statement.
2730  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2731  size_t
2734  return origView_.dimension_1 ();
2735  }
2736 
// offsetView(subMap, offset): return a const MultiVector viewing rows
// [offset, offset + subMap->getNodeNumElements()) of this one, with the
// given subMap as its Map. The returned object shares storage with
// *this (shallow view). Debug builds verify that neither *this nor the
// returned view changed dimensions/stride/pointer unexpectedly.
// NOTE (review): extracted listing; the "typedef ... MV;" line
// (rendered line 2747) is missing from this chunk.
2737  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2738  Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
2740  offsetView (const Teuchos::RCP<const map_type>& subMap,
2741  const size_t offset) const
2742  {
2743  using Kokkos::ALL;
2744  using Kokkos::subview;
2745  using Teuchos::RCP;
2746  using Teuchos::rcp;
2748 
// Validate that the requested row window fits inside the original
// allocation; throw with a per-process diagnostic if not.
2749  const size_t newNumRows = subMap->getNodeNumElements ();
2750  const bool tooManyElts = newNumRows + offset > this->getOrigNumLocalRows ();
2751  if (tooManyElts) {
2752  const int myRank = this->getMap ()->getComm ()->getRank ();
2753  TEUCHOS_TEST_FOR_EXCEPTION(
2754  newNumRows + offset > this->getLocalLength (), std::runtime_error,
2755  "Tpetra::MultiVector::offsetView(NonConst): Invalid input Map. The "
2756  "input Map owns " << newNumRows << " entries on process " << myRank <<
2757  ". offset = " << offset << ". Yet, the MultiVector contains only "
2758  << this->getOrigNumLocalRows () << " rows on this process.");
2759  }
2760 
2761 #ifdef HAVE_TPETRA_DEBUG
// Snapshot *this's dimensions/stride/host pointer so we can verify
// below that creating the subview did not perturb them.
2762  const size_t strideBefore = this->isConstantStride () ?
2763  this->getStride () :
2764  static_cast<size_t> (0);
2765  const size_t lclNumRowsBefore = this->getLocalLength ();
2766  const size_t numColsBefore = this->getNumVectors ();
2767  const impl_scalar_type* hostPtrBefore =
2768  this->getDualView ().h_view.ptr_on_device ();
2769 #endif // HAVE_TPETRA_DEBUG
2770 
2771  const std::pair<size_t, size_t> rowRng (offset, offset + newNumRows);
2772  // FIXME (mfh 10 May 2014) Use of origView_ instead of view_ for
2773  // the second argument may be wrong, if view_ resulted from a
2774  // previous call to offsetView with offset != 0.
2775  dual_view_type newView =
2776  subview (origView_, rowRng, ALL ());
2777  // NOTE (mfh 06 Jan 2015) Work-around to deal with Kokkos not
2778  // handling subviews of degenerate Views quite so well. For some
2779  // reason, the ([0,0], [0,2]) subview of a 0 x 2 DualView is 0 x
2780  // 0. We work around by creating a new empty DualView of the
2781  // desired (degenerate) dimensions.
2782  if (newView.dimension_0 () == 0 &&
2783  newView.dimension_1 () != view_.dimension_1 ()) {
2784  newView = allocDualView<Scalar,
2785  LocalOrdinal,
2786  GlobalOrdinal,
2787  Node> (size_t (0),
2788  this->getNumVectors ());
2789  }
// Preserve noncontiguous-column bookkeeping in the returned view.
2790  RCP<const MV> subViewMV;
2791  if (isConstantStride ()) {
2792  subViewMV = rcp (new MV (subMap, newView, origView_));
2793  } else {
2794  subViewMV = rcp (new MV (subMap, newView, origView_, whichVectors_ ()));
2795  }
2796 
2797 #ifdef HAVE_TPETRA_DEBUG
2798  const size_t strideAfter = this->isConstantStride () ?
2799  this->getStride () :
2800  static_cast<size_t> (0);
2801  const size_t lclNumRowsAfter = this->getLocalLength ();
2802  const size_t numColsAfter = this->getNumVectors ();
2803  const impl_scalar_type* hostPtrAfter =
2804  this->getDualView ().h_view.ptr_on_device ();
2805 
2806  const size_t strideRet = subViewMV->isConstantStride () ?
2807  subViewMV->getStride () :
2808  static_cast<size_t> (0);
2809  const size_t lclNumRowsRet = subViewMV->getLocalLength ();
2810  const size_t numColsRet = subViewMV->getNumVectors ();
2811 
2812  const char prefix[] = "Tpetra::MultiVector::offsetView: ";
2813  const char suffix[] = ". This should never happen. Please report this "
2814  "bug to the Tpetra developers.";
2815 
2816  TEUCHOS_TEST_FOR_EXCEPTION(
2817  ! subMap.is_null () && lclNumRowsRet != subMap->getNodeNumElements (),
2818  std::logic_error, prefix << "Returned MultiVector has a number of rows "
2819  "different than the number of local indices in the input Map. "
2820  "lclNumRowsRet: " << lclNumRowsRet << ", subMap->getNodeNumElements(): "
2821  << subMap->getNodeNumElements () << suffix);
2822  TEUCHOS_TEST_FOR_EXCEPTION(
2823  strideBefore != strideAfter || lclNumRowsBefore != lclNumRowsAfter ||
2824  numColsBefore != numColsAfter || hostPtrBefore != hostPtrAfter,
2825  std::logic_error, prefix << "Original MultiVector changed dimensions, "
2826  "stride, or host pointer after taking offset view. strideBefore: " <<
2827  strideBefore << ", strideAfter: " << strideAfter << ", lclNumRowsBefore: "
2828  << lclNumRowsBefore << ", lclNumRowsAfter: " << lclNumRowsAfter <<
2829  ", numColsBefore: " << numColsBefore << ", numColsAfter: " <<
2830  numColsAfter << ", hostPtrBefore: " << hostPtrBefore << ", hostPtrAfter: "
2831  << hostPtrAfter << suffix);
2832  TEUCHOS_TEST_FOR_EXCEPTION(
2833  strideBefore != strideRet, std::logic_error, prefix << "Returned "
2834  "MultiVector has different stride than original MultiVector. "
2835  "strideBefore: " << strideBefore << ", strideRet: " << strideRet <<
2836  ", numColsBefore: " << numColsBefore << ", numColsRet: " << numColsRet
2837  << suffix);
2838  TEUCHOS_TEST_FOR_EXCEPTION(
2839  numColsBefore != numColsRet, std::logic_error,
2840  prefix << "Returned MultiVector has a different number of columns than "
2841  "original MultiVector. numColsBefore: " << numColsBefore << ", "
2842  "numColsRet: " << numColsRet << suffix);
2843 #endif // HAVE_TPETRA_DEBUG
2844 
2845  return subViewMV;
2846  }
2847 
2848 
// offsetViewNonConst: non-const variant of offsetView; simply casts
// away constness of the const offset view (safe because both share the
// same underlying storage).
// NOTE (review): extracted listing; the "typedef ... MV;" line
// (rendered line 2855) is missing from this chunk.
2849  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2850  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
2852  offsetViewNonConst (const Teuchos::RCP<const map_type>& subMap,
2853  const size_t offset)
2854  {
2856  return Teuchos::rcp_const_cast<MV> (this->offsetView (subMap, offset));
2857  }
2858 
2859 
// subView(cols): return a const, shallow view of the columns listed in
// cols. Contiguous ascending index sets are routed to the more
// efficient Range1D overload; otherwise the result carries a
// whichVectors_ column-index list.
// NOTE (review): extracted listing; the "typedef ... MV;" line
// (rendered line 2868) is missing from this chunk. Also note the
// unreachable "numViewCols == 0" branch at 2886: the check at 2870
// already throws when cols is empty.
2860  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2861  Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
2863  subView (const Teuchos::ArrayView<const size_t>& cols) const
2864  {
2865  using Teuchos::Array;
2866  using Teuchos::rcp;
2868 
2869  const size_t numViewCols = static_cast<size_t> (cols.size ());
2870  TEUCHOS_TEST_FOR_EXCEPTION(
2871  numViewCols < 1, std::runtime_error, "Tpetra::MultiVector::subView"
2872  "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
2873  "contain at least one entry, but cols.size() = " << cols.size ()
2874  << " == 0.");
2875 
2876  // Check whether the index set in cols is contiguous. If it is,
2877  // use the more efficient Range1D version of subView.
2878  bool contiguous = true;
2879  for (size_t j = 1; j < numViewCols; ++j) {
2880  if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
2881  contiguous = false;
2882  break;
2883  }
2884  }
2885  if (contiguous) {
2886  if (numViewCols == 0) {
2887  // The output MV has no columns, so there is nothing to view.
2888  return rcp (new MV (this->getMap (), numViewCols));
2889  } else {
2890  // Use the more efficient contiguous-index-range version.
2891  return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
2892  }
2893  }
2894 
// Noncontiguous selection: build (or compose) a whichVectors_ list.
2895  if (isConstantStride ()) {
2896  return rcp (new MV (this->getMap (), view_, origView_, cols));
2897  }
2898  else {
// *this is itself a noncontiguous view: translate the requested
// indices through the existing whichVectors_ indirection.
2899  Array<size_t> newcols (cols.size ());
2900  for (size_t j = 0; j < numViewCols; ++j) {
2901  newcols[j] = whichVectors_[cols[j]];
2902  }
2903  return rcp (new MV (this->getMap (), view_, origView_, newcols ()));
2904  }
2905  }
2906 
2907 
// subView(colRng): return a const, shallow view of the contiguous
// column range colRng. Handles the empty range, the constant-stride
// case, the single-column case of a noncontiguous source (which is
// again constant stride), and the general noncontiguous case. In
// debug builds, an all-reduce verifies the result is globally
// consistent.
// NOTE (review): extracted listing; the "typedef ... MV;" line
// (rendered line 2918) is missing from this chunk.
2908  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2909  Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
2911  subView (const Teuchos::Range1D& colRng) const
2912  {
2913  using Kokkos::ALL;
2914  using Kokkos::subview;
2915  using Teuchos::Array;
2916  using Teuchos::RCP;
2917  using Teuchos::rcp;
2919  const char tfecfFuncName[] = "subView(Range1D): ";
2920 
2921  const size_t lclNumRows = this->getLocalLength ();
2922  const size_t numVecs = this->getNumVectors ();
2923  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2924  // colRng.size() == 0, std::runtime_error, prefix << "Range must include "
2925  // "at least one vector.");
// Precondition checks: colRng must fit within [0, numVecs).
2926  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2927  static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
2928  "colRng.size() = " << colRng.size () << " > this->getNumVectors() = "
2929  << numVecs << ".");
2930  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2931  numVecs != 0 && colRng.size () != 0 &&
2932  (colRng.lbound () < static_cast<Teuchos::Ordinal> (0) ||
2933  static_cast<size_t> (colRng.ubound ()) >= numVecs),
2934  std::invalid_argument, "Nonempty input range [" << colRng.lbound () <<
2935  "," << colRng.ubound () << "] exceeds the valid range of column indices "
2936  "[0, " << numVecs << "].");
2937 
2938  RCP<const MV> X_ret; // the MultiVector subview to return
2939 
2940  // FIXME (mfh 14 Apr 2015) Apparently subview on DualView is still
2941  // broken for the case of views with zero rows. I will brutally
2942  // enforce that the subview has the correct dimensions. In
2943  // particular, in the case of zero rows, I will, if necessary,
2944  // create a new dual_view_type with zero rows and the correct
2945  // number of columns. In a debug build, I will use an all-reduce
2946  // to ensure that it has the correct dimensions on all processes.
2947 
2948  const std::pair<size_t, size_t> rows (0, lclNumRows);
2949  if (colRng.size () == 0) {
2950  const std::pair<size_t, size_t> cols (0, 0); // empty range
2951  dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
2952  X_ret = rcp (new MV (this->getMap (), X_sub, origView_));
2953  }
2954  else {
2955  // Returned MultiVector is constant stride only if *this is.
2956  if (isConstantStride ()) {
2957  const std::pair<size_t, size_t> cols (colRng.lbound (),
2958  colRng.ubound () + 1);
2959  dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
2960  X_ret = rcp (new MV (this->getMap (), X_sub, origView_));
2961  }
2962  else {
2963  if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
2964  // We're only asking for one column, so the result does have
2965  // constant stride, even though this MultiVector does not.
// NOTE (review): the offsets below are computed relative to
// whichVectors_[0]; this presumes the stored columns are themselves
// contiguous starting there — verify against the class invariants.
2966  const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
2967  whichVectors_[0] + colRng.ubound () + 1);
2968  dual_view_type X_sub = takeSubview (view_, ALL (), col);
2969  X_ret = rcp (new MV (this->getMap (), X_sub, origView_));
2970  }
2971  else {
// General noncontiguous case: slice the whichVectors_ list.
2972  Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
2973  whichVectors_.begin () + colRng.ubound () + 1);
2974  X_ret = rcp (new MV (this->getMap (), view_, origView_, which));
2975  }
2976  }
2977  }
2978 
2979 #ifdef HAVE_TPETRA_DEBUG
2980  using Teuchos::Comm;
2981  using Teuchos::outArg;
2982  using Teuchos::REDUCE_MIN;
2983  using Teuchos::reduceAll;
2984 
2985  RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2986  this->getMap ()->getComm ();
2987  if (! comm.is_null ()) {
2988  int lclSuccess = 1;
2989  int gblSuccess = 1;
2990 
2991  if (X_ret.is_null ()) {
2992  lclSuccess = 0;
2993  }
2994  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
2995  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2996  lclSuccess != 1, std::logic_error, "X_ret (the subview of this "
2997  "MultiVector; the return value of this method) is null on some MPI "
2998  "process in this MultiVector's communicator. This should never "
2999  "happen. Please report this bug to the Tpetra developers.");
3000 
3001  if (! X_ret.is_null () &&
3002  X_ret->getNumVectors () != static_cast<size_t> (colRng.size ())) {
3003  lclSuccess = 0;
3004  }
3005  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3007  lclSuccess != 1, std::logic_error,
3008  "X_ret->getNumVectors() != colRng.size(), on at least one MPI process "
3009  "in this MultiVector's communicator. This should never happen. "
3010  "Please report this bug to the Tpetra developers.");
3011  }
3012 #endif // HAVE_TPETRA_DEBUG
3013 
3014  return X_ret;
3015  }
3016 
3017 
// subViewNonConst(cols): non-const variant of subView(cols); casts
// away the constness of the shared-storage const view.
// NOTE (review): extracted listing; the "typedef ... MV;" line
// (rendered line 3023) is missing from this chunk.
3018  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3019  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3021  subViewNonConst (const ArrayView<const size_t> &cols)
3022  {
3024  return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3025  }
3026 
3027 
// subViewNonConst(colRng): non-const variant of subView(Range1D); casts
// away the constness of the shared-storage const view.
// NOTE (review): extracted listing; the "typedef ... MV;" line
// (rendered line 3033) is missing from this chunk.
3028  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3029  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3031  subViewNonConst (const Teuchos::Range1D &colRng)
3032  {
3034  return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3035  }
3036 
3037 
// getVector(j): return a const Vector viewing column j of this
// MultiVector (shallow view; shares storage). Debug builds range-check
// j first.
// NOTE (review): extracted listing; the "typedef ... V;" line
// (rendered line 3047) is missing from this chunk.
3038  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3039  Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3041  getVector (const size_t j) const
3042  {
3043  using Kokkos::ALL;
3044  using Kokkos::subview;
3045  using Teuchos::rcp;
3047 
3048 #ifdef HAVE_TPETRA_DEBUG
3049  const char tfecfFuncName[] = "getVector(NonConst): ";
3050  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3051  this->vectorIndexOutOfRange (j), std::runtime_error, "Input index j (== "
3052  << j << ") exceeds valid range [0, " << this->getNumVectors ()
3053  << " - 1].");
3054 #endif // HAVE_TPETRA_DEBUG
// Resolve j to the stored column index for noncontiguous views.
3055  const size_t jj = this->isConstantStride () ?
3056  static_cast<size_t> (j) :
3057  static_cast<size_t> (this->whichVectors_[j]);
3058  const std::pair<size_t, size_t> rng (jj, jj+1);
3059  return rcp (new V (this->getMap (),
3060  takeSubview (this->view_, ALL (), rng),
3061  origView_));
3062  }
3063 
3064 
// getVectorNonConst(j): non-const variant of getVector(j); casts away
// the constness of the shared-storage const Vector view.
// NOTE (review): extracted listing; the "typedef ... V;" line
// (rendered line 3070) is missing from this chunk.
3065  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3066  Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3068  getVectorNonConst (const size_t j)
3069  {
3071  return Teuchos::rcp_const_cast<V> (this->getVector (j));
3072  }
3073 
3074 
// get1dCopy(A, LDA): copy this MultiVector's local data, column by
// column, into the caller-provided 1-D array A with leading dimension
// LDA (column-major packing). Reads from whichever side (host or
// device) of the DualView was most recently modified, so it never
// forces a sync of *this.
3075  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3076  void
3078  get1dCopy (const Teuchos::ArrayView<Scalar>& A, const size_t LDA) const
3079  {
3080  using Kokkos::subview;
3081  typedef impl_scalar_type IST;
3082  typedef MakeUnmanagedView<IST, device_type> view_getter_type;
3083  typedef typename view_getter_type::view_type input_col_type;
3084  // Types of views of this MultiVector's data.
3085  typedef typename dual_view_type::t_host host_view_type;
3086  typedef typename dual_view_type::t_dev dev_view_type;
3087  typedef Kokkos::View<IST*,
3088  typename host_view_type::array_layout,
3089  typename host_view_type::memory_space> host_col_type;
3090  typedef Kokkos::View<IST*,
3091  typename dev_view_type::array_layout,
3092  typename dev_view_type::memory_space> dev_col_type;
3093  const char tfecfFuncName[] = "get1dCopy: ";
3094 
3095  const size_t numRows = this->getLocalLength ();
3096  const size_t numCols = this->getNumVectors ();
3097  const std::pair<size_t, size_t> rowRange (0, numRows);
3098  const std::pair<size_t, size_t> colRange (0, numCols);
3099 
// Precondition checks: LDA must cover the local rows, and A must be
// big enough to hold all numCols packed columns.
3100  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3101  LDA < numRows, std::runtime_error,
3102  "LDA = " << LDA << " < numRows = " << numRows << ".");
3103  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3104  numRows > static_cast<size_t> (0) &&
3105  numCols > static_cast<size_t> (0) &&
3106  static_cast<size_t> (A.size ()) < LDA * (numCols - 1) + numRows,
3107  std::runtime_error,
3108  "A.size() = " << A.size () << ", but its size must be at least "
3109  << (LDA * (numCols - 1) + numRows) << " to hold all the entries.");
3110 
3111  // FIXME (mfh 22 Jul 2014, 10 Dec 2014) Currently, it doesn't work
3112  // to do a 2-D copy, even if this MultiVector has constant stride.
3113  // This is because Kokkos can't currently tell the difference
3114  // between padding (which permits a single deep_copy for the whole
3115  // 2-D View) and stride > numRows (which does NOT permit a single
3116  // deep_copy for the whole 2-D View). Carter is working on this,
3117  // but for now, the temporary fix is to copy one column at a time.
3118 
3119  for (size_t j = 0; j < numCols; ++j) {
3120  const size_t srcCol =
3121  this->isConstantStride () ? j : this->whichVectors_[j];
3122  const size_t dstCol = j;
// Wrap the destination slice of A in an unmanaged 1-D view so
// Kokkos::deep_copy can handle a possible device->host transfer.
3123  IST* const dstColRaw =
3124  reinterpret_cast<IST*> (A.getRawPtr () + LDA * dstCol);
3125  input_col_type dstColView (dstColRaw, numRows);
3126  // Use the most recently updated version of this MultiVector's
3127  // data. This avoids sync'ing, which could violate users'
3128  // expectations.
3129  if (view_.modified_host >= view_.modified_device) {
3130  host_col_type srcColView =
3131  subview (view_.h_view, rowRange, srcCol);
3132  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3133  dstColView.dimension_0 () != srcColView.dimension_0 (),
3134  std::logic_error, ": srcColView and dstColView have different "
3135  "dimensions. Please report this bug to the Tpetra developers.");
3136  Kokkos::deep_copy (dstColView, srcColView);
3137  }
3138  else {
3139  dev_col_type srcColView =
3140  subview (view_.d_view, rowRange, srcCol);
3141  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3142  dstColView.dimension_0 () != srcColView.dimension_0 (),
3143  std::logic_error, ": srcColView and dstColView have different "
3144  "dimensions. Please report this bug to the Tpetra developers.");
3145  Kokkos::deep_copy (dstColView, srcColView);
3146  }
3147  }
3148  }
3149 
3150 
// Copy this MultiVector's local data into caller-provided storage, one
// Teuchos::ArrayView per column.  All input is validated (array count
// and per-column length) before any copying starts, so if an exception
// is thrown, no output array has been modified.
3151  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3152  void
3154  get2dCopy (const Teuchos::ArrayView<const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs) const
3155  {
3157  const char tfecfFuncName[] = "get2dCopy: ";
3158  const size_t numRows = this->getLocalLength ();
3159  const size_t numCols = this->getNumVectors ();
3160 
      // The caller must supply exactly one output array per column.
3161  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3162  static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3163  std::runtime_error, "Input array of pointers must contain as many "
3164  "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3165  << ArrayOfPtrs.size () << " != getNumVectors() = " << numCols << ".");
3166 
3167  if (numRows != 0 && numCols != 0) {
3168  // No side effects until we've validated the input.
3169  for (size_t j = 0; j < numCols; ++j) {
3170  const size_t dstLen = static_cast<size_t> (ArrayOfPtrs[j].size ());
3171  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3172  dstLen < numRows, std::invalid_argument, "Array j = " << j << " of "
3173  "the input array of arrays is not long enough to fit all entries in "
3174  "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3175  << " < getLocalLength() = " << numRows << ".");
3176  }
3177 
3178  // We've validated the input, so it's safe to start copying.
      // Delegate each column to Vector::get1dCopy; getVector(j) resolves
      // noncontiguous (whichVectors_) column selection for us.
3179  for (size_t j = 0; j < numCols; ++j) {
3180  RCP<const V> X_j = this->getVector (j);
3181  const size_t LDA = static_cast<size_t> (ArrayOfPtrs[j].size ());
3182  X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3183  }
3184  }
3185  }
3186 
// Return a persisting, read-only host view of this MultiVector's data
// as a single 1-D array.  Requires constant stride (throws otherwise).
// Returns Teuchos::null if the local data are empty.  This call is a
// device-to-host synchronization point (see the NOTE below).
3187  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3188  Teuchos::ArrayRCP<const Scalar>
3190  get1dView () const
3191  {
3192  if (getLocalLength () == 0 || getNumVectors () == 0) {
3193  return Teuchos::null;
3194  } else {
3195  TEUCHOS_TEST_FOR_EXCEPTION(
3196  ! isConstantStride (), std::runtime_error, "Tpetra::MultiVector::"
3197  "get1dView: This MultiVector does not have constant stride, so it is "
3198  "not possible to view its data as a single array. You may check "
3199  "whether a MultiVector has constant stride by calling "
3200  "isConstantStride().");
3201  // NOTE (mfh 09 2014) get1dView() and get1dViewNonConst() have
3202  // always been device->host synchronization points. We might
3203  // want to change this in the future.
3204  typedef typename dual_view_type::host_mirror_space host_type;
3205  view_.template sync<host_type> ();
3206  // Both get1dView() and get1dViewNonConst() return a host view
3207  // of the data.
      // persistingView keeps the underlying Kokkos allocation alive for
      // the lifetime of the returned ArrayRCP.
3208  Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3209  Kokkos::Compat::persistingView (view_.template view<host_type> ());
3210  return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3211  }
3212  }
3213 
// Nonconst variant of get1dView(): return a persisting, writable host
// view of this MultiVector's data as a single 1-D array.  Requires
// constant stride; returns Teuchos::null for empty local data.  Like
// get1dView(), this is a device-to-host synchronization point.
3214  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3215  Teuchos::ArrayRCP<Scalar>
3218  {
3219  if (getLocalLength () == 0 || getNumVectors () == 0) {
3220  return Teuchos::null;
3221  } else {
3222  TEUCHOS_TEST_FOR_EXCEPTION(
3223  ! isConstantStride (), std::runtime_error, "Tpetra::MultiVector::"
3224  "get1dViewNonConst: This MultiVector does not have constant stride, so "
3225  "it is not possible to view its data as a single array. You may check "
3226  "whether a MultiVector has constant stride by calling "
3227  "isConstantStride().");
3228  // NOTE (mfh 09 May 2014) get1dView() and get1dViewNonConst()
3229  // have always been device->host synchronization points. We
3230  // might want to change this in the future.
3231  typedef typename dual_view_type::host_mirror_space host_type;
3232  view_.template sync<host_type> ();
3233  // Both get1dView() and get1dViewNonConst() return a host view
3234  // of the data.
      // NOTE(review): no modify<host>() is recorded here even though the
      // caller receives a writable view — confirm callers are expected
      // to mark modification themselves.
3235  Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3236  Kokkos::Compat::persistingView (view_.template view<host_type> ());
3237  return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3238  }
3239  }
3240 
// Return one persisting, writable ArrayRCP per column of this
// MultiVector (presumably get2dViewNonConst; the extracted listing
// dropped the signature line).  Handles noncontiguous storage via
// whichVectors_.
// NOTE(review): the returned ArrayRCPs wrap the *device* view
// (X_col.d_view) with no sync — confirm this is intended for hosts
// where device and host memory differ.
3241  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3242  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3245  {
3246  using Teuchos::ArrayRCP;
3247  typedef Kokkos::DualView<impl_scalar_type*,
3248  typename dual_view_type::array_layout, device_type> col_dual_view_type;
3249 
3250  const size_t numCols = getNumVectors ();
3251  ArrayRCP<ArrayRCP<Scalar> > views (numCols);
3252  for (size_t j = 0; j < numCols; ++j) {
      // Map the caller's column index to the actual stored column.
3253  const size_t col = isConstantStride () ? j : whichVectors_[j];
3254  col_dual_view_type X_col =
3255  Kokkos::subview (view_, Kokkos::ALL (), col);
3256  ArrayRCP<impl_scalar_type> X_col_arcp =
3257  Kokkos::Compat::persistingView (X_col.d_view);
3258  views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_col_arcp);
3259  }
3260  return views;
3261  }
3262 
// Const variant: return one persisting, read-only ArrayRCP per column
// of this MultiVector.  Handles noncontiguous storage via whichVectors_.
// NOTE(review): like the nonconst variant, this wraps the *device* view
// (X_col.d_view) without a sync — confirm intended.
3263  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3264  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3266  get2dView () const
3267  {
3268  using Teuchos::ArrayRCP;
3269  typedef Kokkos::DualView<const impl_scalar_type*,
3270  typename dual_view_type::array_layout, device_type> col_dual_view_type;
3271 
3272  const size_t numCols = getNumVectors ();
3273  ArrayRCP<ArrayRCP<const Scalar> > views (numCols);
3274  for (size_t j = 0; j < numCols; ++j) {
      // Map the caller's column index to the actual stored column.
3275  const size_t col = isConstantStride () ? j : whichVectors_[j];
3276  col_dual_view_type X_col =
3277  Kokkos::subview (view_, Kokkos::ALL (), col);
3278  ArrayRCP<const impl_scalar_type> X_col_arcp =
3279  Kokkos::Compat::persistingView (X_col.d_view);
3280  views[j] = Teuchos::arcp_reinterpret_cast<const Scalar> (X_col_arcp);
3281  }
3282  return views;
3283  }
3284 
// Matrix-matrix multiply: *this = beta * (*this) + alpha * op(A) * op(B),
// interpreting the MultiVectors as dense 2-D matrices.  Only the three
// distribution cases enumerated in the comment below are supported.
// (The extracted listing dropped the parameter lines for A and B.)
3285  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3286  void
3288  multiply (Teuchos::ETransp transA,
3289  Teuchos::ETransp transB,
3290  const Scalar& alpha,
3293  const Scalar& beta)
3294  {
3295  using Teuchos::CONJ_TRANS;
3296  using Teuchos::NO_TRANS;
3297  using Teuchos::TRANS;
3298  using Teuchos::RCP;
3299  using Teuchos::rcp;
3300  using Teuchos::rcpFromRef;
3301  typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
3302  typedef Teuchos::ScalarTraits<Scalar> STS;
3304  const char errPrefix[] = "Tpetra::MultiVector::multiply: ";
3305 
3306  // This routine performs a variety of matrix-matrix multiply
3307  // operations, interpreting the MultiVector (this-aka C , A and B)
3308  // as 2D matrices. Variations are due to the fact that A, B and C
3309  // can be local replicated or global distributed MultiVectors and
3310  // that we may or may not operate with the transpose of A and B.
3311  // Possible cases are:
3312  //
3313  // Operations # Cases Notes
3314  // 1) C(local) = A^X(local) * B^X(local) 4 X=Trans or Not, no comm needed
3315  // 2) C(local) = A^T(distr) * B (distr) 1 2-D dot product, replicate C
3316  // 3) C(distr) = A (distr) * B^X(local) 2 2-D vector update, no comm needed
3317  //
3318  // The following operations are not meaningful for 1-D
3319  // distributions:
3320  //
3321  // u1) C(local) = A^T(distr) * B^T(distr) 1
3322  // u2) C(local) = A (distr) * B^X(distr) 2
3323  // u3) C(distr) = A^X(local) * B^X(local) 4
3324  // u4) C(distr) = A^X(local) * B^X(distr) 4
3325  // u5) C(distr) = A^T(distr) * B^X(local) 2
3326  // u6) C(local) = A^X(distr) * B^X(local) 4
3327  // u7) C(distr) = A^X(distr) * B^X(local) 4
3328  // u8) C(local) = A^X(local) * B^X(distr) 4
3329  //
3330  // Total number of cases: 32 (= 2^5).
3331 
      // Plain (non-conjugated) transpose has no device implementation
      // for complex Scalar here, so reject it up front.
3332  TEUCHOS_TEST_FOR_EXCEPTION(
3333  ATS::is_complex && (transA == TRANS || transB == TRANS),
3334  std::invalid_argument, errPrefix << "Transpose without conjugation "
3335  "(transA == TRANS || transB == TRANS) is not supported for complex Scalar "
3336  "types.");
3337 
      // Normalize: anything other than NO_TRANS is treated as CONJ_TRANS
      // from here on (for real Scalar, TRANS == CONJ_TRANS anyway).
3338  transA = (transA == NO_TRANS ? NO_TRANS : CONJ_TRANS);
3339  transB = (transB == NO_TRANS ? NO_TRANS : CONJ_TRANS);
3340 
3341  // Compute effective dimensions, w.r.t. transpose operations on
3342  size_t A_nrows = (transA==CONJ_TRANS) ? A.getNumVectors() : A.getLocalLength();
3343  size_t A_ncols = (transA==CONJ_TRANS) ? A.getLocalLength() : A.getNumVectors();
3344  size_t B_nrows = (transB==CONJ_TRANS) ? B.getNumVectors() : B.getLocalLength();
3345  size_t B_ncols = (transB==CONJ_TRANS) ? B.getLocalLength() : B.getNumVectors();
3346 
3347  impl_scalar_type beta_local = beta; // local copy of beta; might be reassigned below
3348 
3349  TEUCHOS_TEST_FOR_EXCEPTION(
3350  getLocalLength () != A_nrows || getNumVectors () != B_ncols ||
3351  A_ncols != B_nrows, std::runtime_error, errPrefix << "Dimensions of "
3352  "*this, op(A), and op(B) must be consistent. Local part of *this is "
3353  << getLocalLength() << " x " << getNumVectors()
3354  << ", A is " << A_nrows << " x " << A_ncols
3355  << ", and B is " << B_nrows << " x " << B_ncols << ".");
3356 
3357  const bool A_is_local = ! A.isDistributed ();
3358  const bool B_is_local = ! B.isDistributed ();
3359  const bool C_is_local = ! this->isDistributed ();
3360  // Case 1: C(local) = A^X(local) * B^X(local)
3361  const bool Case1 = C_is_local && A_is_local && B_is_local;
3362  // Case 2: C(local) = A^T(distr) * B (distr)
3363  const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
3364  transA == CONJ_TRANS && transB == NO_TRANS;
3365  // Case 3: C(distr) = A (distr) * B^X(local)
3366  const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
3367  transA == NO_TRANS;
3368 
3369  // Test that we are considering a meaningful case
3370  TEUCHOS_TEST_FOR_EXCEPTION(
3371  ! Case1 && ! Case2 && ! Case3, std::runtime_error, errPrefix
3372  << "Multiplication of op(A) and op(B) into *this is not a "
3373  "supported use case.");
3374 
3375  if (beta != STS::zero () && Case2) {
3376  // If Case2, then C is local and contributions must be summed
3377  // across all processes. However, if beta != 0, then accumulate
3378  // beta*C into the sum. When summing across all processes, we
3379  // only want to accumulate this once, so set beta == 0 on all
3380  // processes except Process 0.
3381  const int myRank = this->getMap ()->getComm ()->getRank ();
3382  if (myRank != 0) {
3383  beta_local = STS::zero ();
3384  }
3385  }
3386 
3387  // We only know how to do matrix-matrix multiplies if all the
3388  // MultiVectors have constant stride. If not, we have to make
3389  // temporary copies of those MultiVectors (including possibly
3390  // *this) that don't have constant stride.
3391  RCP<MV> C_tmp;
3392  if (! isConstantStride ()) {
3393  C_tmp = rcp (new MV (*this, Teuchos::Copy)); // deep copy
3394  } else {
      // Non-owning RCP: C_tmp aliases *this, so GEMM writes directly
      // into this MultiVector's data.
3395  C_tmp = rcp (this, false);
3396  }
3397 
3398  RCP<const MV> A_tmp;
3399  if (! A.isConstantStride ()) {
3400  A_tmp = rcp (new MV (A, Teuchos::Copy)); // deep copy
3401  } else {
3402  A_tmp = rcpFromRef (A);
3403  }
3404 
3405  RCP<const MV> B_tmp;
3406  if (! B.isConstantStride ()) {
3407  B_tmp = rcp (new MV (B, Teuchos::Copy)); // deep copy
3408  } else {
3409  B_tmp = rcpFromRef (B);
3410  }
3411 
3412  TEUCHOS_TEST_FOR_EXCEPTION(
3413  ! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
3414  ! A_tmp->isConstantStride (), std::logic_error, errPrefix
3415  << "Failed to make temporary constant-stride copies of MultiVectors.");
3416 
3418 
      // Local GEMM on the device views of the (constant-stride) operands.
3419  gemm_type::GEMM (transA, transB, alpha,
3420  A_tmp->getDualView ().d_view, B_tmp->getDualView ().d_view,
3421  beta_local, C_tmp->getDualView ().d_view);
3422  if (! isConstantStride ()) {
3423  deep_copy (*this, *C_tmp); // Copy the result back into *this.
3424  }
3425 
3426  // Dispose of (possibly) extra copies of A and B.
3427  A_tmp = Teuchos::null;
3428  B_tmp = Teuchos::null;
3429 
3430  // If Case 2 then sum up *this and distribute it to all processes.
3431  if (Case2) {
3432  this->reduce ();
3433  }
3434  }
3435 
// Element-wise multiply: this(i,j) = scalarThis*this(i,j)
// + scalarAB * A(i) * B(i,j), where A is a single Vector broadcast
// across all columns of B.  (The extracted listing dropped the
// parameter lines for A and B.)  Uses a single KokkosBlas::mult on
// all columns when both *this and B have constant stride; otherwise
// falls back to a column-at-a-time loop.
3436  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3437  void
3439  elementWiseMultiply (Scalar scalarAB,
3442  Scalar scalarThis)
3443  {
3444  using Kokkos::ALL;
3445  using Kokkos::subview;
3446  const char tfecfFuncName[] = "elementWiseMultiply: ";
3447  const size_t numVecs = this->getNumVectors ();
3448 
3449 #ifdef HAVE_TPETRA_DEBUG
3450  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3451  getLocalLength() != A.getLocalLength() ||
3452  getLocalLength() != B.getLocalLength(), std::runtime_error,
3453  "MultiVectors do not have the same local length.");
3454 #endif // HAVE_TPETRA_DEBUG
3455  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3456  numVecs != B.getNumVectors (), std::runtime_error, "this->getNumVectors"
3457  "() = " << numVecs << " != B.getNumVectors() = " << B.getNumVectors ()
3458  << ".");
3459 
3460  if (isConstantStride () && B.isConstantStride ()) {
3461  // A is just a Vector; it only has one column, so it always has
3462  // constant stride.
3463  //
3464  // If both *this and B have constant stride, we can do an
3465  // element-wise multiply on all columns at once.
      // Sync all operands to device and mark *this modified there.
3466  view_.template sync<device_type> ();
3467  view_.template modify<device_type> ();
3468  A.view_.template sync<device_type> ();
3469  B.view_.template sync<device_type> ();
3470  KokkosBlas::mult (scalarThis, view_.d_view, scalarAB,
3471  subview (A.view_.d_view, ALL (), 0),
3472  B.view_.d_view);
3473  }
3474  else {
3475  view_.template sync<device_type> ();
3476  view_.template modify<device_type> ();
3477  A.view_.template sync<device_type> ();
3478  B.view_.template sync<device_type> ();
3479 
      // Noncontiguous case: resolve each logical column through
      // whichVectors_ and multiply one column at a time.
3480  for (size_t j = 0; j < numVecs; ++j) {
3481  const size_t C_col = isConstantStride () ? j : whichVectors_[j];
3482  const size_t B_col = B.isConstantStride () ? j : B.whichVectors_[j];
3483 
3484  KokkosBlas::mult (scalarThis,
3485  subview (view_.d_view, ALL (), C_col),
3486  scalarAB,
3487  subview (A.view_.d_view, ALL (), 0),
3488  subview (B.view_.d_view, ALL (), B_col));
3489  }
3490  }
3491  }
3492 
// Sum this locally replicated (non-distributed) MultiVector's entries
// across all processes in its Map's communicator, leaving every process
// with the global sum.  No-op for a single-process communicator.
3493  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3494  void
3497  {
3498  using Kokkos::ALL;
3499  using Kokkos::subview;
3500  using Teuchos::reduceAll;
3501  using Teuchos::REDUCE_SUM;
3502  typedef typename dual_view_type::t_dev device_view_type;
3503  typedef typename dual_view_type::host_mirror_space host_mirror_space;
3504 
3505  TEUCHOS_TEST_FOR_EXCEPTION(
3506  this->isDistributed (), std::runtime_error,
3507  "Tpetra::MultiVector::reduce() should only be called with locally "
3508  "replicated or otherwise not distributed MultiVector objects.");
3509  const Teuchos::Comm<int>& comm = * (this->getMap ()->getComm ());
3510  if (comm.getSize () == 1) {
3511  return;
3512  }
3513 
3514  const size_t numLclRows = getLocalLength ();
3515  const size_t numCols = getNumVectors ();
3516 
3517  // FIXME (mfh 16 June 2014) This exception will cause deadlock if
3518  // it triggers on only some processes. We don't have a good way
3519  // to pack this result into the all-reduce below, but this would
3520  // be a good reason to set a "local error flag" and find other
3521  // opportunities to let it propagate.
3522  TEUCHOS_TEST_FOR_EXCEPTION(
3523  numLclRows > static_cast<size_t> (std::numeric_limits<int>::max ()),
3524  std::runtime_error, "Tpetra::MultiVector::reduce: On Process " <<
3525  comm.getRank () << ", the number of local rows " << numLclRows <<
3526  " does not fit in int.");
3527 
3528  //
3529  // Use MPI to sum the entries across all local blocks.
3530  //
3531  // If this MultiVector's local data are stored contiguously, we
3532  // can use the local View as the source buffer in the
3533  // MPI_Allreduce. Otherwise, we have to allocate a temporary
3534  // source buffer and pack.
3535  const bool contig = isConstantStride () && getStride () == numLclRows;
3536  device_view_type srcBuf;
3537  if (contig) {
3538  srcBuf = view_.d_view;
3539  }
3540  else {
3541  srcBuf = device_view_type ("srcBuf", numLclRows, numCols);
3542  Kokkos::deep_copy (srcBuf, view_.d_view);
3543  }
3544 
3545  // MPI requires that the send and receive buffers don't alias one
3546  // another, so we have to copy temporary storage for the result.
3547  //
3548  // We expect that MPI implementations will know how to read device
3549  // pointers.
3550  device_view_type tgtBuf ("tgtBuf", numLclRows, numCols);
3551 
3552  const int reduceCount = static_cast<int> (numLclRows * numCols);
3553  reduceAll<int, impl_scalar_type> (comm, REDUCE_SUM, reduceCount,
3554  srcBuf.ptr_on_device (),
3555  tgtBuf.ptr_on_device ());
3556 
3557  // Tell the DualView that we plan to modify the device data.
3558  view_.template modify<execution_space> ();
3559 
3560  const std::pair<size_t, size_t> lclRowRange (0, numLclRows);
3561  device_view_type d_view =
3562  subview (view_.d_view, lclRowRange, ALL ());
3563 
      // Copy the reduced result back into our device view: in one shot
      // if the layouts permit, otherwise column by column.
3564  if (contig || isConstantStride ()) {
3565  Kokkos::deep_copy (d_view, tgtBuf);
3566  }
3567  else {
3568  for (size_t j = 0; j < numCols; ++j) {
3569  device_view_type d_view_j =
3570  subview (d_view, ALL (), std::pair<int,int>(j,j+1));
3571  device_view_type tgtBuf_j =
3572  subview (tgtBuf, ALL (), std::pair<int,int>(j,j+1));
3573  Kokkos::deep_copy (d_view_j, tgtBuf_j);
3574  }
3575  }
3576 
3577  // Synchronize the host with changes on the device.
3578  //
3579  // FIXME (mfh 16 June 2014) This raises the question of whether we
3580  // want to synchronize always. Users will find it reassuring if
3581  // MultiVector methods always leave the MultiVector in a
3582  // synchronized state, but it seems silly to synchronize to host
3583  // if they hardly ever need host data.
3584  view_.template sync<host_mirror_space> ();
3585  }
3586 
3587 
// Replace the value at local row MyRow, column VectorIndex with
// ScalarValue.  Writes directly to the host view; index validation
// happens only in debug builds.
// NOTE(review): this writes view_.h_view without calling modify<host>()
// — confirm callers are responsible for marking the host data modified.
3588  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3589  void
3591  replaceLocalValue (LocalOrdinal MyRow,
3592  size_t VectorIndex,
3593  const impl_scalar_type& ScalarValue)
3594  {
3595 #ifdef HAVE_TPETRA_DEBUG
3596  const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
3597  const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
3598  TEUCHOS_TEST_FOR_EXCEPTION(
3599  MyRow < minLocalIndex || MyRow > maxLocalIndex,
3600  std::runtime_error,
3601  "Tpetra::MultiVector::replaceLocalValue: row index " << MyRow
3602  << " is invalid. The range of valid row indices on this process "
3603  << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex
3604  << ", " << maxLocalIndex << "].");
3605  TEUCHOS_TEST_FOR_EXCEPTION(
3606  vectorIndexOutOfRange(VectorIndex),
3607  std::runtime_error,
3608  "Tpetra::MultiVector::replaceLocalValue: vector index " << VectorIndex
3609  << " of the multivector is invalid.");
3610 #endif
      // Resolve the logical column through whichVectors_ if needed.
3611  const size_t colInd = isConstantStride () ?
3612  VectorIndex : whichVectors_[VectorIndex];
3613  view_.h_view (MyRow, colInd) = ScalarValue;
3614  }
3615 
3616 
// Add ScalarValue into the value at local row MyRow, column
// VectorIndex.  Accumulates directly into the host view; index
// validation happens only in debug builds.
// NOTE(review): like replaceLocalValue, this touches view_.h_view
// without modify<host>() — confirm callers mark modification.
3617  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3618  void
3620  sumIntoLocalValue (LocalOrdinal MyRow,
3621  size_t VectorIndex,
3622  const impl_scalar_type& ScalarValue)
3623  {
3624 #ifdef HAVE_TPETRA_DEBUG
3625  const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
3626  const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
3627  TEUCHOS_TEST_FOR_EXCEPTION(
3628  MyRow < minLocalIndex || MyRow > maxLocalIndex,
3629  std::runtime_error,
3630  "Tpetra::MultiVector::sumIntoLocalValue: row index " << MyRow
3631  << " is invalid. The range of valid row indices on this process "
3632  << this->getMap()->getComm()->getRank() << " is [" << minLocalIndex
3633  << ", " << maxLocalIndex << "].");
3634  TEUCHOS_TEST_FOR_EXCEPTION(
3635  vectorIndexOutOfRange(VectorIndex),
3636  std::runtime_error,
3637  "Tpetra::MultiVector::sumIntoLocalValue: vector index " << VectorIndex
3638  << " of the multivector is invalid.");
3639 #endif
      // Resolve the logical column through whichVectors_ if needed.
3640  const size_t colInd = isConstantStride () ?
3641  VectorIndex : whichVectors_[VectorIndex];
3642  view_.h_view (MyRow, colInd) += ScalarValue;
3643  }
3644 
3645 
// Replace the value at global row GlobalRow, column VectorIndex with
// ScalarValue.  Translates the global row index to a local index via
// the Map, then delegates to replaceLocalValue.  Index validation
// happens only in debug builds.
3646  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3647  void
3649  replaceGlobalValue (GlobalOrdinal GlobalRow,
3650  size_t VectorIndex,
3651  const impl_scalar_type& ScalarValue)
3652  {
3653  const LocalOrdinal MyRow = this->getMap ()->getLocalElement (GlobalRow);
3654 #ifdef HAVE_TPETRA_DEBUG
3655  TEUCHOS_TEST_FOR_EXCEPTION(
3656  MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
3657  std::runtime_error,
3658  "Tpetra::MultiVector::replaceGlobalValue: Global row index " << GlobalRow
      // Fixed missing leading space: previously the row index and the
      // message text ran together (e.g. "5is not present...").
3659  << " is not present on this process "
3660  << this->getMap ()->getComm ()->getRank () << ".");
3661  TEUCHOS_TEST_FOR_EXCEPTION(
3662  vectorIndexOutOfRange (VectorIndex), std::runtime_error,
3663  "Tpetra::MultiVector::replaceGlobalValue: Vector index " << VectorIndex
3664  << " of the multivector is invalid.");
3665 #endif // HAVE_TPETRA_DEBUG
3666  replaceLocalValue (MyRow, VectorIndex, ScalarValue);
3667  }
3668 
// Add ScalarValue into the value at global row GlobalRow, column
// VectorIndex.  Translates the global row index to a local index via
// the Map, then delegates to sumIntoLocalValue.  Index validation
// happens only in debug builds.
3669  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3670  void
3672  sumIntoGlobalValue (GlobalOrdinal GlobalRow,
3673  size_t VectorIndex,
3674  const impl_scalar_type& ScalarValue)
3675  {
3676  const LocalOrdinal MyRow = this->getMap ()->getLocalElement (GlobalRow);
      // Fixed macro: was HAVE_TEUCHOS_DEBUG, inconsistent with every
      // sibling method (replaceLocalValue, sumIntoLocalValue,
      // replaceGlobalValue), which guard debug checks with
      // HAVE_TPETRA_DEBUG.
3677 #ifdef HAVE_TPETRA_DEBUG
3678  TEUCHOS_TEST_FOR_EXCEPTION(
3679  MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
3680  std::runtime_error,
3681  "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << GlobalRow
      // Fixed missing leading space: previously the row index and the
      // message text ran together (e.g. "5is not present...").
3682  << " is not present on this process "
3683  << this->getMap ()->getComm ()->getRank () << ".");
3684  TEUCHOS_TEST_FOR_EXCEPTION(
3685  vectorIndexOutOfRange(VectorIndex),
3686  std::runtime_error,
3687  "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << VectorIndex
3688  << " of the multivector is invalid.");
3689 #endif // HAVE_TPETRA_DEBUG
3690  sumIntoLocalValue (MyRow, VectorIndex, ScalarValue);
3691  }
3692 
// Return a persisting ArrayRCP viewing column j of arr's underlying
// storage.  Note the input 'arr' is unused in this implementation:
// the view is taken from view_ directly (column resolved through
// whichVectors_ for noncontiguous storage).
// NOTE(review): wraps the device view (X_col.d_view) without sync —
// confirm intended.
3693  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3694  template <class T>
3695  Teuchos::ArrayRCP<T>
3697  getSubArrayRCP (Teuchos::ArrayRCP<T> arr,
3698  size_t j) const
3699  {
3700  typedef Kokkos::DualView<impl_scalar_type*,
3701  typename dual_view_type::array_layout,
3702  execution_space> col_dual_view_type;
3703  const size_t col = isConstantStride () ? j : whichVectors_[j];
3704  col_dual_view_type X_col =
3705  Kokkos::subview (view_, Kokkos::ALL (), col);
3706  return Kokkos::Compat::persistingView (X_col.d_view);
3707  }
3708 
// Backwards-compatibility shim: build and return a KokkosClassic
// MultiVector wrapping this object's device data on the fly.
3709  template <class Scalar,
3710  class LocalOrdinal,
3711  class GlobalOrdinal,
3712  class Node,
3713  const bool classic>
3714  KokkosClassic::MultiVector<Scalar, Node>
3716  getLocalMV () const
3717  {
3718  using Teuchos::ArrayRCP;
3719  typedef KokkosClassic::MultiVector<Scalar, Node> KMV;
3720 
3721  // This method creates the KokkosClassic object on the fly. Thus,
3722  // it's OK to leave this in place for backwards compatibility.
3723  ArrayRCP<Scalar> data;
3724  if (getLocalLength () == 0 || getNumVectors () == 0) {
3725  data = Teuchos::null;
3726  }
3727  else {
      // Wrap the device view so the returned object keeps the
      // allocation alive for its own lifetime.
3728  ArrayRCP<impl_scalar_type> dataTmp = (getLocalLength() > 0) ?
3729  Kokkos::Compat::persistingView (view_.d_view) :
3730  Teuchos::null;
3731  data = Teuchos::arcp_reinterpret_cast<Scalar> (dataTmp);
3732  }
      // Recover the column stride (LDA) from the *original* view, so
      // that subviews report the stride of the backing allocation.
3733  size_t stride[8];
3734  origView_.stride (stride);
3735  const size_t LDA =
3736  origView_.dimension_1 () > 1 ? stride[1] : origView_.dimension_0 ();
3737 
3738  KMV kmv (this->getMap ()->getNode ());
3739  kmv.initializeValues (getLocalLength (), getNumVectors (),
3740  data, LDA, getOrigNumLocalRows (),
3741  getOrigNumLocalCols ());
3742  return kmv;
3743  }
3744 
// Accessor: return (by value) the Kokkos::DualView holding this
// MultiVector's local data.  (The extracted listing dropped the
// return-type line of the definition.)
3745  template <class Scalar,
3746  class LocalOrdinal,
3747  class GlobalOrdinal,
3748  class Node,
3749  const bool classic>
3752  getDualView () const {
3753  return view_;
3754  }
3755 
// One-line human-readable summary of this MultiVector: type name,
// label, global dimensions, and stride information.  (Presumably
// description(); the extracted listing dropped the signature line.)
3756  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3757  std::string
3760  {
3761  using std::endl;
3762  std::ostringstream oss;
3763  oss << Teuchos::typeName (*this) << " {"
3764  << "label: \"" << this->getObjectLabel () << "\""
3765  << ", numRows: " << getGlobalLength ()
3766  << ", numCols: " << getNumVectors ()
3767  << ", isConstantStride: " << isConstantStride ();
3768  if (isConstantStride ()) {
3769  oss << ", columnStride: " << getStride ();
3770  }
3771  oss << "}";
3772  return oss.str();
3773  }
3774 
// Print a description of this MultiVector to the given output stream,
// with detail controlled by verbLevel: VERB_LOW prints a summary on
// Process 0 only; higher levels add per-process information, up to
// VERB_EXTREME which prints every entry.  Processes take turns
// printing, separated by barriers (output interleaving still depends
// on the stream; the barriers only order the writes).
3775  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3776  void
3778  describe (Teuchos::FancyOStream &out,
3779  const Teuchos::EVerbosityLevel verbLevel) const
3780  {
3781  using Teuchos::ArrayRCP;
3782  using Teuchos::RCP;
3783  using Teuchos::VERB_DEFAULT;
3784  using Teuchos::VERB_NONE;
3785  using Teuchos::VERB_LOW;
3786  using Teuchos::VERB_MEDIUM;
3787  using Teuchos::VERB_HIGH;
3788  using Teuchos::VERB_EXTREME;
3789  using std::endl;
3790  using std::setw;
3791 
3792  // Set default verbosity if applicable.
3793  const Teuchos::EVerbosityLevel vl =
3794  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3795 
3796  RCP<const Teuchos::Comm<int> > comm = this->getMap()->getComm();
3797  const int myImageID = comm->getRank();
3798  const int numImages = comm->getSize();
3799 
3800  if (vl != VERB_NONE) {
3801  // Don't set the tab level unless we're printing something.
3802  Teuchos::OSTab tab0 (out);
3803 
3804  if (myImageID == 0) { // >= VERB_LOW prints description()
3805  out << "Tpetra::MultiVector:" << endl;
3806  Teuchos::OSTab tab1 (out);
3807  out << "Template parameters:" << endl;
3808  {
3809  Teuchos::OSTab tab2 (out);
3810  out << "Scalar: " << Teuchos::TypeNameTraits<Scalar>::name () << endl
3811  << "LocalOrdinal: " << Teuchos::TypeNameTraits<LocalOrdinal>::name () << endl
3812  << "GlobalOrdinal: " << Teuchos::TypeNameTraits<GlobalOrdinal>::name () << endl
3813  << "Node: " << Teuchos::TypeNameTraits<Node>::name () << endl;
3814  }
3815  out << "label: \"" << this->getObjectLabel () << "\"" << endl
3816  << "numRows: " << getGlobalLength () << endl
3817  << "numCols: " << getNumVectors () << endl
3818  << "isConstantStride: " << isConstantStride () << endl;
3819  if (isConstantStride ()) {
3820  out << "columnStride: " << getStride () << endl;
3821  }
3822  }
      // Round-robin over processes so each prints its local data in
      // rank order.
3823  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
3824  if (myImageID == imageCtr) {
3825  if (vl != VERB_LOW) {
3826  // At verbosity > VERB_LOW, each process prints something.
3827  out << "Process " << myImageID << ":" << endl;
3828  Teuchos::OSTab tab2 (out);
3829 
3830  // >= VERB_MEDIUM: print the local vector length.
3831  out << "localNumRows: " << getLocalLength() << endl
3832  << "isConstantStride: " << isConstantStride () << endl;
3833  if (vl != VERB_MEDIUM) {
3834  // >= VERB_HIGH: print isConstantStride() and getStride()
3835  if (isConstantStride()) {
3836  out << "columnStride: " << getStride() << endl;
3837  }
3838  if (vl == VERB_EXTREME) {
3839  // VERB_EXTREME: print all the values in the multivector.
3840  out << "values: " << endl;
      // Reads the host view; columns resolved via whichVectors_
      // for noncontiguous storage.
3841  typename dual_view_type::t_host X = this->getDualView ().h_view;
3842  out << "[";
3843  for (size_t i = 0; i < getLocalLength (); ++i) {
3844  for (size_t j = 0; j < getNumVectors (); ++j) {
3845  const size_t col = isConstantStride () ? j : whichVectors_[j];
3846  out << X(i,col);
3847  if (j + 1 < getNumVectors ()) {
3848  out << ", ";
3849  }
3850  } // for each column
3851  if (i + 1 < getLocalLength ()) {
3852  out << "; ";
3853  }
3854  } // for each row
3855  out << "]" << endl;
3856  } // if vl == VERB_EXTREME
3857  } // if (vl != VERB_MEDIUM)
3858  else { // vl == VERB_LOW
3859  out << endl;
3860  }
3861  } // if vl != VERB_LOW
3862  } // if it is my process' turn to print
3863  comm->barrier ();
3864  } // for each process in the communicator
3865  } // if vl != VERB_NONE
3866  }
3867 
// Legacy DistObject view-management hooks (createViews,
// createViewsNonConst, releaseViews).  All are deliberate no-ops in
// the Kokkos::View-based implementation: the DualView owns the data,
// so there is nothing to create or release.  Both preprocessor
// branches provide the same empty definitions.
3868 #if TPETRA_USE_KOKKOS_DISTOBJECT
3869  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3870  void
3872  createViews () const
3873  {
3874  // Do nothing in Kokkos::View implementation
3875  }
3876 
3877  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3878  void
3880  createViewsNonConst (KokkosClassic::ReadWriteOption rwo)
3881  {
3882  // Do nothing in Kokkos::View implementation
3883  }
3884 
3885  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3886  void
3888  releaseViews () const
3889  {
3890  // Do nothing in Kokkos::View implementation
3891  }
3892 
3893 #else // NOT TPETRA_USE_KOKKOS_DISTOBJECT
3894 
3895  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3896  void
3899  {}
3900 
3901  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3902  void
3904  createViewsNonConst (KokkosClassic::ReadWriteOption /* rwo */ )
3905  {}
3906 
3907  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3908  void
3911  {}
3912 
3913 #endif // TPETRA_USE_KOKKOS_DISTOBJECT
3914 
// Replace this MultiVector's Map with newMap, which excludes processes
// that own no rows.  For a MultiVector the data need no repacking, so
// this reduces to a plain replaceMap.
3915  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3916  void
3918  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
3919  {
3920  replaceMap (newMap);
3921  }
3922 
3923  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3924  void
3927  {
3928  using Kokkos::parallel_for;
3929  typedef LocalOrdinal LO;
3930  typedef device_type DT;
3931  typedef typename dual_view_type::host_mirror_space::device_type HMDT;
3932  const bool debug = false;
3933 
3934  TEUCHOS_TEST_FOR_EXCEPTION(
3935  this->getGlobalLength () != src.getGlobalLength () ||
3936  this->getNumVectors () != src.getNumVectors (), std::invalid_argument,
3937  "Tpetra::deep_copy: Global dimensions of the two Tpetra::MultiVector "
3938  "objects do not match. src has dimensions [" << src.getGlobalLength ()
3939  << "," << src.getNumVectors () << "], and *this has dimensions ["
3940  << this->getGlobalLength () << "," << this->getNumVectors () << "].");
3941  // FIXME (mfh 28 Jul 2014) Don't throw; just set a local error flag.
3942  TEUCHOS_TEST_FOR_EXCEPTION(
3943  this->getLocalLength () != src.getLocalLength (), std::invalid_argument,
3944  "Tpetra::deep_copy: The local row counts of the two Tpetra::MultiVector "
3945  "objects do not match. src has " << src.getLocalLength () << " row(s) "
3946  << " and *this has " << this->getLocalLength () << " row(s).");
3947 
3948  if (debug && this->getMap ()->getComm ()->getRank () == 0) {
3949  std::cout << "*** MultiVector::assign: ";
3950  }
3951 
3952  if (src.isConstantStride () && this->isConstantStride ()) {
3953  if (debug && this->getMap ()->getComm ()->getRank () == 0) {
3954  std::cout << "Both *this and src have constant stride" << std::endl;
3955  }
3956 
3957  if (src.getDualView ().modified_device >= src.getDualView ().modified_host) {
3958  // Device memory has the most recent version of src.
3959  this->template modify<DT> (); // We are about to modify dst on device.
3960  // Copy from src to dst on device.
3961  Details::localDeepCopyConstStride (this->getDualView ().template view<DT> (),
3962  src.getDualView ().template view<DT> ());
3963  this->template sync<HMDT> (); // Sync dst from device to host.
3964  }
3965  else { // Host memory has the most recent version of src.
3966  this->template modify<HMDT> (); // We are about to modify dst on host.
3967  // Copy from src to dst on host.
3968  Details::localDeepCopyConstStride (this->getDualView ().template view<HMDT> (),
3969  src.getDualView ().template view<HMDT> ());
3970  this->template sync<DT> (); // Sync dst from host to device.
3971  }
3972  }
3973  else {
3974  if (this->isConstantStride ()) {
3975  if (debug && this->getMap ()->getComm ()->getRank () == 0) {
3976  std::cout << "Only *this has constant stride";
3977  }
3978 
3979  const LO numWhichVecs = static_cast<LO> (src.whichVectors_.size ());
3980  const std::string whichVecsLabel ("MV::deep_copy::whichVecs");
3981 
3982  // We can't sync src, since it is only an input argument.
3983  // Thus, we have to use the most recently modified version of
3984  // src, device or host.
3985  if (src.getDualView ().modified_device >= src.getDualView ().modified_host) {
3986  if (debug && this->getMap ()->getComm ()->getRank () == 0) {
3987  std::cout << "; Copy from device version of src" << std::endl;
3988  }
3989  // Copy from the device version of src.
3990  //
3991  // whichVecs tells the kernel which vectors (columns) of src
3992  // to copy. Fill whichVecs on the host, and sync to device.
3993  typedef Kokkos::DualView<LO*, DT> whichvecs_type;
3994  whichvecs_type srcWhichVecs (whichVecsLabel, numWhichVecs);
3995  srcWhichVecs.template modify<HMDT> ();
3996  for (LO i = 0; i < numWhichVecs; ++i) {
3997  srcWhichVecs.h_view(i) = static_cast<LO> (src.whichVectors_[i]);
3998  }
3999  // Sync the host version of srcWhichVecs to the device.
4000  srcWhichVecs.template sync<DT> ();
4001 
4002  // Mark the device version of dst's DualView as modified.
4003  this->template modify<DT> ();
4004 
4005  // Copy from the selected vectors of src to dst, on the
4006  // device. The function ignores its dstWhichVecs argument
4007  // in this case.
4008  Details::localDeepCopy (this->getDualView ().template view<DT> (),
4009  src.getDualView ().template view<DT> (),
4010  true, false, srcWhichVecs.d_view, srcWhichVecs.d_view);
4011  // Sync *this' DualView to the host. This is cheaper than
4012  // repeating the above copy from src to *this on the host.
4013  this->template sync<HMDT> ();
4014  }
4015  else { // host version of src was the most recently modified
4016  if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4017  std::cout << "; Copy from host version of src" << std::endl;
4018  }
4019  // Copy from the host version of src.
4020  //
4021  // whichVecs tells the kernel which vectors (columns) of src
4022  // to copy. Fill whichVecs on the host, and use it there.
4023  typedef Kokkos::View<LO*, HMDT> whichvecs_type;
4024  whichvecs_type srcWhichVecs (whichVecsLabel, numWhichVecs);
4025  for (LO i = 0; i < numWhichVecs; ++i) {
4026  srcWhichVecs(i) = static_cast<LO> (src.whichVectors_[i]);
4027  }
4028  // Copy from the selected vectors of src to dst, on the
4029  // host. The function ignores its dstWhichVecs argument in
4030  // this case.
4031  Details::localDeepCopy (this->getDualView ().template view<HMDT> (),
4032  src.getDualView ().template view<HMDT> (),
4033  true, false, srcWhichVecs, srcWhichVecs);
4034  // Sync dst back to the device, since we only copied on the host.
4035  this->template sync<DT> ();
4036  }
4037  }
4038  else { // dst is NOT constant stride
4039  if (src.isConstantStride ()) {
4040  if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4041  std::cout << "Only src has constant stride" << std::endl;
4042  }
4043 
4044  if (src.getDualView ().modified_device >= src.getDualView ().modified_host) {
4045  // Copy from the device version of src.
4046  //
4047  // whichVecs tells the kernel which vectors (columns) of dst
4048  // to copy. Fill whichVecs on the host, and sync to device.
4049  typedef Kokkos::DualView<LO*, DT> whichvecs_type;
4050  const std::string whichVecsLabel ("MV::deep_copy::whichVecs");
4051  const LO numWhichVecs = static_cast<LO> (this->whichVectors_.size ());
4052  whichvecs_type whichVecs (whichVecsLabel, numWhichVecs);
4053  whichVecs.template modify<HMDT> ();
4054  for (LO i = 0; i < numWhichVecs; ++i) {
4055  whichVecs.h_view(i) = this->whichVectors_[i];
4056  }
4057  // Sync the host version of whichVecs to the device.
4058  whichVecs.template sync<DT> ();
4059 
4060  // Copy src to the selected vectors of dst, on the device.
4061  Details::localDeepCopy (this->getDualView ().template view<DT> (),
4062  src.getDualView ().template view<DT> (),
4063  this->isConstantStride (), src.isConstantStride (),
4064  whichVecs.d_view, whichVecs.d_view);
4065  // We can't sync src and repeat the above copy on the
4066  // host, so sync dst back to the host.
4067  //
4068  // FIXME (mfh 29 Jul 2014) This may overwrite columns that
4069  // don't actually belong to dst's view.
4070  this->template sync<HMDT> ();
4071  }
4072  else { // host version of src was the most recently modified
4073  // Copy from the host version of src.
4074  //
4075  // whichVecs tells the kernel which vectors (columns) of src
4076  // to copy. Fill whichVecs on the host, and use it there.
4077  typedef Kokkos::View<LO*, HMDT> whichvecs_type;
4078  const LO numWhichVecs = static_cast<LO> (this->whichVectors_.size ());
4079  whichvecs_type whichVecs ("MV::deep_copy::whichVecs", numWhichVecs);
4080  for (LO i = 0; i < numWhichVecs; ++i) {
4081  whichVecs(i) = static_cast<LO> (this->whichVectors_[i]);
4082  }
4083  // Copy from src to the selected vectors of dst, on the
4084  // host. The functor ignores its 4th arg in this case.
4085  Details::localDeepCopy (this->getDualView ().template view<HMDT> (),
4086  src.getDualView ().template view<HMDT> (),
4087  this->isConstantStride (), src.isConstantStride (),
4088  whichVecs, whichVecs);
4089  // Sync dst back to the device, since we only copied on the host.
4090  //
4091  // FIXME (mfh 29 Jul 2014) This may overwrite columns that
4092  // don't actually belong to dst's view.
4093  this->template sync<DT> ();
4094  }
4095  }
4096  else { // neither src nor dst have constant stride
4097  if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4098  std::cout << "Neither *this nor src has constant stride" << std::endl;
4099  }
4100 
4101  if (src.getDualView ().modified_device >= src.getDualView ().modified_host) {
4102  // Copy from the device version of src.
4103  //
4104  // whichVectorsDst tells the kernel which vectors
4105  // (columns) of dst to copy. Fill it on the host, and
4106  // sync to device.
4107  const LO dstNumWhichVecs = static_cast<LO> (this->whichVectors_.size ());
4108  Kokkos::DualView<LO*, DT> whichVecsDst ("MV::deep_copy::whichVecsDst",
4109  dstNumWhichVecs);
4110  whichVecsDst.template modify<HMDT> ();
4111  for (LO i = 0; i < dstNumWhichVecs; ++i) {
4112  whichVecsDst.h_view(i) = static_cast<LO> (this->whichVectors_[i]);
4113  }
4114  // Sync the host version of whichVecsDst to the device.
4115  whichVecsDst.template sync<DT> ();
4116 
4117  // whichVectorsSrc tells the kernel which vectors
4118  // (columns) of src to copy. Fill it on the host, and
4119  // sync to device. Use the destination MultiVector's
4120  // LocalOrdinal type here.
4121  const LO srcNumWhichVecs = static_cast<LO> (src.whichVectors_.size ());
4122  Kokkos::DualView<LO*, DT> whichVecsSrc ("MV::deep_copy::whichVecsSrc",
4123  srcNumWhichVecs);
4124  whichVecsSrc.template modify<HMDT> ();
4125  for (LO i = 0; i < srcNumWhichVecs; ++i) {
4126  whichVecsSrc.h_view(i) = static_cast<LO> (src.whichVectors_[i]);
4127  }
4128  // Sync the host version of whichVecsSrc to the device.
4129  whichVecsSrc.template sync<DT> ();
4130 
4131  // Copy from the selected vectors of src to the selected
4132  // vectors of dst, on the device.
4133  Details::localDeepCopy (this->getDualView ().template view<DT> (),
4134  src.getDualView ().template view<DT> (),
4135  this->isConstantStride (), src.isConstantStride (),
4136  whichVecsDst.d_view, whichVecsSrc.d_view);
4137  }
4138  else {
4139  const LO dstNumWhichVecs = static_cast<LO> (this->whichVectors_.size ());
4140  Kokkos::View<LO*, HMDT> whichVectorsDst ("dstWhichVecs", dstNumWhichVecs);
4141  for (LO i = 0; i < dstNumWhichVecs; ++i) {
4142  whichVectorsDst(i) = this->whichVectors_[i];
4143  }
4144 
4145  // Use the destination MultiVector's LocalOrdinal type here.
4146  const LO srcNumWhichVecs = static_cast<LO> (src.whichVectors_.size ());
4147  Kokkos::View<LO*, HMDT> whichVectorsSrc ("srcWhichVecs", srcNumWhichVecs);
4148  for (LO i = 0; i < srcNumWhichVecs; ++i) {
4149  whichVectorsSrc(i) = src.whichVectors_[i];
4150  }
4151 
4152  // Copy from the selected vectors of src to the selected
4153  // vectors of dst, on the host.
4154  Details::localDeepCopy (this->getDualView ().template view<HMDT> (),
4155  src.getDualView ().template view<HMDT> (),
4156  this->isConstantStride (), src.isConstantStride (),
4157  whichVectorsDst, whichVectorsSrc);
4158 
4159  // We can't sync src and repeat the above copy on the
4160  // host, so sync dst back to the host.
4161  //
4162  // FIXME (mfh 29 Jul 2014) This may overwrite columns that
4163  // don't actually belong to dst's view.
4164  this->template sync<HMDT> ();
4165  }
4166  }
4167  }
4168  }
4169  }
4170 
4171  template <class Scalar, class LO, class GO, class NT, const bool classic>
4172  Teuchos::RCP<MultiVector<Scalar, LO, GO, NT, classic> >
4173  createMultiVector (const Teuchos::RCP<const Map<LO, GO, NT> >& map,
4174  size_t numVectors)
4175  {
4177  return Teuchos::rcp (new MV (map, numVectors));
4178  }
4179 
4180  template <class ST, class LO, class GO, class NT, const bool classic>
4183  {
4185  MV cpy (src.getMap (), src.getNumVectors (), false);
4186  cpy.assign (src);
4187  return cpy;
4188  }
4189 
4190 } // namespace Tpetra
4191 
4192 //
4193 // Explicit instantiation macro
4194 //
4195 // Must be expanded from within the Tpetra namespace!
4196 //
4197 
// Instantiates, for one (SCALAR, LO, GO, NODE) combination: the
// MultiVector class itself, the nonmember createCopy deep-copy
// function, and the nonmember createMultiVector "constructor".
4198 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4199  template class MultiVector< SCALAR , LO , GO , NODE >; \
4200  template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4201  template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors);
4202 
4203 #endif // TPETRA_MULTIVECTOR_DEF_HPP
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
size_t getLocalLength() const
Local number of rows on the calling process.
void sumIntoLocalValue(LocalOrdinal localRow, size_t col, const impl_scalar_type &value)
Add value to existing value, using local (row) index.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void get1dCopy(const Teuchos::ArrayView< Scalar > &A, const size_t LDA) const
Fill the given array with a copy of this multivector's local values.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > subViewNonConst(const Teuchos::Range1D &colRng)
Return a MultiVector with views of selected columns.
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
void sumIntoGlobalValue(GlobalOrdinal globalRow, size_t col, const impl_scalar_type &value)
Add value to existing value, using global (row) index.
friend void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::ArrayRCP< const Scalar > get1dView() const
Const persisting (1-D) view of this multivector's local values.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Map and their communicator.
MultiVector< ST, LO, GO, NT, classic > createCopy(const MultiVector< ST, LO, GO, NT, classic > &src)
Return a deep copy of the given MultiVector.
void replaceLocalValue(LocalOrdinal localRow, size_t col, const impl_scalar_type &value)
Replace value, using local (row) index.
void normInf(const Kokkos::View< mag_type *, device_type > &norms) const
Compute the infinity-norm of each vector (column), storing the result in a device view...
void abs(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &A)
Put element-wise absolute values of input Multi-vector in target: this(i,j) = abs(A(i,j)).
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
One or more distributed dense vectors.
Teuchos::ArrayRCP< Scalar > getDataNonConst(size_t j)
View of the local values in a particular vector of this multivector.
Teuchos::ArrayRCP< T > getSubArrayRCP(Teuchos::ArrayRCP< T > arr, size_t j) const
Persisting view of j-th column in the given ArrayRCP.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
void get2dCopy(const Teuchos::ArrayView< const Teuchos::ArrayView< Scalar > > &ArrayOfPtrs) const
Fill the given array with a copy of this multivector's local values.
Node::device_type device_type
The Kokkos device type.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > offsetViewNonConst(const Teuchos::RCP< const map_type > &subMap, const size_t offset)
Return a nonconst view of a subset of rows.
size_t getOrigNumLocalCols() const
"Original" number of columns in the (local) data.
void randomize()
Set all values in the multivector to pseudorandom numbers.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
virtual bool checkSizes(const SrcDistObject &sourceObj)
Whether data redistribution between sourceObj and this object is legal.
dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
void norm1(const Kokkos::View< mag_type *, device_type > &norms) const
Compute the one-norm of each vector (column), storing the result in a device view.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
void replaceMap(const Teuchos::RCP< const map_type > &map)
Replace the underlying Map in place.
void meanValue(const Teuchos::ArrayView< impl_scalar_type > &means) const
Compute mean (average) value of each column.
void reduce()
Sum values of a locally replicated multivector across all processes.
size_t global_size_t
Global size_t object.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< const Scalar > > get2dView() const
Return const persisting pointers to values.
EWhichNormImpl
Input argument for localNormImpl() (which see).
void replaceGlobalValue(GlobalOrdinal globalRow, size_t col, const impl_scalar_type &value)
Replace value, using global (row) index.
void norm2(const Kokkos::View< mag_type *, device_type > &norms) const
Compute the two-norm of each vector (column), storing the result in a device view.
Insert new values that don't currently exist.
void createViews() const
Hook for creating a const view.
dual_view_type getDualView() const
Get the Kokkos::DualView which implements local storage.
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with the given verbosity level to a FancyOStream.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > offsetView(const Teuchos::RCP< const map_type > &subMap, const size_t offset) const
Return a const view of a subset of rows.
Node::execution_space execution_space
Type of the (new) Kokkos execution space.
void normWeighted(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &weights, const Teuchos::ArrayView< mag_type > &norms) const
void multiply(Teuchos::ETransp transA, Teuchos::ETransp transB, const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, const Scalar &beta)
Matrix-matrix multiplication: this = beta*this + alpha*op(A)*op(B).
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > & operator=(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &source)
Assign the contents of source to this multivector (deep copy).
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
void createViewsNonConst(KokkosClassic::ReadWriteOption rwo)
Hook for creating a nonconst view.
bool isDistributed() const
Whether this is a globally distributed object.
Teuchos::ArrayRCP< const Scalar > getData(size_t j) const
Const view of the local values in a particular vector of this multivector.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
Teuchos::ArrayRCP< Scalar > get1dViewNonConst()
Nonconst persisting (1-D) view of this multivector's local values.
Replace existing values with new values.
size_t getStride() const
Stride between columns in the multivector.
void reciprocal(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &A)
Put element-wise reciprocal values of input Multi-vector in target, this(i,j) = 1/A(i,j).
KokkosClassic::MultiVector< Scalar, Node > getLocalMV() const
A view of the underlying KokkosClassic::MultiVector object.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
virtual ~MultiVector()
Destructor (virtual for memory safety of derived classes).
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
void releaseViews() const
Hook for releasing views.
Kokkos::Details::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, typename execution_space::execution_space > dual_view_type
Kokkos::DualView specialization used by this class.
Describes a parallel distribution of objects over processes.
Class that provides GEMM for a particular Kokkos Device.
A distributed dense vector.
Stand-alone utility functions and macros.
virtual std::string description() const
A simple one-line description of this object.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &src)
Copy the contents of src into *this (deep copy).
size_t getNumVectors() const
Number of columns in the multivector.
MultiVector()
Default constructor: makes a MultiVector with no rows or columns.
Teuchos::ArrayRCP< Teuchos::ArrayRCP< Scalar > > get2dViewNonConst()
Return non-const persisting pointers to values.
EWhichNorm
Input argument for normImpl() (which see).
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
virtual size_t constantNumberOfPackets() const
Number of packets to send per LID.
void dot(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &A, const Teuchos::ArrayView< dot_type > &dots) const
Compute the dot product of each corresponding pair of vectors (columns) in *this and A.
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
void elementWiseMultiply(Scalar scalarAB, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &A, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, Scalar scalarThis)
Multiply a Vector A elementwise by a MultiVector B.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > subCopy(const Teuchos::Range1D &colRng) const
Return a MultiVector with copies of selected columns.
void normImpl(const Kokkos::View< mag_type *, device_type > &norms, const EWhichNorm whichNorm) const
Compute the norm of each vector (column), storing the result in a device View.
dual_view_type origView_
The "original view" of the MultiVector's data.