Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsMatrix_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIX_DEF_HPP
43 #define TPETRA_CRSMATRIX_DEF_HPP
44 
52 
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_Import_Util.hpp"
55 #include "Tpetra_Import_Util2.hpp"
56 #include "Tpetra_Util.hpp"
57 #include "Teuchos_SerialDenseMatrix.hpp"
58 #include "Teuchos_as.hpp"
59 #include "Teuchos_ArrayRCP.hpp"
60 #include <typeinfo>
61 
62 // CrsMatrix relies on template methods implemented in Tpetra_CrsGraph_def.hpp
63 //
64 // FIXME (mfh 01 Oct 2015) If I comment this out, I get link errors
65 // for CrsGraph's templated methods, even though supposedly the ETI
66 // for CrsGraph instantiates them.
67 #include "Tpetra_CrsGraph_def.hpp"
68 
69 namespace Tpetra {
70  //
71  // Users must never rely on anything in the Details namespace.
72  //
73  namespace Details {
// Binary function object that returns the larger of the magnitudes of
// its two arguments.  Magnitudes are computed with
// Teuchos::ScalarTraits<Scalar>::magnitude, and the result is converted
// to Scalar on return.
83  template<class Scalar>
84  struct AbsMax {
// Return max(magnitude(x), magnitude(y)).  The call operator reads no
// member state, so it is const-qualified; this lets the functor be
// invoked through a const object or a const reference as well.
86  Scalar operator() (const Scalar& x, const Scalar& y) const {
87  typedef Teuchos::ScalarTraits<Scalar> STS;
88  return std::max (STS::magnitude (x), STS::magnitude (y));
89  }
90  };
91 
// Plain struct holding one (i, j, v) triple: a row index i, a column
// index j (both of type Ordinal), and a value v of type Scalar.
103  template <class Ordinal, class Scalar>
104  struct CrsIJV {
// Default constructor.  Sets both indices to
// Teuchos::OrdinalTraits<Ordinal>::invalid() and the value to
// Teuchos::ScalarTraits<Scalar>::zero().
110  CrsIJV () :
111  i (Teuchos::OrdinalTraits<Ordinal>::invalid ()),
112  j (Teuchos::OrdinalTraits<Ordinal>::invalid ()),
113  v (Teuchos::ScalarTraits<Scalar>::zero ())
114  {}
115 
// Construct a triple from the given row index, column index, and value.
121  CrsIJV (Ordinal row, Ordinal col, const Scalar &val) :
122  i (row), j (col), v (val)
123  {}
124 
// Less-than comparison on the row index i only.  Two triples with the
// same i compare equivalent no matter what j and v are.
130  bool operator< (const CrsIJV<Ordinal, Scalar>& rhs) const {
131  // FIXME (mfh 10 May 2013): This is what I found when I moved
132  // this operator out of the std namespace to be an instance
133  // method of CrsIJV. It's a little odd to me that it doesn't
134  // include the column index in the sort order (for the usual
135  // lexicographic sort). It doesn't really matter because
136  // CrsMatrix will sort rows by column index anyway, but it's
137  // still odd.
138  return this->i < rhs.i;
139  }
140 
// Row index.
141  Ordinal i;
// Column index.
142  Ordinal j;
// Value stored at (i, j).
143  Scalar v;
144  };
145 
146  } // namespace Details
147 } // namespace Tpetra
148 
149 namespace Teuchos {
150  // SerializationTraits specialization for Tpetra::Details::CrsIJV.
151  //
152  // Tpetra::Details::CrsIJV can be serialized using
153  // DirectSerialization. This lets Comm send and receive instances
154  // of this class.
155  //
156  // NOTE (mfh 16 Dec 2012): This won't work if Scalar does not
157  // support direct serialization ("just taking the address"). The
158  // usual Scalar types (float, double, dd_real, qd_real, or
159  // std::complex<T> for any of these types) _do_ support direct
160  // serialization.
// Partial specialization for every CrsIJV<Ordinal, Scalar> with int as
// the first template argument.  The class body is empty; everything is
// inherited from DirectSerializationTraits for the same pair of types.
161  template <typename Ordinal, typename Scalar>
162  class SerializationTraits<int, Tpetra::Details::CrsIJV<Ordinal, Scalar> >
163  : public DirectSerializationTraits<int, Tpetra::Details::CrsIJV<Ordinal, Scalar> >
164  {};
165 } // namespace Teuchos
166 
167 namespace Tpetra {
168 
// Constructor taking a row Map and a single per-row entry bound.
//
// rowMap is passed to the dist_object_type base class.  A new
// crs_graph_type is created from (rowMap, maxNumEntriesPerRow, pftype,
// params) and stored in myGraph_; staticGraph_ is then set to the same
// graph.  storageStatus_ is STORAGE_1D_UNPACKED when pftype is
// StaticProfile and STORAGE_2D otherwise.  fillComplete_ starts false and
// frobNorm_ starts at -STM::one().
//
// Throws std::runtime_error if creating the graph throws a
// std::exception; the caught what() text is appended to the message.
//
// NOTE(review): the class-qualifier line of this definition is absent
// from this listing.
169  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
171  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
172  size_t maxNumEntriesPerRow,
173  ProfileType pftype,
174  const RCP<Teuchos::ParameterList>& params) :
175  dist_object_type (rowMap),
176  storageStatus_ (pftype == StaticProfile ?
177  Details::STORAGE_1D_UNPACKED :
178  Details::STORAGE_2D),
179  fillComplete_ (false),
180  frobNorm_ (-STM::one ())
181  {
182  using Teuchos::rcp;
183  try {
184  myGraph_ = rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow,
185  pftype, params));
186  }
187  catch (std::exception& e) {
// Rethrow as std::runtime_error, keeping the original message text.
188  TEUCHOS_TEST_FOR_EXCEPTION(
189  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
190  "exception while allocating CrsGraph: " << e.what ());
191  }
// Both graph handles refer to the graph created above.
192  staticGraph_ = myGraph_;
193  resumeFill (params);
194  checkInternalState ();
195  }
196 
// Constructor taking a row Map and an array of per-row entry counts.
//
// rowMap is passed to the dist_object_type base class.  A new Graph is
// created from (rowMap, NumEntriesPerRowToAlloc, pftype, params) and
// stored in myGraph_; staticGraph_ is then set to the same graph.
// storageStatus_ is STORAGE_1D_UNPACKED when pftype is StaticProfile and
// STORAGE_2D otherwise.  fillComplete_ starts false and frobNorm_ starts
// at -STM::one().
//
// Throws std::runtime_error if creating the graph throws a
// std::exception; the caught what() text is appended to the message.
//
// NOTE(review): the class-qualifier line and one statement after
// resumeFill() are absent from this listing.
197  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
199  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
200  const Teuchos::ArrayRCP<const size_t>& NumEntriesPerRowToAlloc,
201  ProfileType pftype,
202  const Teuchos::RCP<Teuchos::ParameterList>& params) :
203  dist_object_type (rowMap),
204  storageStatus_ (pftype == StaticProfile ?
205  Details::STORAGE_1D_UNPACKED :
206  Details::STORAGE_2D),
207  fillComplete_ (false),
208  frobNorm_ (-STM::one ())
209  {
210  using Teuchos::rcp;
211  try {
212  myGraph_ = rcp (new Graph (rowMap, NumEntriesPerRowToAlloc, pftype, params));
213  }
214  catch (std::exception &e) {
// Rethrow as std::runtime_error, keeping the original message text.
215  TEUCHOS_TEST_FOR_EXCEPTION(
216  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
217  "exception while allocating CrsGraph: " << e.what ());
218  }
// Both graph handles refer to the graph created above.
219  staticGraph_ = myGraph_;
220  resumeFill (params);
222  }
223 
// Constructor taking a row Map, a column Map, and a single per-row entry
// bound.
//
// rowMap is passed to the dist_object_type base class.  A new Graph is
// created from (rowMap, colMap, maxNumEntriesPerRow, pftype, params) and
// stored in myGraph_; staticGraph_ is then set to the same graph.
// storageStatus_ is STORAGE_1D_UNPACKED when pftype is StaticProfile and
// STORAGE_2D otherwise.  fillComplete_ starts false and frobNorm_ starts
// at -STM::one().
//
// Throws std::logic_error if staticGraph_ or myGraph_ is already non-null
// on entry to the body, and std::runtime_error if creating the graph
// throws a std::exception.
//
// NOTE(review): the class-qualifier line and one statement after
// resumeFill() are absent from this listing.
224  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
226  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
227  const Teuchos::RCP<const map_type>& colMap,
228  size_t maxNumEntriesPerRow,
229  ProfileType pftype,
230  const Teuchos::RCP<Teuchos::ParameterList>& params) :
231  dist_object_type (rowMap),
232  storageStatus_ (pftype == StaticProfile ?
233  Details::STORAGE_1D_UNPACKED :
234  Details::STORAGE_2D),
235  fillComplete_ (false),
236  frobNorm_ (-STM::one ())
237  {
238  using Teuchos::rcp;
// Internal consistency checks: neither graph handle has been assigned
// yet at this point, so both are expected to be null.
239  TEUCHOS_TEST_FOR_EXCEPTION(! staticGraph_.is_null(), std::logic_error,
240  "Tpetra::CrsMatrix ctor (row Map, col Map, maxNumEntriesPerRow, ...): "
241  "staticGraph_ is not null at the beginning of the constructor. "
242  "Please report this bug to the Tpetra developers.");
243  TEUCHOS_TEST_FOR_EXCEPTION(! myGraph_.is_null(), std::logic_error,
244  "Tpetra::CrsMatrix ctor (row Map, col Map, maxNumEntriesPerRow, ...): "
245  "myGraph_ is not null at the beginning of the constructor. "
246  "Please report this bug to the Tpetra developers.");
247  try {
248  myGraph_ = rcp (new Graph (rowMap, colMap, maxNumEntriesPerRow,
249  pftype, params));
250  }
251  catch (std::exception &e) {
// Rethrow as std::runtime_error, keeping the original message text.
252  TEUCHOS_TEST_FOR_EXCEPTION(
253  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
254  "exception while allocating CrsGraph: " << e.what ());
255  }
// Both graph handles refer to the graph created above.
256  staticGraph_ = myGraph_;
257  resumeFill (params);
259  }
260 
// Constructor taking a row Map, a column Map, and an array of per-row
// entry counts.
//
// rowMap is passed to the dist_object_type base class.  A new Graph is
// created from (rowMap, colMap, numEntPerRow, pftype, params) and stored
// in myGraph_; staticGraph_ is then set to the same graph.
// storageStatus_ is STORAGE_1D_UNPACKED when pftype is StaticProfile and
// STORAGE_2D otherwise.  fillComplete_ starts false and frobNorm_ starts
// at -STM::one().
//
// Throws std::runtime_error if creating the graph throws a
// std::exception; the caught what() text is appended to the message.
//
// NOTE(review): the class-qualifier line and one statement after
// resumeFill() are absent from this listing.
261  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
263  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
264  const Teuchos::RCP<const map_type>& colMap,
265  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
266  ProfileType pftype,
267  const Teuchos::RCP<Teuchos::ParameterList>& params) :
268  dist_object_type (rowMap),
269  storageStatus_ (pftype == StaticProfile ?
270  Details::STORAGE_1D_UNPACKED :
271  Details::STORAGE_2D),
272  fillComplete_ (false),
273  frobNorm_ (-STM::one ())
274  {
275  using Teuchos::rcp;
276  try {
277  myGraph_ = rcp (new Graph (rowMap, colMap, numEntPerRow, pftype, params));
278  }
279  catch (std::exception &e) {
// Rethrow as std::runtime_error, keeping the original message text.
280  TEUCHOS_TEST_FOR_EXCEPTION(
281  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
282  "exception while allocating CrsGraph: " << e.what ());
283  }
// Both graph handles refer to the graph created above.
284  staticGraph_ = myGraph_;
285  resumeFill (params);
287  }
288 
// Constructor taking an existing const graph.
//
// The graph's row Map is passed to the dist_object_type base class and
// the graph itself is stored in staticGraph_; myGraph_ is not assigned
// here.  storageStatus_ is STORAGE_1D_PACKED, fillComplete_ starts false,
// and frobNorm_ starts at -STM::one().  Values are allocated right away
// through allocateValues().
//
// Throws std::runtime_error if the graph pointer is null or if the graph
// is not fill complete.
//
// NOTE(review): graph->getRowMap() in the initializer list dereferences
// graph before the null check in the body runs, so a null graph does not
// reach that check -- confirm whether the check can ever fire.
// NOTE(review): the class-qualifier line and one statement after
// resumeFill() are absent from this listing.
289  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
291  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
292  const Teuchos::RCP<Teuchos::ParameterList>& params) :
293  dist_object_type (graph->getRowMap ()),
294  staticGraph_ (graph),
295  storageStatus_ (Details::STORAGE_1D_PACKED),
296  fillComplete_ (false),
297  frobNorm_ (-STM::one ())
298  {
299  const char tfecfFuncName[] = "CrsMatrix(graph[,params])";
300  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(staticGraph_.is_null (),
301  std::runtime_error, ": When calling the CrsMatrix constructor that "
302  "accepts a static graph, the pointer to the graph must not be null.");
303  // We prohibit the case where the graph is not yet filled.
304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! staticGraph_->isFillComplete (),
305  std::runtime_error, ": The specified graph is not fill-complete. You "
306  "must invoke fillComplete() on the graph before using it to construct a "
307  "CrsMatrix. Note that calling resumeFill() makes the graph not fill-"
308  "complete, even if you had previously called fillComplete(). In that "
309  "case, you must call fillComplete() on the graph again.");
310  // the graph has entries, and the matrix should have entries as well, set to zero. no need or point in lazy allocating in this case.
311  // first argument LocalIndices is ignored; the graph is already allocated (local or global, we don't care here)
312  allocateValues (LocalIndices, GraphAlreadyAllocated);
313  resumeFill (params);
315  }
316 
// Constructor taking a row Map, a column Map, and the three arrays of a
// compressed sparse row structure (row offsets, column indices, values)
// given as local_matrix_type / local_graph_type array types.
//
// rowMap is passed to the dist_object_type base class.  A new Graph is
// created from (rowMap, colMap, rowPointers, columnIndices, params) and
// stored in myGraph_; staticGraph_ is then set to the same graph, and
// k_values1D_ is assigned from values.  storageStatus_ is
// STORAGE_1D_PACKED, fillComplete_ starts false, and frobNorm_ starts at
// -STM::one().
//
// Throws std::runtime_error if creating the graph throws a
// std::exception; the caught what() text is appended to the message.
//
// NOTE(review): the class-qualifier line and one statement after
// resumeFill() are absent from this listing.
317  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
319  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
320  const Teuchos::RCP<const map_type>& colMap,
321  const typename local_matrix_type::row_map_type& rowPointers,
322  const typename local_graph_type::entries_type::non_const_type& columnIndices,
323  const typename local_matrix_type::values_type& values,
324  const Teuchos::RCP<Teuchos::ParameterList>& params) :
325  dist_object_type (rowMap),
326  storageStatus_ (Details::STORAGE_1D_PACKED),
327  fillComplete_ (false),
328  frobNorm_ (-STM::one ())
329  {
330  using Teuchos::rcp;
331  try {
332  myGraph_ = rcp (new Graph (rowMap, colMap, rowPointers,
333  columnIndices, params));
334  }
335  catch (std::exception &e) {
// Rethrow as std::runtime_error, keeping the original message text.
336  TEUCHOS_TEST_FOR_EXCEPTION(
337  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
338  "exception while allocating CrsGraph: " << e.what ());
339  }
// Both graph handles refer to the graph created above.
340  staticGraph_ = myGraph_;
341  k_values1D_ = values;
342  resumeFill (params);
344  }
345 
// Constructor taking a row Map, a column Map, and the three arrays of a
// compressed sparse row structure (row offsets, column indices, values)
// given as Teuchos::ArrayRCP.
//
// rowMap is passed to the dist_object_type base class.  A new Graph is
// created from (rowMap, colMap, rowPointers, columnIndices, params) and
// stored in myGraph_; staticGraph_ is then set to the same graph.  The
// values array is reinterpreted as impl_scalar_type and handed to
// Kokkos::Compat::getKokkosViewDeepCopy to produce k_values1D_.
// storageStatus_ is STORAGE_1D_PACKED, fillComplete_ starts false, and
// frobNorm_ starts at -STM::one().
//
// Throws std::runtime_error if creating the graph throws a
// std::exception; the caught what() text is appended to the message.
//
// NOTE(review): the class-qualifier line and one statement after
// resumeFill() are absent from this listing.
346  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
348  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
349  const Teuchos::RCP<const map_type>& colMap,
350  const Teuchos::ArrayRCP<size_t> & rowPointers,
351  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
352  const Teuchos::ArrayRCP<Scalar> & values,
353  const Teuchos::RCP<Teuchos::ParameterList>& params) :
354  dist_object_type (rowMap),
355  storageStatus_ (Details::STORAGE_1D_PACKED),
356  fillComplete_ (false),
357  frobNorm_ (-STM::one ())
358  {
359  using Teuchos::rcp;
360  try {
361  myGraph_ = rcp (new Graph (rowMap, colMap, rowPointers,
362  columnIndices, params));
363  }
364  catch (std::exception &e) {
// Rethrow as std::runtime_error, keeping the original message text.
365  TEUCHOS_TEST_FOR_EXCEPTION(
366  true, std::runtime_error, "Tpetra::CrsMatrix constructor: Caught "
367  "exception while allocating CrsGraph: " << e.what ());
368  }
// Both graph handles refer to the graph created above.
369  staticGraph_ = myGraph_;
370  // FIXME (mfh 05 Aug 2014) It should be possible to convince the
371  // ArrayRCP to relinquish its allocation, but that might require
372  // passing the ArrayRCP in by nonconst reference.
// View the caller's Scalar entries as impl_scalar_type before copying.
373  Teuchos::ArrayRCP<impl_scalar_type> vals =
374  Teuchos::arcp_reinterpret_cast<impl_scalar_type> (values);
375  k_values1D_ = Kokkos::Compat::getKokkosViewDeepCopy<device_type> (vals ());
376  resumeFill (params);
378  }
379 
// Constructor taking a row Map, a column Map, and a local_matrix_type.
//
// rowMap is passed to the dist_object_type base class and lclMatrix is
// stored in lclMatrix_.  A new Graph is created from (rowMap, colMap,
// lclMatrix.graph, params) and stored in myGraph_; staticGraph_ is then
// set to the same graph, and k_values1D_ is assigned from
// lclMatrix_.values.  storageStatus_ is STORAGE_1D_PACKED.  Unlike the
// other constructors in this file, this one sets fillComplete_ to true
// before returning and does not call resumeFill().
//
// Throws std::runtime_error if creating the graph throws a
// std::exception.  In HAVE_TPETRA_DEBUG builds, throws std::logic_error
// if the fill state is inconsistent at the end.
//
// NOTE(review): the debug messages below say "at the end of
// fillComplete()" although this is a constructor -- confirm the wording.
// NOTE(review): the class-qualifier line and two body lines (after
// "staticGraph_ = myGraph_;" and before the closing brace) are absent
// from this listing.
380  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
382  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
383  const Teuchos::RCP<const map_type>& colMap,
384  const local_matrix_type& lclMatrix,
385  const Teuchos::RCP<Teuchos::ParameterList>& params) :
386  dist_object_type (rowMap),
387  lclMatrix_ (lclMatrix),
388  storageStatus_ (Details::STORAGE_1D_PACKED),
389  fillComplete_ (false),
390  frobNorm_ (-STM::one ())
391  {
392  using Teuchos::ArrayRCP;
393  using Teuchos::arcp;
394  using Teuchos::rcp;
395  using Teuchos::RCP;
396  const char tfecfFuncName[] = "Tpetra::CrsMatrix(rowMap,colMap,lclMatrix,params): ";
397 
398  try {
399  myGraph_ = rcp (new Graph (rowMap, colMap, lclMatrix.graph, params));
400  }
401  catch (std::exception &e) {
// Rethrow as std::runtime_error, keeping the original message text.
402  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
403  true, std::runtime_error, "Caught exception while allocating "
404  "CrsGraph: " << e.what ());
405  }
// Both graph handles refer to the graph created above.
406  staticGraph_ = myGraph_;
408 
409  k_values1D_ = lclMatrix_.values;
410 
411  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
412 
413  // Now we're fill complete!
414  fillComplete_ = true;
415 
416  // Sanity checks at the end.
417 #ifdef HAVE_TPETRA_DEBUG
418  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive (), std::logic_error,
419  "We're at the end of fillComplete(), but isFillActive() is true. "
420  "Please report this bug to the Tpetra developers.");
421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::logic_error,
422  "We're at the end of fillComplete(), but isFillComplete() is false. "
423  "Please report this bug to the Tpetra developers.");
424 #endif // HAVE_TPETRA_DEBUG
426  }
427 
// Member definition with an empty body.
// NOTE(review): the declarator lines are absent from this listing; given
// its position after the constructors this is presumably the destructor
// -- confirm against the original file.
428  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
431  {}
432 
// Return the communicator reported by the matrix's graph
// (getCrsGraph()->getComm()).
433  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
434  Teuchos::RCP<const Teuchos::Comm<int> >
436  getComm () const {
437  return getCrsGraph ()->getComm ();
438  }
439 
// Return the Node instance reported by the matrix's graph
// (getCrsGraph()->getNode()).
440  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
441  Teuchos::RCP<Node>
443  getNode () const {
444  return getCrsGraph ()->getNode ();
445  }
446 
// Return the profile type reported by the matrix's graph
// (getCrsGraph()->getProfileType()).
// NOTE(review): the return-type line is absent from this listing.
447  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
450  getProfileType () const {
451  return getCrsGraph ()->getProfileType ();
452  }
453 
// Return the current value of the fillComplete_ flag.
454  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
455  bool
457  isFillComplete () const {
458  return fillComplete_;
459  }
460 
// Return the negation of the fillComplete_ flag, so this is always the
// opposite of isFillComplete().
461  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
462  bool
464  isFillActive () const {
465  return ! fillComplete_;
466  }
467 
// Forward to getCrsGraph()->isStorageOptimized().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
468  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
469  bool
472  return getCrsGraph()->isStorageOptimized();
473  }
474 
// Forward to getCrsGraph()->isLocallyIndexed().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
475  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
476  bool
479  return getCrsGraph ()->isLocallyIndexed ();
480  }
481 
// Forward to getCrsGraph()->isGloballyIndexed().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
482  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
483  bool
486  return getCrsGraph ()->isGloballyIndexed ();
487  }
488 
// Return whether the matrix's graph reports having a column Map
// (getCrsGraph()->hasColMap()).
489  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
490  bool
492  hasColMap () const {
493  return getCrsGraph ()->hasColMap ();
494  }
495 
// Forward to getCrsGraph()->getGlobalNumEntries().
// NOTE(review): the return-type and declarator lines are absent from
// this listing.
496  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
500  return getCrsGraph ()->getGlobalNumEntries ();
501  }
502 
// Forward to getCrsGraph()->getNodeNumEntries().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
503  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
504  size_t
507  return getCrsGraph ()->getNodeNumEntries ();
508  }
509 
// Forward to getCrsGraph()->getGlobalNumRows().
// NOTE(review): the return-type and declarator lines are absent from
// this listing.
510  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
514  return getCrsGraph ()->getGlobalNumRows ();
515  }
516 
// Forward to getCrsGraph()->getGlobalNumCols().
// NOTE(review): the return-type and declarator lines are absent from
// this listing.
517  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
521  return getCrsGraph ()->getGlobalNumCols ();
522  }
523 
// Return the row count reported by the matrix's graph
// (getCrsGraph()->getNodeNumRows()).
524  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
525  size_t
527  getNodeNumRows () const {
528  return getCrsGraph ()->getNodeNumRows ();
529  }
530 
// Return the column count reported by the matrix's graph
// (getCrsGraph()->getNodeNumCols()).
531  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
532  size_t
534  getNodeNumCols () const {
535  return getCrsGraph ()->getNodeNumCols ();
536  }
537 
// Forward to getCrsGraph()->getGlobalNumDiags().
// NOTE(review): the return-type and declarator lines are absent from
// this listing.
538  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
542  return getCrsGraph ()->getGlobalNumDiags ();
543  }
544 
// Forward to getCrsGraph()->getNodeNumDiags().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
545  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
546  size_t
549  return getCrsGraph ()->getNodeNumDiags ();
550  }
551 
// Return the entry count that the matrix's graph reports for the row
// with global index globalRow.
552  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
553  size_t
555  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const {
556  return getCrsGraph ()->getNumEntriesInGlobalRow (globalRow);
557  }
558 
// Return the entry count that the matrix's graph reports for the row
// with local index localRow.
559  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
560  size_t
562  getNumEntriesInLocalRow (LocalOrdinal localRow) const {
563  return getCrsGraph ()->getNumEntriesInLocalRow (localRow);
564  }
565 
// Forward to getCrsGraph()->getGlobalMaxNumRowEntries().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
566  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
567  size_t
570  return getCrsGraph ()->getGlobalMaxNumRowEntries ();
571  }
572 
// Forward to getCrsGraph()->getNodeMaxNumRowEntries().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
573  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
574  size_t
577  return getCrsGraph ()->getNodeMaxNumRowEntries ();
578  }
579 
// Return the index base of the matrix's row Map
// (getRowMap()->getIndexBase()).
580  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
581  GlobalOrdinal
583  getIndexBase () const {
584  return getRowMap ()->getIndexBase ();
585  }
586 
// Return the row Map of the matrix's graph (getCrsGraph()->getRowMap()).
587  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
588  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
590  getRowMap () const {
591  return getCrsGraph ()->getRowMap ();
592  }
593 
// Return the column Map of the matrix's graph
// (getCrsGraph()->getColMap()).
594  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
595  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
597  getColMap () const {
598  return getCrsGraph ()->getColMap ();
599  }
600 
// Return the domain Map of the matrix's graph
// (getCrsGraph()->getDomainMap()).
601  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
602  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
604  getDomainMap () const {
605  return getCrsGraph ()->getDomainMap ();
606  }
607 
// Return the range Map of the matrix's graph
// (getCrsGraph()->getRangeMap()).
608  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
609  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
611  getRangeMap () const {
612  return getCrsGraph()->getRangeMap();
613  }
614 
// Return the matrix's graph as a RowGraph pointer: staticGraph_ when it
// is non-null, otherwise myGraph_.
615  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
616  Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
618  getGraph () const {
619  if (staticGraph_ != Teuchos::null) {
620  return staticGraph_;
621  }
622  return myGraph_;
623  }
624 
// Return the matrix's graph as a CrsGraph pointer: staticGraph_ when it
// is non-null, otherwise myGraph_.  Same selection logic as getGraph();
// only the declared return type differs.
625  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
626  Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic> >
628  getCrsGraph () const {
629  if (staticGraph_ != Teuchos::null) {
630  return staticGraph_;
631  }
632  return myGraph_;
633  }
634 
// Forward to getCrsGraph()->isLowerTriangular().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
635  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
636  bool
639  return getCrsGraph ()->isLowerTriangular ();
640  }
641 
// Forward to getCrsGraph()->isUpperTriangular().
// NOTE(review): the declarator line naming this method is absent from
// this listing.
642  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
643  bool
646  return getCrsGraph ()->isUpperTriangular ();
647  }
648 
// Return true exactly when myGraph_ is null, i.e. when the matrix holds
// no nonconst graph handle of its own.
649  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
650  bool
652  isStaticGraph () const {
653  return myGraph_.is_null ();
654  }
655 
// Capability query that unconditionally returns true.
// NOTE(review): the declarator line naming this method is absent from
// this listing -- confirm which method this is against the original file.
656  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
657  bool
660  return true;
661  }
662 
// Capability query that unconditionally returns true.
// NOTE(review): the declarator line naming this method is absent from
// this listing -- confirm which method this is against the original file.
663  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
664  bool
667  return true;
668  }
669 
// Allocate storage for the matrix's values, first allocating the graph's
// indices if the caller says they are not allocated yet.
//
// lg   Passed to myGraph_->allocateIndices() when gas is
//      GraphNotYetAllocated; not otherwise read here.
// gas  The caller's statement of whether the graph's indices are already
//      allocated.  In HAVE_TPETRA_DEBUG builds it is compared against
//      staticGraph_->indicesAreAllocated().
//
// With StaticProfile, k_values1D_ becomes a new values_type whose length
// is the last entry of the graph's row offsets array.  Otherwise
// values2D_ is obtained from
// staticGraph_->allocateValues2D<impl_scalar_type>().
//
// Throws std::logic_error if, with StaticProfile, the row offsets array
// does not have length (local number of rows + 1).  In HAVE_TPETRA_DEBUG
// builds, also throws std::logic_error if gas disagrees with the graph's
// allocation state, or if the graph's indices are unallocated while
// myGraph_ is null.
670  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
671  void
673  allocateValues (ELocalGlobal lg, GraphAllocationStatus gas)
674  {
675 #ifdef HAVE_TPETRA_DEBUG
676  // If the graph indices are already allocated, then gas should be
677  // GraphAlreadyAllocated. Otherwise, gas should be
678  // GraphNotYetAllocated.
679  if ((gas == GraphAlreadyAllocated) != staticGraph_->indicesAreAllocated()) {
// The message is assembled from three fragments so that "not " can be
// inserted in either position, depending on which side is wrong.
680  const std::string err1 ("allocateValues: The caller has asserted that "
681  "the graph is ");
682  const std::string err2 ("already allocated, but the static graph says "
683  "that its indices are ");
684  const std::string err3 ("already allocated. Please report this bug to "
685  "the Tpetra developers.");
686  TEUCHOS_TEST_FOR_EXCEPTION(gas == GraphAlreadyAllocated && ! staticGraph_->indicesAreAllocated(),
687  std::logic_error, err1 << err2 << "not " << err3);
688  TEUCHOS_TEST_FOR_EXCEPTION(gas != GraphAlreadyAllocated && staticGraph_->indicesAreAllocated(),
689  std::logic_error, err1 << "not " << err2 << err3);
690  }
691 
692  // If the graph is unallocated, then it had better be a
693  // matrix-owned graph. ("Matrix-owned graph" means that the
694  // matrix gets to define the graph structure. If the CrsMatrix
695  // constructor that takes an RCP<const CrsGraph> was used, then
696  // the matrix does _not_ own the graph.)
697  TEUCHOS_TEST_FOR_EXCEPTION(
698  ! staticGraph_->indicesAreAllocated() && myGraph_.is_null(),
699  std::logic_error,
700  "allocateValues: The static graph says that its indices are not "
701  "allocated, but the graph is not owned by the matrix. Please report "
702  "this bug to the Tpetra developers.");
703 #endif // HAVE_TPETRA_DEBUG
704 
// Index allocation goes through the nonconst handle myGraph_.
705  if (gas == GraphNotYetAllocated) {
706  myGraph_->allocateIndices (lg);
707  }
708 
709  // Allocate matrix values.
710  if (getProfileType () == StaticProfile) {
711  // "Static profile" means that the number of matrix entries in
712  // each row was fixed at the time the CrsMatrix constructor was
713  // called. This lets us use 1-D storage for the matrix's
714  // values. ("1-D storage" means the same as that used by the
715  // three arrays in the classic compressed sparse row format.)
716 
717  const size_t lclNumRows = staticGraph_->getNodeNumRows ();
718  typename Graph::local_graph_type::row_map_type k_ptrs =
719  staticGraph_->k_rowPtrs_;
720  TEUCHOS_TEST_FOR_EXCEPTION(
721  k_ptrs.dimension_0 () != lclNumRows+1, std::logic_error,
722  "Tpetra::CrsMatrix::allocateValues: With StaticProfile, row offsets "
723  "array has length " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
724  << (lclNumRows+1) << ".");
725  // FIXME (mfh 08 Aug 2014) This assumes UVM. We could fix this
726  // either by storing the row offsets in the graph as a DualView,
727  // or by making a device View of that entry, and copying it back
728  // to host.
// The last row offset is used as the total number of entries to allocate.
729  const size_t lclTotalNumEntries = k_ptrs(lclNumRows);
730 
731  // Allocate array of (packed???) matrix values.
732  typedef typename local_matrix_type::values_type values_type;
733  k_values1D_ = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
734  }
735  else {
736  // "Dynamic profile" means the number of matrix entries in each
737  // row is not fixed and may expand. Thus, we store the matrix's
738  // values in "2-D storage," meaning an array of arrays. The
739  // outer array has as many inner arrays as there are rows in the
740  // matrix, and each inner array stores the values in that row.
741  values2D_ = staticGraph_->template allocateValues2D<impl_scalar_type> ();
742  }
743  }
744 
// Hand back the matrix's three 1-D storage arrays through the output
// arguments.
//
// rowPointers    On return, the result of graph->getNodeRowPtrs().
// columnIndices  On return, the result of graph->getNodePackedIndices().
// values         On return, a view of k_values1D_ reinterpreted as
//                const Scalar.
//
// Throws std::runtime_error if columnIndices and values differ in size on
// entry, if getCrsGraph() returns null, or if either graph accessor
// throws a std::exception (the caught what() text is appended).
//
// NOTE(review): the size check at the top compares the incoming contents
// of two arguments that are overwritten below -- confirm that this
// precondition on the output arguments is intended.
745  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
746  void
748  getAllValues (Teuchos::ArrayRCP<const size_t>& rowPointers,
749  Teuchos::ArrayRCP<const LocalOrdinal>& columnIndices,
750  Teuchos::ArrayRCP<const Scalar>& values) const
751  {
752  const char tfecfFuncName[] = "getAllValues: ";
753  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
754  columnIndices.size () != values.size (), std::runtime_error,
755  "Requires that columnIndices and values are the same size.");
756 
757  RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
758  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
759  relevantGraph.is_null (), std::runtime_error,
760  "Requires that getCrsGraph() is not null.");
// Each graph accessor is wrapped separately so that the rethrown message
// names the call that failed.
761  try {
762  rowPointers = relevantGraph->getNodeRowPtrs ();
763  }
764  catch (std::exception &e) {
765  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
766  true, std::runtime_error,
767  "Caught exception while calling graph->getNodeRowPtrs(): "
768  << e.what ());
769  }
770  try {
771  columnIndices = relevantGraph->getNodePackedIndices ();
772  }
773  catch (std::exception &e) {
774  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
775  true, std::runtime_error,
776  "Caught exception while calling graph->getNodePackedIndices(): "
777  << e.what ());
778  }
// k_values1D_ holds impl_scalar_type; reinterpret it as the Scalar type
// of the public interface before handing it back.
779  Teuchos::ArrayRCP<const impl_scalar_type> vals =
780  Kokkos::Compat::persistingView (k_values1D_);
781  values = Teuchos::arcp_reinterpret_cast<const Scalar> (vals);
782  }
783 
784  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
785  void
787  fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
788  {
789  using Kokkos::create_mirror_view;
790  using Teuchos::arcp_const_cast;
791  using Teuchos::ArrayRCP;
792  using Teuchos::null;
793  using Teuchos::RCP;
794  using Teuchos::rcp;
795  typedef ArrayRCP<size_t>::size_type size_type;
796  typedef typename local_matrix_type::row_map_type row_map_type;
797  typedef typename Graph::t_numRowEntries_ row_entries_type;
798  typedef typename Graph::local_graph_type::entries_type::non_const_type lclinds_1d_type;
799  typedef typename local_matrix_type::values_type values_type;
800 
801  // fillComplete() only calls fillLocalGraphAndMatrix() if the
802  // matrix owns the graph, which means myGraph_ is not null.
803  TEUCHOS_TEST_FOR_EXCEPTION(
804  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
805  "fillLocalGraphAndMatrix (called from fillComplete or "
806  "expertStaticFillComplete): The nonconst graph (myGraph_) is null. This "
807  "means that the matrix has a const (a.k.a. \"static\") graph. This may "
808  "mean that fillComplete or expertStaticFillComplete has a bug, since it "
809  "should never call fillLocalGraphAndMatrix in that case. "
810  "Please report this bug to the Tpetra developers.");
811 
812  const size_t lclNumRows = this->getNodeNumRows ();
813 
814  // This method's goal is to fill in the three arrays (compressed
815  // sparse row format) that define the sparse graph's and matrix's
816  // structure, and the sparse matrix's values.
817  //
818  // Use the nonconst version of row_map_type for k_ptrs,
819  // because row_map_type is const and we need to modify k_ptrs here.
820  typename row_map_type::non_const_type k_ptrs;
821  row_map_type k_ptrs_const;
822  lclinds_1d_type k_inds;
823  values_type k_vals;
824 
825  // Get references to the data in myGraph_, so we can modify them
826  // as well. Note that we only call fillLocalGraphAndMatrix() if
827  // the matrix owns the graph, which means myGraph_ is not null.
828  lclinds_1d_type k_lclInds1D_ = myGraph_->k_lclInds1D_;
829 
830  // The number of entries in each locally owned row. This is a
831  // DualView. 2-D storage lives on host and is currently not
832  // thread-safe for parallel kernels even on host, so we have to
833  // work sequentially with host storage in that case.
834  row_entries_type k_numRowEnt = myGraph_->k_numRowEntries_;
835  typename row_entries_type::t_host h_numRowEnt = k_numRowEnt.h_view;
836 
837  if (getProfileType () == DynamicProfile) {
838  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
839  //
840  // DynamicProfile means that the matrix's column indices and
841  // values are currently stored in a 2-D "unpacked" format, in
842  // the arrays-of-arrays myGraph_->lclInds2D_ (for column
843  // indices) and values2D_ (for values). We allocate 1-D storage
844  // (k_inds resp. k_vals), and then copy from 2-D storage
845  // (lclInds2D_ resp. values2D_) into 1-D storage (k_inds
846  // resp. k_vals).
847  TEUCHOS_TEST_FOR_EXCEPTION(
848  static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows,
849  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix (called "
850  "from fillComplete or expertStaticFillComplete): For the "
851  "DynamicProfile branch, k_numRowEnt has the wrong length. "
852  "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 ()
853  << " != getNodeNumRows() = " << lclNumRows << "");
854 
855  // Pack the row offsets into k_ptrs, by doing a sum-scan of
856  // the array of valid entry counts per row (h_numRowEnt).
857  //
858  // Total number of entries in the matrix on the calling
859  // process. We will compute this in the loop below. It's
860  // cheap to compute and useful as a sanity check.
861  size_t lclTotalNumEntries = 0;
862  // This will be a host view of packed row offsets.
863  typename row_map_type::non_const_type::HostMirror h_ptrs;
864  {
865  // Allocate the packed row offsets array. We use a nonconst
866  // temporary (packedRowOffsets) here, because k_ptrs is const.
867  // We will assign packedRowOffsets to k_ptrs below.
868  typename row_map_type::non_const_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
869  lclNumRows+1);
870  //
871  // FIXME hack until we get parallel_scan in kokkos
872  //
873  h_ptrs = create_mirror_view (packedRowOffsets);
874  h_ptrs(0) = 0;
875  for (size_type i = 0; i < static_cast<size_type> (lclNumRows); ++i) {
876  const size_t numEnt = h_numRowEnt(i);
877  lclTotalNumEntries += numEnt;
878  h_ptrs(i+1) = h_ptrs(i) + numEnt;
879  }
880  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
881  // packedRowOffsets is modifiable; k_ptrs isn't, so we have to
882  // use packedRowOffsets in the loop above and assign here.
883  k_ptrs = packedRowOffsets;
884  k_ptrs_const = k_ptrs;
885  }
886 
887  TEUCHOS_TEST_FOR_EXCEPTION(
888  static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
889  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In "
890  "DynamicProfile branch, after packing k_ptrs, k_ptrs.dimension_0()"
891  " = " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
892  << (lclNumRows+1) << ".");
893  TEUCHOS_TEST_FOR_EXCEPTION(
894  static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
895  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In "
896  "DynamicProfile branch, after packing h_ptrs, h_ptrs.dimension_0()"
897  " = " << h_ptrs.dimension_0 () << " != (lclNumRows+1) = "
898  << (lclNumRows+1) << ".");
899  // FIXME (mfh 08 Aug 2014) This assumes UVM.
900  TEUCHOS_TEST_FOR_EXCEPTION(
901  k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error,
902  "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In DynamicProfile branch, "
903  "after packing k_ptrs, k_ptrs(lclNumRows = " << lclNumRows << ") = " <<
904  k_ptrs(lclNumRows) << " != total number of entries on the calling "
905  "process = " << lclTotalNumEntries << ".");
906 
907  // Allocate the arrays of packed column indices and values.
908  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
909  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
910 
911  // We need host views of the above, since 2-D storage lives on host.
912  typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds);
913  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
914 
915  // Pack the column indices and values on the host.
916  ArrayRCP<Array<LocalOrdinal> > lclInds2D = myGraph_->lclInds2D_;
917  for (size_t row = 0; row < lclNumRows; ++row) {
918  const size_t numEnt = h_numRowEnt(row);
919  std::copy (lclInds2D[row].begin(),
920  lclInds2D[row].begin() + numEnt,
921  h_inds.ptr_on_device() + h_ptrs(row));
922  std::copy (values2D_[row].begin(),
923  values2D_[row].begin() + numEnt,
924  h_vals.ptr_on_device() + h_ptrs(row));
925  }
926  // Copy the packed column indices and values to the device.
927  Kokkos::deep_copy (k_inds, h_inds);
928  Kokkos::deep_copy (k_vals, h_vals);
929 
930  // Sanity check of packed row offsets.
931  if (k_ptrs.dimension_0 () != 0) {
932  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
933  TEUCHOS_TEST_FOR_EXCEPTION(
934  static_cast<size_t> (k_ptrs(numOffsets-1)) != k_vals.dimension_0 (),
935  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
936  "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1)
937  << ") = " << k_ptrs(numOffsets-1) << " != k_vals.dimension_0() = "
938  << k_vals.dimension_0 () << ".");
939  TEUCHOS_TEST_FOR_EXCEPTION(
940  static_cast<size_t> (k_ptrs(numOffsets-1)) != k_inds.dimension_0 (),
941  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
942  "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1)
943  << ") = " << k_ptrs(numOffsets-1) << " != k_inds.dimension_0() = "
944  << k_inds.dimension_0 () << ".");
945  }
946  }
947  else if (getProfileType () == StaticProfile) {
948  // StaticProfile means that the matrix's column indices and
949  // values are currently stored in a 1-D format, with row offsets
950  // in k_rowPtrs_ and local column indices in k_lclInds1D_.
951 
952  // StaticProfile also means that the graph's array of row
953  // offsets must already be allocated.
954  typename Graph::local_graph_type::row_map_type curRowOffsets =
955  myGraph_->k_rowPtrs_;
956  TEUCHOS_TEST_FOR_EXCEPTION(
957  curRowOffsets.dimension_0 () == 0, std::logic_error,
958  "curRowOffsets has size zero, but shouldn't");
959  TEUCHOS_TEST_FOR_EXCEPTION(
960  curRowOffsets.dimension_0 () != lclNumRows + 1, std::logic_error,
961  "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: curRowOffsets has size "
962  << curRowOffsets.dimension_0 () << " != lclNumRows + 1 = "
963  << (lclNumRows + 1) << ".")
964  {
965  const size_t numOffsets = curRowOffsets.dimension_0 ();
966  // FIXME (mfh 06 Aug 2014) This relies on UVM.
967  TEUCHOS_TEST_FOR_EXCEPTION(
968  numOffsets != 0 &&
969  myGraph_->k_lclInds1D_.dimension_0 () != curRowOffsets(numOffsets - 1),
970  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
971  "numOffsets = " << numOffsets << " != 0 and "
972  "myGraph_->k_lclInds1D_.dimension_0() = "
973  << myGraph_->k_lclInds1D_.dimension_0 ()
974  << " != curRowOffsets(" << numOffsets << ") = "
975  << curRowOffsets(numOffsets - 1) << ".");
976  }
977 
978  if (myGraph_->nodeNumEntries_ != myGraph_->nodeNumAllocated_) {
979  // The matrix's current 1-D storage is "unpacked." This means
980  // the row offsets may differ from what the final row offsets
981  // should be. This could happen, for example, if the user
982  // specified StaticProfile in the constructor and set an upper
983  // bound on the number of entries per row, but didn't fill all
984  // those entries.
985  TEUCHOS_TEST_FOR_EXCEPTION(
986  static_cast<size_t> (k_numRowEnt.dimension_0 ()) != lclNumRows,
987  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix (called"
988  " from fillComplete or expertStaticFillComplete): In StaticProfile "
989  "unpacked branch, k_numRowEnt has the wrong length. "
990  "k_numRowEnt.dimension_0() = " << k_numRowEnt.dimension_0 ()
991  << " != getNodeNumRows() = " << lclNumRows << ".");
992 
993  if (curRowOffsets.dimension_0 () != 0) {
994  const size_t numOffsets =
995  static_cast<size_t> (curRowOffsets.dimension_0 ());
996  TEUCHOS_TEST_FOR_EXCEPTION(
997  curRowOffsets(numOffsets-1) != static_cast<size_t> (k_values1D_.dimension_0 ()),
998  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
999  "In StaticProfile branch, before allocating or packing, "
1000  "curRowOffsets(" << (numOffsets-1) << ") = "
1001  << curRowOffsets(numOffsets - 1)
1002  << " != k_values1D_.dimension_0() = "
1003  << k_values1D_.dimension_0 () << ".");
1004  TEUCHOS_TEST_FOR_EXCEPTION(
1005  static_cast<size_t> (curRowOffsets(numOffsets - 1)) !=
1006  myGraph_->k_lclInds1D_.dimension_0 (),
1007  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1008  "In StaticProfile branch, before allocating or packing, "
1009  "curRowOffsets(" << (numOffsets-1) << ") = "
1010  << curRowOffsets(numOffsets - 1)
1011  << " != myGraph_->k_lclInds1D_.dimension_0() = "
1012  << myGraph_->k_lclInds1D_.dimension_0 () << ".");
1013  }
1014 
1015  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1016  // the array of valid entry counts per row (h_numRowEnt).
1017 
1018  // Total number of entries in the matrix on the calling
1019  // process. We will compute this in the loop below. It's
1020  // cheap to compute and useful as a sanity check.
1021  size_t lclTotalNumEntries = 0;
1022  // This will be a host view of packed row offsets.
1023  typename row_map_type::non_const_type::HostMirror h_ptrs;
1024  {
1025  // Allocate the packed row offsets array. We use a nonconst
1026  // temporary (packedRowOffsets) here, because k_ptrs is
1027  // const. We will assign packedRowOffsets to k_ptrs below.
1028  typename row_map_type::non_const_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
1029  lclNumRows+1);
1030  //
1031  // FIXME hack until we get parallel_scan in Kokkos
1032  //
1033  // Unlike in the 2-D storage case above, we don't need the
1034  // host view of the packed row offsets array after packing
1035  // the row offsets.
1036  h_ptrs = create_mirror_view (packedRowOffsets);
1037  h_ptrs(0) = 0;
1038  for (size_type i = 0; i < static_cast<size_type> (lclNumRows); ++i) {
1039  const size_t numEnt = h_numRowEnt(i);
1040  lclTotalNumEntries += numEnt;
1041  h_ptrs(i+1) = h_ptrs(i) + numEnt;
1042  }
1043  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
1044  // packedRowOffsets is modifiable; k_ptrs isn't, so we have
1045  // to use packedRowOffsets in the loop above and assign here.
1046  k_ptrs = packedRowOffsets;
1047  k_ptrs_const = k_ptrs;
1048  }
1049 
1050  TEUCHOS_TEST_FOR_EXCEPTION(
1051  static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
1052  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: For "
1053  "the StaticProfile unpacked-but-pack branch, after packing k_ptrs, "
1054  "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != "
1055  "lclNumRows+1 = " << (lclNumRows+1) << ".");
1056  // FIXME (mfh 06 Aug 2014) This assumes UVM.
1057  TEUCHOS_TEST_FOR_EXCEPTION(
1058  k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error,
1059  "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: In StaticProfile "
1060  "unpacked-but-pack branch, after filling k_ptrs, k_ptrs(lclNumRows="
1061  << lclNumRows << ") = " << k_ptrs(lclNumRows) << " != total number "
1062  "of entries on the calling process = " << lclTotalNumEntries << ".");
1063 
1064  // Allocate the arrays of packed column indices and values.
1065  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1066  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1067 
1068  // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_,
1069  // and k_values1D_ are currently unpacked. Pack them, using
1070  // the packed row offsets array k_ptrs that we created above.
1071  //
1072  // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we
1073  // need to keep around the unpacked row offsets, column
1074  // indices, and values arrays.
1075 
1076  // Pack the column indices from unpacked k_lclInds1D_ into
1077  // packed k_inds. We will replace k_lclInds1D_ below.
1078  typedef pack_functor<typename Graph::local_graph_type::entries_type::non_const_type,
1079  typename Graph::local_graph_type::row_map_type>
1080  inds_packer_type;
1081  inds_packer_type indsPacker (k_inds, myGraph_->k_lclInds1D_,
1082  k_ptrs, curRowOffsets);
1083  Kokkos::parallel_for (lclNumRows, indsPacker);
1084 
1085  // Pack the values from unpacked k_values1D_ into packed
1086  // k_vals. We will replace k_values1D_ below.
1087  typedef pack_functor<values_type, row_map_type> vals_packer_type;
1088  vals_packer_type valsPacker (k_vals, this->k_values1D_,
1089  k_ptrs, curRowOffsets);
1090  Kokkos::parallel_for (lclNumRows, valsPacker);
1091 
1092  TEUCHOS_TEST_FOR_EXCEPTION(
1093  k_ptrs.dimension_0 () == 0, std::logic_error, "Tpetra::CrsMatrix::"
1094  "fillLocalGraphAndMatrix: In StaticProfile \"Optimize Storage\" = "
1095  "true branch, after packing, k_ptrs.dimension_0() = 0. This "
1096  "probably means that k_rowPtrs_ was never allocated.");
1097  if (k_ptrs.dimension_0 () != 0) {
1098  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1099  TEUCHOS_TEST_FOR_EXCEPTION(
1100  static_cast<size_t> (k_ptrs(numOffsets - 1)) != k_vals.dimension_0 (),
1101  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1102  "In StaticProfile \"Optimize Storage\"=true branch, after packing, "
1103  "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs(numOffsets-1) <<
1104  " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1105  TEUCHOS_TEST_FOR_EXCEPTION(
1106  static_cast<size_t> (k_ptrs(numOffsets - 1)) != k_inds.dimension_0 (),
1107  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1108  "In StaticProfile \"Optimize Storage\"=true branch, after packing, "
1109  "k_ptrs(" << (numOffsets-1) << ") = " << k_ptrs(numOffsets-1) <<
1110  " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1111  }
1112  }
1113  else { // We don't have to pack, so just set the pointers.
1114  k_ptrs_const = myGraph_->k_rowPtrs_;
1115  k_inds = myGraph_->k_lclInds1D_;
1116  k_vals = this->k_values1D_;
1117 
1118  TEUCHOS_TEST_FOR_EXCEPTION(
1119  k_ptrs_const.dimension_0 () == 0, std::logic_error, "Tpetra::CrsMatrix::"
1120  "fillLocalGraphAndMatrix: In StaticProfile \"Optimize Storage\" = "
1121  "false branch, k_ptrs_const.dimension_0() = 0. This probably means that "
1122  "k_rowPtrs_ was never allocated.");
1123  if (k_ptrs_const.dimension_0 () != 0) {
1124  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
1125  TEUCHOS_TEST_FOR_EXCEPTION(
1126  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_vals.dimension_0 (),
1127  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1128  "In StaticProfile \"Optimize Storage\" = false branch, "
1129  "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets - 1)
1130  << " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1131  TEUCHOS_TEST_FOR_EXCEPTION(
1132  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (),
1133  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: "
1134  "In StaticProfile \"Optimize Storage\" = false branch, "
1135  "k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets - 1)
1136  << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1137  }
1138  }
1139  }
1140 
1141  // Extra sanity checks.
1142  TEUCHOS_TEST_FOR_EXCEPTION(
1143  static_cast<size_t> (k_ptrs_const.dimension_0 ()) != lclNumRows + 1,
1144  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After "
1145  "packing, k_ptrs_const.dimension_0() = " << k_ptrs_const.dimension_0 ()
1146  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
1147  if (k_ptrs_const.dimension_0 () != 0) {
1148  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
1149  TEUCHOS_TEST_FOR_EXCEPTION(
1150  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_vals.dimension_0 (),
1151  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After "
1152  "packing, k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets-1)
1153  << " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1154  TEUCHOS_TEST_FOR_EXCEPTION(
1155  static_cast<size_t> (k_ptrs_const(numOffsets - 1)) != k_inds.dimension_0 (),
1156  std::logic_error, "Tpetra::CrsMatrix::fillLocalGraphAndMatrix: After "
1157  "packing, k_ptrs_const(" << (numOffsets-1) << ") = " << k_ptrs_const(numOffsets-1)
1158  << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1159  }
1160 
1161  // May we ditch the old allocations for the packed (and otherwise
1162  // "optimized") allocations, later in this routine? Optimize
1163  // storage if the graph is not static, or if the graph already has
1164  // optimized storage.
1165  const bool defaultOptStorage =
1166  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1167  const bool requestOptimizedStorage =
1168  (! params.is_null () && params->get ("Optimize Storage", defaultOptStorage)) ||
1169  (params.is_null () && defaultOptStorage);
1170 
1171  // The graph has optimized storage when indices are allocated,
1172  // myGraph_->k_numRowEntries_ is empty, and there are more than
1173  // zero rows on this process. It's impossible for the graph to
1174  // have dynamic profile (getProfileType() == DynamicProfile) and
1175  // be optimized (isStorageOptimized()).
1176  if (requestOptimizedStorage) {
1177  // Free the old, unpacked, unoptimized allocations.
1178  // Change the graph from dynamic to static allocation profile
1179 
1180  // Free graph data structures that are only needed for 2-D or
1181  // unpacked 1-D storage.
1182  myGraph_->lclInds2D_ = null; // legacy KokkosClassic 2-D storage
1183  myGraph_->k_numRowEntries_ = row_entries_type ();
1184 
1185  // Free the matrix's 2-D storage.
1186  this->values2D_ = null;
1187 
1188  // Keep the new 1-D packed allocations.
1189  myGraph_->k_rowPtrs_ = k_ptrs_const;
1190  myGraph_->k_lclInds1D_ = k_inds;
1191  this->k_values1D_ = k_vals;
1192 
1193  // Storage is packed now, so the number of allocated entries is
1194  // the same as the actual number of entries.
1195  myGraph_->nodeNumAllocated_ = myGraph_->nodeNumEntries_;
1196  // The graph is definitely StaticProfile now, whether or not it
1197  // was before.
1198  myGraph_->pftype_ = StaticProfile;
1199  myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1200  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1201  }
1202 
1203  // Make the local graph, using the arrays of row offsets and
1204  // column indices that we built above. The local graph should be
1205  // null, but we delete it first so that any memory can be freed
1206  // before we allocate the new one.
1207  //
1208  // FIXME (mfh 06,28 Aug 2014) It would make more sense for
1209  // Tpetra::CrsGraph to have a protected method that accepts k_inds
1210  // and k_ptrs, and creates the local graph lclGraph_.
1211  myGraph_->lclGraph_ =
1212  typename Graph::local_graph_type (k_inds, k_ptrs_const);
1213 
1214  // Make the local matrix, using the local graph and vals array.
1215  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1216  getNodeNumCols (), k_vals,
1217  myGraph_->lclGraph_);
1218  }
1219 
1220  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1221  void
1223  fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1224  {
1225  using Kokkos::create_mirror_view;
1226  using Teuchos::ArrayRCP;
1227  using Teuchos::Array;
1228  using Teuchos::null;
1229  using Teuchos::RCP;
1230  using Teuchos::rcp;
1231  typedef LocalOrdinal LO;
1232  typedef typename Graph::t_numRowEntries_ row_entries_type;
1233  typedef typename Graph::local_graph_type::row_map_type row_map_type;
1234  typedef typename row_map_type::non_const_type non_const_row_map_type;
1235  typedef typename local_matrix_type::values_type values_type;
1236 
1237  const size_t lclNumRows = getNodeNumRows();
1238  const map_type& rowMap = * (getRowMap ());
1239  RCP<node_type> node = rowMap.getNode ();
1240 
1241  // The goals of this routine are first, to allocate and fill
1242  // packed 1-D storage (see below for an explanation) in the vals
1243  // array, and second, to give vals to the local matrix and
1244  // finalize the local matrix. We only need k_ptrs, the packed 1-D
1245  // row offsets, within the scope of this routine, since we're only
1246  // filling the local matrix here (use fillLocalGraphAndMatrix() to
1247  // fill both the graph and the matrix at the same time).
1248 
1249  // get data from staticGraph_
1250  ArrayRCP<Array<LO> > lclInds2D = staticGraph_->lclInds2D_;
1251  size_t nodeNumEntries = staticGraph_->nodeNumEntries_;
1252  size_t nodeNumAllocated = staticGraph_->nodeNumAllocated_;
1253  row_map_type k_rowPtrs_ = staticGraph_->lclGraph_.row_map;
1254 
1255  row_map_type k_ptrs; // "packed" row offsets array
1256  values_type k_vals; // "packed" values array
1257 
1258  // May we ditch the old allocations for the packed (and otherwise
1259  // "optimized") allocations, later in this routine? Request
1260  // optimized storage by default.
1261  bool requestOptimizedStorage = true;
1262  const bool default_OptimizeStorage =
1263  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1264  if (! params.is_null () && ! params->get ("Optimize Storage", default_OptimizeStorage)) {
1265  requestOptimizedStorage = false;
1266  }
1267  // If we're not allowed to change a static graph, then we can't
1268  // change the storage of the matrix, either. This means that if
1269  // the graph's storage isn't already optimized, we can't optimize
1270  // the matrix's storage either. Check and give warning, as
1271  // appropriate.
1272  if (! staticGraph_->isStorageOptimized () && requestOptimizedStorage) {
1273  TPETRA_ABUSE_WARNING(true, std::runtime_error,
1274  "::fillLocalMatrix(): You requested optimized storage by setting the"
1275  "\"Optimize Storage\" flag to \"true\" in the parameter list, or by virtue"
1276  "of default behavior. However, the associated CrsGraph was filled separately"
1277  "and requested not to optimize storage. Therefore, the CrsMatrix cannot"
1278  "optimize storage.");
1279  requestOptimizedStorage = false;
1280  }
1281 
1282  // The number of entries in each locally owned row. This is a
1283  // DualView. 2-D storage lives on host and is currently not
1284  // thread-safe for parallel kernels even on host, so we have to
1285  // work sequentially with host storage in that case.
1286  row_entries_type k_numRowEnt = staticGraph_->k_numRowEntries_;
1287  typename row_entries_type::t_host h_numRowEnt = k_numRowEnt.h_view;
1288 
1289  if (getProfileType() == DynamicProfile) {
1290  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1291  //
1292  // DynamicProfile means that the matrix's values are currently
1293  // stored in a 2-D "unpacked" format, in the array-of-arrays
1294  // values2D_. We allocate 1-D storage and then copy from 2-D
1295  // storage in values2D_ into 1-D storage in k_vals. Since we're
1296  // only allocating the local matrix here, not the local graph,
1297  // we don't need to keep the row offsets array, but we do need
1298  // it here temporarily in order to convert to 1-D storage. (The
1299  // allocStorage() function needs it.) We'll free ptrs later in
1300  // this method.
1301  //
1302  // FIXME (mfh 08 Aug 2014) If we're in this method, then the
1303  // graph should already have packed 1-D storage. Why can't we
1304  // just use the graph's current row offsets array?
1305 
1306  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1307  // the array of valid entry counts per row (h_numRowEnt).
1308  //
1309  // Total number of entries in the matrix on the calling
1310  // process. We will compute this in the loop below. It's
1311  // cheap to compute and useful as a sanity check.
1312  size_t lclTotalNumEntries = 0;
1313  // This will be a host view of packed row offsets.
1314  typename non_const_row_map_type::HostMirror h_ptrs;
1315  {
1316  non_const_row_map_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
1317  lclNumRows+1);
1318  //
1319  // FIXME hack until we get parallel_scan in Kokkos
1320  //
1321  h_ptrs = create_mirror_view (packedRowOffsets);
1322  h_ptrs(0) = 0;
1323  for (size_t i = 0; i < lclNumRows; ++i) {
1324  const size_t numEnt = h_numRowEnt(i);
1325  lclTotalNumEntries += numEnt;
1326  h_ptrs(i+1) = h_ptrs(i) + numEnt;
1327  }
1328  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
1329  k_ptrs = packedRowOffsets;
1330  }
1331 
1332  TEUCHOS_TEST_FOR_EXCEPTION(
1333  static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
1334  std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: In "
1335  "DynamicProfile branch, after packing k_ptrs, k_ptrs.dimension_0()"
1336  " = " << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
1337  << (lclNumRows+1) << ".");
1338  TEUCHOS_TEST_FOR_EXCEPTION(
1339  static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
1340  std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: In "
1341  "DynamicProfile branch, after packing h_ptrs, h_ptrs.dimension_0()"
1342  " = " << h_ptrs.dimension_0 () << " != (lclNumRows+1) = "
1343  << (lclNumRows+1) << ".");
1344  // FIXME (mfh 08 Aug 2014) This assumes UVM.
1345  TEUCHOS_TEST_FOR_EXCEPTION(
1346  k_ptrs(lclNumRows) != lclTotalNumEntries, std::logic_error,
1347  "Tpetra::CrsMatrix::fillLocalMatrix: In DynamicProfile branch, "
1348  "after packing k_ptrs, k_ptrs(lclNumRows = " << lclNumRows << ") = " <<
1349  k_ptrs(lclNumRows) << " != total number of entries on the calling "
1350  "process = " << lclTotalNumEntries << ".");
1351 
1352  // Allocate the array of packed values.
1353  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1354  // We need a host view of the above, since 2-D storage lives on host.
1355  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1356  // Pack the values on the host.
1357  for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1358  const size_t numEnt = h_numRowEnt(lclRow);
1359  std::copy (values2D_[lclRow].begin(),
1360  values2D_[lclRow].begin() + numEnt,
1361  h_vals.ptr_on_device() + h_ptrs(lclRow));
1362  }
1363  // Copy the packed values to the device.
1364  Kokkos::deep_copy (k_vals, h_vals);
1365 
1366  // Sanity check of packed row offsets.
1367  if (k_ptrs.dimension_0 () != 0) {
1368  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1369  TEUCHOS_TEST_FOR_EXCEPTION(
1370  static_cast<size_t> (k_ptrs(numOffsets-1)) != k_vals.dimension_0 (),
1371  std::logic_error, "Tpetra::CrsMatrix::fillLocalMatrix: "
1372  "In DynamicProfile branch, after packing, k_ptrs(" << (numOffsets-1)
1373  << ") = " << k_ptrs(numOffsets-1) << " != k_vals.dimension_0() = "
1374  << k_vals.dimension_0 () << ".");
1375  }
1376  }
1377  else if (getProfileType () == StaticProfile) {
1378  // StaticProfile means that the matrix's values are currently
1379  // stored in a 1-D format. However, this format is "unpacked";
1380  // it doesn't necessarily have the same row offsets as indicated
1381  // by the ptrs array returned by allocRowPtrs. This could
1382  // happen, for example, if the user specified StaticProfile in
1383  // the constructor and fixed the number of matrix entries in
1384  // each row, but didn't fill all those entries.
1385  //
1386  // As above, we don't need to keep the "packed" row offsets
1387  // array ptrs here, but we do need it here temporarily, so we
1388  // have to allocate it. We'll free ptrs later in this method.
1389  //
1390  // Note that this routine checks whether storage has already
1391  // been packed. This is a common case for solution of nonlinear
1392  // PDEs using the finite element method, as long as the
1393  // structure of the sparse matrix does not change between linear
1394  // solves.
1395  if (nodeNumEntries != nodeNumAllocated) {
1396  // We have to pack the 1-D storage, since the user didn't fill
1397  // up all requested storage.
1398  non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr",
1399  lclNumRows+1);
1400  // Total number of entries in the matrix on the calling
1401  // process. We will compute this in the loop below. It's
1402  // cheap to compute and useful as a sanity check.
1403  size_t lclTotalNumEntries = 0;
1404  k_ptrs = tmpk_ptrs;
1405  {
1406  //
1407  // FIXME hack until we get parallel_scan in Kokkos
1408  //
1409  typename non_const_row_map_type::HostMirror h_ptrs =
1410  create_mirror_view (tmpk_ptrs);
1411  h_ptrs(0) = 0;
1412  for (size_t i = 0; i < lclNumRows; ++i) {
1413  const size_t numEnt = h_numRowEnt(i);
1414  lclTotalNumEntries += numEnt;
1415  h_ptrs(i+1) = h_ptrs(i) + numEnt;
1416  }
1417  Kokkos::deep_copy (tmpk_ptrs, h_ptrs);
1418  }
1419 
1420  // Allocate the "packed" values array.
1421  // It has exactly the right number of entries.
1422  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1423 
1424  // Pack k_values1D_ into k_vals. We will replace k_values1D_ below.
1425  typedef pack_functor<values_type, row_map_type> packer_type;
1426  packer_type valsPacker (k_vals, k_values1D_, tmpk_ptrs, k_rowPtrs_);
1427  Kokkos::parallel_for (lclNumRows, valsPacker);
1428  }
1429  else { // We don't have to pack, so just set the pointer.
1430  k_vals = k_values1D_;
1431  }
1432  }
1433 
1434  // May we ditch the old allocations for the packed one?
1435  if (requestOptimizedStorage) {
1436  // The user requested optimized storage, so we can dump the
1437  // unpacked 2-D and 1-D storage, and keep the packed storage.
1438  values2D_ = null;
1439  k_values1D_ = k_vals;
1440  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1441  }
1442 
1443  // Build the local sparse matrix object. At this point, the local
1444  // matrix certainly has a column Map. Remember that the local
1445  // matrix's number of columns comes from the column Map, not the
1446  // domain Map.
1447  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1448  getColMap ()->getNodeNumElements (),
1449  k_vals,
1450  staticGraph_->getLocalGraph ());
1451  }
1452 
1453  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1454  void
1456  insertLocalValues (const LocalOrdinal localRow,
1457  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1458  const Teuchos::ArrayView<const Scalar>& values)
1459  {
1460  using Teuchos::Array;
1461  using Teuchos::ArrayView;
1462  using Teuchos::av_reinterpret_cast;
1463  using Teuchos::toString;
1464  using std::endl;
1465  const char tfecfFuncName[] = "insertLocalValues";
1466 
1467  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
1468  ": Fill is not active. After calling fillComplete, you must call "
1469  "resumeFill before you may insert entries into the matrix again.");
1470  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
1471  " cannot insert indices with static graph; use replaceLocalValues() instead.");
1472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
1473  std::runtime_error, ": graph indices are global; use insertGlobalValues().");
1474  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasColMap (), std::runtime_error,
1475  " cannot insert local indices without a column map.");
1476  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(),
1477  std::runtime_error, ": values.size() must equal indices.size().");
1478  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1479  ! getRowMap()->isNodeLocalElement(localRow), std::runtime_error,
1480  ": Local row index " << localRow << " does not belong to this process.");
1481 
1482  if (! myGraph_->indicesAreAllocated ()) {
1483  try {
1484  allocateValues (LocalIndices, GraphNotYetAllocated);
1485  }
1486  catch (std::exception& e) {
1487  TEUCHOS_TEST_FOR_EXCEPTION(
1488  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1489  "allocateValues(LocalIndices,GraphNotYetAllocated) threw an "
1490  "exception: " << e.what ());
1491  }
1492  }
1493 
1494  const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
1495 #ifdef HAVE_TPETRA_DEBUG
1496  // In a debug build, if the matrix has a column Map, test whether
1497  // any of the given column indices are not in the column Map.
1498  // Keep track of the invalid column indices so we can tell the
1499  // user about them.
1500  if (hasColMap ()) {
1501  const map_type& colMap = * (getColMap ());
1502  Array<LocalOrdinal> badColInds;
1503  bool allInColMap = true;
1504  for (size_t k = 0; k < numEntriesToAdd; ++k) {
1505  if (! colMap.isNodeLocalElement (indices[k])) {
1506  allInColMap = false;
1507  badColInds.push_back (indices[k]);
1508  }
1509  }
1510  if (! allInColMap) {
1511  std::ostringstream os;
1512  os << "Tpetra::CrsMatrix::insertLocalValues: You attempted to insert "
1513  "entries in owned row " << localRow << ", at the following column "
1514  "indices: " << toString (indices) << "." << endl;
1515  os << "Of those, the following indices are not in the column Map on "
1516  "this process: " << toString (badColInds) << "." << endl << "Since "
1517  "the matrix has a column Map already, it is invalid to insert "
1518  "entries at those locations.";
1519  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
1520  }
1521  }
1522 #endif // HAVE_TPETRA_DEBUG
1523 
1524 #ifdef HAVE_TPETRA_DEBUG
1525  RowInfo rowInfo;
1526  try {
1527  rowInfo = myGraph_->getRowInfo (localRow);
1528  } catch (std::exception& e) {
1529  TEUCHOS_TEST_FOR_EXCEPTION(
1530  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1531  "myGraph_->getRowInfo threw an exception: " << e.what ());
1532  }
1533 #else
1534  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1535 #endif // HAVE_TPETRA_DEBUG
1536 
1537  const size_t curNumEntries = rowInfo.numEntries;
1538  const size_t newNumEntries = curNumEntries + numEntriesToAdd;
1539  if (newNumEntries > rowInfo.allocSize) {
1540  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1541  getProfileType() == StaticProfile, std::runtime_error,
1542  ": new indices exceed statically allocated graph structure.");
1543 
1544  // Make space for the new matrix entries.
1545  try {
1546  rowInfo = myGraph_->template updateLocalAllocAndValues<impl_scalar_type> (rowInfo,
1547  newNumEntries,
1548  values2D_[localRow]);
1549  } catch (std::exception& e) {
1550  TEUCHOS_TEST_FOR_EXCEPTION(
1551  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1552  "myGraph_->updateGlobalAllocAndValues threw an exception: "
1553  << e.what ());
1554  }
1555  }
1556  typename Graph::SLocalGlobalViews indsView;
1557  indsView.linds = indices;
1558 
1559 #ifdef HAVE_TPETRA_DEBUG
1560  ArrayView<impl_scalar_type> valsView;
1561  try {
1562  valsView = this->getViewNonConst (rowInfo);
1563  } catch (std::exception& e) {
1564  TEUCHOS_TEST_FOR_EXCEPTION(
1565  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1566  "getViewNonConst threw an exception: " << e.what ());
1567  }
1568 #else
1569  ArrayView<impl_scalar_type> valsView = this->getViewNonConst (rowInfo);
1570 #endif // HAVE_TPETRA_DEBUG
1571 
1572  ArrayView<const impl_scalar_type> valsIn =
1573  av_reinterpret_cast<const impl_scalar_type> (values);
1574  try {
1575  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, indsView,
1576  valsView, valsIn,
1577  LocalIndices,
1578  LocalIndices);
1579  } catch (std::exception& e) {
1580  TEUCHOS_TEST_FOR_EXCEPTION(
1581  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1582  "myGraph_->insertIndicesAndValues threw an exception: "
1583  << e.what ());
1584  }
1585 
1586 #ifdef HAVE_TPETRA_DEBUG
1587  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1588  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1589  chkNewNumEntries != newNumEntries, std::logic_error,
1590  ": The row should have " << newNumEntries << " entries after insert, but "
1591  "instead has " << chkNewNumEntries << ". Please report this bug to the "
1592  "Tpetra developers.");
1593  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
1594  ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
1595  "Please report this bug to the Tpetra developers.");
1596 #endif // HAVE_TPETRA_DEBUG
1597  }
1598 
1599  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1600  void
1602  insertLocalValuesFiltered (const LocalOrdinal localRow,
1603  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1604  const Teuchos::ArrayView<const Scalar>& values)
1605  {
1606  using Teuchos::Array;
1607  using Teuchos::ArrayView;
1608  using Teuchos::av_reinterpret_cast;
1609  const char tfecfFuncName[] = "insertLocalValues: ";
1610 
1611  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
1612  "Requires that fill is active.");
1613  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
1614  "Cannot insert indices with static graph; use replaceLocalValues() instead.");
1615  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
1616  std::runtime_error, "Graph indices are global; use insertGlobalValues().");
1617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1618  ! hasColMap (), std::runtime_error, "The matrix has no column Map yet, "
1619  "so you cannot insert local indices. If you created the matrix without "
1620  "a column Map (or without a fill-complete graph), you must call "
1621  "fillComplete to create the column Map, before you may work with local "
1622  "indices.");
1623  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1624  values.size () != indices.size (), std::runtime_error, "values.size() = "
1625  << values.size () << " != indices.size() = " << indices.size ()<< ".");
1626  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1627  ! getRowMap()->isNodeLocalElement (localRow), std::runtime_error,
1628  "Local row index " << localRow << " does not belong to this process.");
1629  if (! myGraph_->indicesAreAllocated ()) {
1630  allocateValues (LocalIndices, GraphNotYetAllocated);
1631  }
1632  // Use the graph to filter incoming entries whose column indices
1633  // aren't in the column Map.
1634  Array<LocalOrdinal> f_inds (indices);
1635  ArrayView<const impl_scalar_type> valsIn =
1636  av_reinterpret_cast<const impl_scalar_type> (values);
1637  Array<impl_scalar_type> f_vals (valsIn);
1638  const size_t numFilteredEntries =
1639  myGraph_->template filterLocalIndicesAndValues<impl_scalar_type> (f_inds (),
1640  f_vals ());
1641  if (numFilteredEntries > 0) {
1642  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1643  const size_t curNumEntries = rowInfo.numEntries;
1644  const size_t newNumEntries = curNumEntries + numFilteredEntries;
1645  if (newNumEntries > rowInfo.allocSize) {
1646  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1647  getProfileType () == StaticProfile, std::runtime_error,
1648  ": new indices exceed statically allocated graph structure. "
1649  "newNumEntries (" << newNumEntries << " > rowInfo.allocSize ("
1650  << rowInfo.allocSize << ").");
1651  // Make space for the new matrix entries.
1652  rowInfo =
1653  myGraph_->template updateLocalAllocAndValues<impl_scalar_type> (rowInfo,
1654  newNumEntries,
1655  values2D_[localRow]);
1656  }
1657  typename Graph::SLocalGlobalViews inds_view;
1658  inds_view.linds = f_inds (0, numFilteredEntries);
1659  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1660  this->getViewNonConst (rowInfo),
1661  f_vals, LocalIndices,
1662  LocalIndices);
1663 #ifdef HAVE_TPETRA_DEBUG
1664  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1665  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
1666  std::logic_error, ": Internal logic error. Please contact Tpetra team.");
1667 #endif // HAVE_TPETRA_DEBUG
1668  }
1669 #ifdef HAVE_TPETRA_DEBUG
1670  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
1671  ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
1672  "Please report this bug to the Tpetra developers.");
1673 #endif // HAVE_TPETRA_DEBUG
1674  }
1675 
1676 
1677  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1678  void
1680  insertGlobalValues (const GlobalOrdinal globalRow,
1681  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1682  const Teuchos::ArrayView<const Scalar>& values)
1683  {
1684  using Teuchos::Array;
1685  using Teuchos::ArrayView;
1686  using Teuchos::av_reinterpret_cast;
1687  using Teuchos::toString;
1688  using std::endl;
1689  typedef LocalOrdinal LO;
1690  typedef GlobalOrdinal GO;
1691  typedef typename ArrayView<const GO>::size_type size_type;
1692  const char tfecfFuncName[] = "insertGlobalValues: ";
1693 
1694 #ifdef HAVE_TPETRA_DEBUG
1695  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1696  values.size () != indices.size (), std::runtime_error,
1697  "values.size() = " << values.size() << " != indices.size() = "
1698  << indices.size() << ".");
1699 #endif // HAVE_TPETRA_DEBUG
1700 
1701  const LO localRow = getRowMap ()->getLocalElement (globalRow);
1702 
1703  if (localRow == OTL::invalid ()) { // globalRow _not_ owned by calling process
1704  insertNonownedGlobalValues (globalRow, indices, values);
1705  }
1706  else { // globalRow _is_ owned by calling process
1707  if (this->isStaticGraph ()) {
1708  // Uh oh! Not allowed to insert into owned rows in that case.
1709  std::ostringstream err;
1710  const int myRank = getRowMap ()->getComm ()->getRank ();
1711  const int numProcs = getRowMap ()->getComm ()->getSize ();
1712 
1713  err << "The matrix was constructed with a constant (\"static\") graph, "
1714  "yet the given global row index " << globalRow << " is in the row "
1715  "Map on the calling process (with rank " << myRank << ", of " <<
1716  numProcs << " process(es)). In this case, you may not insert new "
1717  "entries into rows owned by the calling process.";
1718 
1719  if (! getRowMap ()->isNodeGlobalElement (globalRow)) {
1720  err << " Furthermore, GID->LID conversion with the row Map claims that "
1721  "the global row index is owned on the calling process, yet "
1722  "getRowMap()->isNodeGlobalElement(globalRow) returns false. That's"
1723  " weird! This might indicate a Map bug. Please report this to the"
1724  " Tpetra developers.";
1725  }
1726  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1727  this->isStaticGraph (), std::runtime_error, err.str ());
1728  }
1729 
1730  if (! myGraph_->indicesAreAllocated ()) {
1731  try {
1732  allocateValues (GlobalIndices, GraphNotYetAllocated);
1733  }
1734  catch (std::exception& e) {
1735  TEUCHOS_TEST_FOR_EXCEPTION(
1736  true, std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: "
1737  "allocateValues(GlobalIndices,GraphNotYetAllocated) threw an "
1738  "exception: " << e.what ());
1739  }
1740  }
1741 
1742  const size_type numEntriesToInsert = indices.size ();
1743  // If the matrix has a column Map, check at this point whether
1744  // the column indices belong to the column Map.
1745  //
1746  // FIXME (mfh 16 May 2013) We may want to consider deferring the
1747  // test to the CrsGraph method, since it may have to do this
1748  // anyway.
1749  if (hasColMap ()) {
1750  const map_type& colMap = * (getColMap ());
1751  // In a debug build, keep track of the nonowned ("bad") column
1752  // indices, so that we can display them in the exception
1753  // message. In a release build, just ditch the loop early if
1754  // we encounter a nonowned column index.
1755 #ifdef HAVE_TPETRA_DEBUG
1756  Array<GO> badColInds;
1757 #endif // HAVE_TPETRA_DEBUG
1758  bool allInColMap = true;
1759  for (size_type k = 0; k < numEntriesToInsert; ++k) {
1760  if (! colMap.isNodeGlobalElement (indices[k])) {
1761  allInColMap = false;
1762 #ifdef HAVE_TPETRA_DEBUG
1763  badColInds.push_back (indices[k]);
1764 #else
1765  break;
1766 #endif // HAVE_TPETRA_DEBUG
1767  }
1768  }
1769  if (! allInColMap) {
1770  std::ostringstream os;
1771  os << "You attempted to insert entries in owned row " << globalRow
1772  << ", at the following column indices: " << toString (indices)
1773  << "." << endl;
1774 #ifdef HAVE_TPETRA_DEBUG
1775  os << "Of those, the following indices are not in the column Map on "
1776  "this process: " << toString (badColInds) << "." << endl << "Since "
1777  "the matrix has a column Map already, it is invalid to insert "
1778  "entries at those locations.";
1779 #else
1780  os << "At least one of those indices is not in the column Map on this "
1781  "process." << endl << "It is invalid to insert into columns not in "
1782  "the column Map on the process that owns the row.";
1783 #endif // HAVE_TPETRA_DEBUG
1784  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1785  ! allInColMap, std::invalid_argument, os.str ());
1786  }
1787  }
1788 
1789  typename Graph::SLocalGlobalViews inds_view;
1790  ArrayView<const impl_scalar_type> vals_view;
1791 
1792  inds_view.ginds = indices;
1793  vals_view = av_reinterpret_cast<const impl_scalar_type> (values);
1794 
1795 #ifdef HAVE_TPETRA_DEBUG
1796  RowInfo rowInfo;
1797  try {
1798  rowInfo = myGraph_->getRowInfo (localRow);
1799  } catch (std::exception& e) {
1800  TEUCHOS_TEST_FOR_EXCEPTION(
1801  true, std::runtime_error, "myGraph_->getRowInfo(localRow=" << localRow
1802  << ") threw an exception: " << e.what ());
1803  }
1804 #else
1805  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1806 #endif // HAVE_TPETRA_DEBUG
1807 
1808  const size_t curNumEntries = rowInfo.numEntries;
1809  const size_t newNumEntries =
1810  curNumEntries + static_cast<size_t> (numEntriesToInsert);
1811  if (newNumEntries > rowInfo.allocSize) {
1812  TEUCHOS_TEST_FOR_EXCEPTION(
1813  getProfileType () == StaticProfile && newNumEntries > rowInfo.allocSize,
1814  std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: new "
1815  "indices exceed statically allocated graph structure. curNumEntries"
1816  " (" << curNumEntries << ") + numEntriesToInsert (" <<
1817  numEntriesToInsert << ") > allocSize (" << rowInfo.allocSize << ").");
1818 
1819  // Update allocation only as much as necessary
1820  try {
1821  rowInfo =
1822  myGraph_->template updateGlobalAllocAndValues<impl_scalar_type> (rowInfo,
1823  newNumEntries,
1824  values2D_[localRow]);
1825  } catch (std::exception& e) {
1826  TEUCHOS_TEST_FOR_EXCEPTION(
1827  true, std::runtime_error, "myGraph_->updateGlobalAllocAndValues"
1828  "(...) threw an exception: " << e.what ());
1829  }
1830  }
1831  try {
1832  if (isGloballyIndexed ()) {
1833  // lg=GlobalIndices, I=GlobalIndices means the method calls
1834  // getGlobalViewNonConst() and does direct copying, which
1835  // should be reasonably fast.
1836  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1837  this->getViewNonConst (rowInfo),
1838  vals_view,
1839  GlobalIndices, GlobalIndices);
1840  }
1841  else {
1842  // lg=GlobalIndices, I=LocalIndices means the method calls
1843  // the Map's getLocalElement() method once per entry to
1844  // insert. This may be slow.
1845  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1846  this->getViewNonConst (rowInfo),
1847  vals_view,
1848  GlobalIndices, LocalIndices);
1849  }
1850  }
1851  catch (std::exception& e) {
1852  TEUCHOS_TEST_FOR_EXCEPTION(
1853  true, std::runtime_error, "myGraph_->insertIndicesAndValues(...) "
1854  "threw an exception: " << e.what ());
1855  }
1856 
1857 #ifdef HAVE_TPETRA_DEBUG
1858  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1859  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
1860  std::logic_error, ": There should be a total of " << newNumEntries
1861  << " entries in the row, but the graph now reports " << chkNewNumEntries
1862  << " entries. Please report this bug to the Tpetra developers.");
1863 #endif // HAVE_TPETRA_DEBUG
1864  }
1865  }
1866 
1867 
1868  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1869  void
1871  insertGlobalValuesFiltered (const GlobalOrdinal globalRow,
1872  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1873  const Teuchos::ArrayView<const Scalar>& values)
1874  {
1875  using Teuchos::Array;
1876  using Teuchos::ArrayView;
1877  using Teuchos::av_reinterpret_cast;
1878  typedef LocalOrdinal LO;
1879  typedef GlobalOrdinal GO;
1880  typedef impl_scalar_type ST;
1881  const char tfecfFuncName[] = "insertGlobalValuesFiltered: ";
1882 
1883  // mfh 14 Dec 2012: Defer test for static graph until we know that
1884  // globalRow is in the row Map. If it's not in the row Map, it
1885  // doesn't matter whether or not the graph is static; the data
1886  // just get stashed for later use by globalAssemble().
1887  //
1888  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1889  // isStaticGraph(), std::runtime_error,
1890  // ": matrix was constructed with static graph. Cannot insert new entries.");
1891 #ifdef HAVE_TPETRA_DEBUG
1892  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1893  values.size () != indices.size (), std::runtime_error,
1894  "values.size() = " << values.size() << " != indices.size() = "
1895  << indices.size() << ".");
1896 #endif // HAVE_TPETRA_DEBUG
1897 
1898  ArrayView<const ST> valsIn = av_reinterpret_cast<const ST> (values);
1899  const LO lrow = getRowMap ()->getLocalElement (globalRow);
1900 
1901  if (lrow != Teuchos::OrdinalTraits<LO>::invalid ()) { // globalRow is in our row Map.
1902  // If the matrix has a static graph, this process is now allowed
1903  // to insert into rows it owns.
1904  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1905  this->isStaticGraph (), std::runtime_error,
1906  "The matrix was constructed with a static graph. In that case, "
1907  "it is forbidden to insert new entries into rows owned by the "
1908  "calling process.");
1909  if (! myGraph_->indicesAreAllocated ()) {
1910  allocateValues (GlobalIndices, GraphNotYetAllocated);
1911  }
1912  typename Graph::SLocalGlobalViews inds_view;
1913  ArrayView<const ST> vals_view;
1914 
1915  // We have to declare these Arrays here rather than in the
1916  // hasColMap() if branch, so that views to them will remain
1917  // valid for the whole scope.
1918  Array<GO> filtered_indices;
1919  Array<ST> filtered_values;
1920  if (hasColMap ()) { // We have a column Map.
1921  // Use column Map to filter the indices and corresponding
1922  // values, so that we only insert entries into columns we own.
1923  filtered_indices.assign (indices.begin (), indices.end ());
1924  filtered_values.assign (valsIn.begin (), valsIn.end ());
1925  const size_t numFilteredEntries =
1926  myGraph_->template filterGlobalIndicesAndValues<ST> (filtered_indices (),
1927  filtered_values ());
1928  inds_view.ginds = filtered_indices (0, numFilteredEntries);
1929  vals_view = filtered_values (0, numFilteredEntries);
1930  }
1931  else { // we don't have a column Map.
1932  inds_view.ginds = indices;
1933  vals_view = valsIn;
1934  }
1935  const size_t numFilteredEntries = vals_view.size ();
1936  // add the new indices and values
1937  if (numFilteredEntries > 0) {
1938  RowInfo rowInfo = myGraph_->getRowInfo (lrow);
1939  const size_t curNumEntries = rowInfo.numEntries;
1940  const size_t newNumEntries = curNumEntries + numFilteredEntries;
1941  if (newNumEntries > rowInfo.allocSize) {
1942  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1943  getProfileType () == StaticProfile, std::runtime_error,
1944  "New indices exceed statically allocated graph structure.");
1945 
1946  // Update allocation only as much as necessary
1947  rowInfo = myGraph_->template updateGlobalAllocAndValues<ST> (rowInfo,
1948  newNumEntries,
1949  values2D_[lrow]);
1950  }
1951  if (isGloballyIndexed ()) {
1952  // lg=GlobalIndices, I=GlobalIndices means the method calls
1953  // getGlobalViewNonConst() and does direct copying, which
1954  // should be reasonably fast.
1955  myGraph_->template insertIndicesAndValues<ST> (rowInfo, inds_view,
1956  this->getViewNonConst (rowInfo),
1957  vals_view,
1958  GlobalIndices, GlobalIndices);
1959  }
1960  else {
1961  // lg=GlobalIndices, I=LocalIndices means the method calls
1962  // the Map's getLocalElement() method once per entry to
1963  // insert. This may be slow.
1964  myGraph_->template insertIndicesAndValues<ST> (rowInfo, inds_view,
1965  this->getViewNonConst (rowInfo),
1966  vals_view,
1967  GlobalIndices, LocalIndices);
1968  }
1969 #ifdef HAVE_TPETRA_DEBUG
1970  {
1971  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (lrow);
1972  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
1973  std::logic_error, ": There should be a total of " << newNumEntries
1974  << " entries in the row, but the graph now reports " << chkNewNumEntries
1975  << " entries. Please report this bug to the Tpetra developers.");
1976  }
1977 #endif // HAVE_TPETRA_DEBUG
1978  }
1979  }
1980  else { // The calling process doesn't own the given row.
1981  insertNonownedGlobalValues (globalRow, indices, values);
1982  }
1983  }
1984 
1985 
1986  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1987  LocalOrdinal
1989  replaceLocalValues (const LocalOrdinal localRow,
1990  const Teuchos::ArrayView<const LocalOrdinal> &indices,
1991  const Teuchos::ArrayView<const Scalar>& values)
1992  {
1993  using Teuchos::Array;
1994  using Teuchos::ArrayView;
1995  using Teuchos::av_reinterpret_cast;
1996  typedef LocalOrdinal LO;
1997  typedef GlobalOrdinal GO;
1998  typedef impl_scalar_type ST;
1999  // project2nd is a binary function that returns its second
2000  // argument. This replaces entries in the given row with their
2001  // corresponding entry of values.
2002  typedef Tpetra::project2nd<ST, ST> f_type;
2003  typedef typename ArrayView<GO>::size_type size_type;
2004 
2005  ArrayView<const ST> valsIn = av_reinterpret_cast<const ST> (values);
2006  if (! isFillActive ()) {
2007  // Fill must be active in order to call this method.
2008  return Teuchos::OrdinalTraits<LO>::invalid ();
2009  }
2010  else if (! this->hasColMap ()) {
2011  // There is no such thing as local column indices without a column Map.
2012  return Teuchos::OrdinalTraits<LO>::invalid ();
2013  }
2014  else if (values.size () != indices.size ()) {
2015  // The sizes of values and indices must match.
2016  return Teuchos::OrdinalTraits<LO>::invalid ();
2017  }
2018  const bool isLocalRow = getRowMap ()->isNodeLocalElement (localRow);
2019  if (! isLocalRow) {
2020  // The calling process does not own this row, so it is not
2021  // allowed to modify its values.
2022  //
2023  // FIXME (mfh 02 Jan 2015) replaceGlobalValues returns invalid
2024  // in this case.
2025  return static_cast<LO> (0);
2026  }
2027 
2028  if (indices.size () == 0) {
2029  return static_cast<LO> (0);
2030  }
2031  else {
2032  RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
2033  ArrayView<ST> curVals = this->getViewNonConst (rowInfo);
2034  if (isLocallyIndexed ()) {
2035  return staticGraph_->template transformLocalValues<ST, f_type> (rowInfo,
2036  curVals,
2037  indices,
2038  valsIn,
2039  f_type ());
2040  }
2041  else if (isGloballyIndexed ()) {
2042  // Convert the given local indices to global indices.
2043  //
2044  // FIXME (mfh 27 Jun 2014) Why can't we ask the graph to do
2045  // that? It could do the conversions in place, so that we
2046  // wouldn't need temporary storage.
2047  const map_type& colMap = * (this->getColMap ());
2048  const size_type numInds = indices.size ();
2049 
2050  // mfh 27 Jun 2014: Some of the given local indices might be
2051  // invalid. That's OK, though, since the graph ignores them
2052  // and their corresponding values in transformGlobalValues.
2053  // Thus, we don't have to count how many indices are valid.
2054  // We do so just as a sanity check.
2055  Array<GO> gblInds (numInds);
2056  size_type numValid = 0; // sanity check count of # valid indices
2057  for (size_type k = 0; k < numInds; ++k) {
2058  const GO gid = colMap.getGlobalElement (indices[k]);
2059  gblInds[k] = gid;
2060  if (gid != Teuchos::OrdinalTraits<GO>::invalid ()) {
2061  ++numValid; // sanity check count of # valid indices
2062  }
2063  }
2064  const LO numXformed =
2065  staticGraph_->template transformGlobalValues<ST, f_type> (rowInfo,
2066  curVals, // target
2067  gblInds,
2068  valsIn, // source
2069  f_type ());
2070  if (static_cast<size_type> (numXformed) != numValid) {
2071  return Teuchos::OrdinalTraits<LO>::invalid ();
2072  } else {
2073  return numXformed;
2074  }
2075  }
2076  // NOTE (mfh 26 Jun 2014) In the current version of CrsMatrix,
2077  // it's possible for a matrix (or graph) to be neither locally
2078  // nor globally indexed on a process. This means that the graph
2079  // or matrix has no entries on that process. Epetra also works
2080  // like this. It's related to lazy allocation (on first
2081  // insertion, not at graph / matrix construction). Lazy
2082  // allocation will go away because it is not thread scalable.
2083  return static_cast<LO> (0);
2084  }
2085  }
2086 
2087 
2088  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2089  LocalOrdinal
2091  replaceGlobalValues (GlobalOrdinal globalRow,
2092  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2093  const Teuchos::ArrayView<const Scalar>& values)
2094  {
2095  using Teuchos::Array;
2096  using Teuchos::ArrayView;
2097  using Teuchos::av_reinterpret_cast;
2098  typedef LocalOrdinal LO;
2099  typedef GlobalOrdinal GO;
2100  typedef impl_scalar_type ST;
2101  // project2nd is a binary function that returns its second
2102  // argument. This replaces entries in the given row with their
2103  // corresponding entry of values.
2104  typedef Tpetra::project2nd<ST, ST> f_type;
2105  typedef typename ArrayView<GO>::size_type size_type;
2106 
2107  ArrayView<const ST> valsIn = av_reinterpret_cast<const ST> (values);
2108  if (! isFillActive ()) {
2109  // Fill must be active in order to call this method.
2110  return Teuchos::OrdinalTraits<LO>::invalid ();
2111  }
2112  else if (values.size () != indices.size ()) {
2113  // The sizes of values and indices must match.
2114  return Teuchos::OrdinalTraits<LO>::invalid ();
2115  }
2116 
2117  const LO lrow = this->getRowMap ()->getLocalElement (globalRow);
2118  if (lrow == Teuchos::OrdinalTraits<LO>::invalid ()) {
2119  // The calling process does not own this row, so it is not
2120  // allowed to modify its values.
2121  //
2122  // FIXME (mfh 02 Jan 2015) replaceLocalValues returns 0 in this case.
2123  return Teuchos::OrdinalTraits<LO>::invalid ();
2124  }
2125 
2126  if (staticGraph_.is_null ()) {
2127  return Teuchos::OrdinalTraits<LO>::invalid ();
2128  }
2129  const crs_graph_type& graph = *staticGraph_;
2130  RowInfo rowInfo = graph.getRowInfo (lrow);
2131  if (indices.size () == 0) {
2132  return static_cast<LO> (0);
2133  }
2134  else {
2135  ArrayView<ST> curVals = this->getViewNonConst (rowInfo);
2136  if (isLocallyIndexed ()) {
2137  // Convert the given global indices to local indices.
2138  //
2139  // FIXME (mfh 08 Jul 2014) Why can't we ask the graph to do
2140  // that? It could do the conversions in place, so that we
2141  // wouldn't need temporary storage.
2142  const map_type& colMap = * (this->getColMap ());
2143  const size_type numInds = indices.size ();
2144  Array<LO> lclInds (numInds);
2145  for (size_type k = 0; k < numInds; ++k) {
2146  // There is no need to filter out indices not in the
2147  // column Map. Those that aren't will be mapped to
2148  // invalid(), which the graph's transformGlobalValues()
2149  // will filter out (but not count in its return value).
2150  lclInds[k] = colMap.getLocalElement (indices[k]);
2151  }
2152  return graph.template transformLocalValues<ST, f_type> (rowInfo,
2153  curVals,
2154  lclInds (),
2155  valsIn,
2156  f_type ());
2157  }
2158  else if (isGloballyIndexed ()) {
2159  return graph.template transformGlobalValues<ST, f_type> (rowInfo,
2160  curVals,
2161  indices,
2162  valsIn,
2163  f_type ());
2164  }
2165  else {
2166  // If the graph is neither locally nor globally indexed on
2167  // the calling process, that means that the calling process
2168  // can't possibly have any entries in the owned row. Thus,
2169  // there are no entries to transform, so we return zero.
2170  return static_cast<LO> (0);
2171  }
2172  }
2173  }
2174 
2175 
2176  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2177  LocalOrdinal
2179  sumIntoGlobalValues (const GlobalOrdinal globalRow,
2180  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2181  const Teuchos::ArrayView<const Scalar>& values)
2182 
2183  {
2184  using Teuchos::Array;
2185  using Teuchos::ArrayView;
2186  using Teuchos::av_reinterpret_cast;
2187  typedef LocalOrdinal LO;
2188  typedef GlobalOrdinal GO;
2189  typedef impl_scalar_type ST;
2190  typedef std::plus<Scalar> f_type;
2191  typedef typename ArrayView<GO>::size_type size_type;
2192 
2193  ArrayView<const ST> valsIn = av_reinterpret_cast<const ST> (values);
2194  if (! isFillActive ()) {
2195  // Fill must be active in order to call this method.
2196  return Teuchos::OrdinalTraits<LO>::invalid ();
2197  }
2198  else if (values.size () != indices.size ()) {
2199  // The sizes of values and indices must match.
2200  return Teuchos::OrdinalTraits<LO>::invalid ();
2201  }
2202 
2203  const LO lrow = this->getRowMap ()->getLocalElement (globalRow);
2204  if (lrow == Teuchos::OrdinalTraits<LO>::invalid ()) {
2205  // globalRow is not in the row Map, so stash the given entries
2206  // away in a separate data structure. globalAssemble() (called
2207  // during fillComplete()) will exchange that data and sum it in
2208  // using sumIntoGlobalValues().
2209  this->insertNonownedGlobalValues (globalRow, indices, values);
2210  // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2211  // since we won't know whether the given indices were valid
2212  // until globalAssemble (called in fillComplete) is called.
2213  // That's why insertNonownedGlobalValues doesn't return
2214  // anything. Just for consistency, I'll return the number of
2215  // entries that the user gave us.
2216  return static_cast<LO> (indices.size ());
2217  }
2218 
2219  if (staticGraph_.is_null ()) {
2220  return Teuchos::OrdinalTraits<LO>::invalid ();
2221  }
2222  const crs_graph_type& graph = *staticGraph_;
2223  RowInfo rowInfo = graph.getRowInfo (lrow);
2224  if (indices.size () == 0) {
2225  return static_cast<LO> (0);
2226  }
2227  else {
2228  ArrayView<ST> curVals = this->getViewNonConst (rowInfo);
2229  if (isLocallyIndexed ()) {
2230  // Convert the given global indices to local indices.
2231  //
2232  // FIXME (mfh 08 Jul 2014) Why can't we ask the graph to do
2233  // that? It could do the conversions in place, so that we
2234  // wouldn't need temporary storage.
2235  const map_type& colMap = * (this->getColMap ());
2236  const size_type numInds = indices.size ();
2237  Array<LO> lclInds (numInds);
2238  for (size_type k = 0; k < numInds; ++k) {
2239  // There is no need to filter out indices not in the
2240  // column Map. Those that aren't will be mapped to
2241  // invalid(), which the graph's transformGlobalValues()
2242  // will filter out (but not count in its return value).
2243  lclInds[k] = colMap.getLocalElement (indices[k]);
2244  }
2245  return graph.template transformLocalValues<ST, f_type> (rowInfo,
2246  curVals,
2247  lclInds (),
2248  valsIn,
2249  f_type ());
2250  }
2251  else if (isGloballyIndexed ()) {
2252  return graph.template transformGlobalValues<ST, f_type> (rowInfo,
2253  curVals,
2254  indices,
2255  valsIn,
2256  f_type ());
2257  }
2258  else {
2259  // If the graph is neither locally nor globally indexed on
2260  // the calling process, that means that the calling process
2261  // can't possibly have any entries in the owned row. Thus,
2262  // there are no entries to transform, so we return zero.
2263  return static_cast<LO> (0);
2264  }
2265  }
2266  }
2267 
2268 
2269  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2270  LocalOrdinal
2272  sumIntoLocalValues (const LocalOrdinal localRow,
2273  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2274  const Teuchos::ArrayView<const Scalar>& values)
2275  {
2276  using Teuchos::Array;
2277  using Teuchos::ArrayView;
2278  using Teuchos::av_reinterpret_cast;
2279  typedef LocalOrdinal LO;
2280  typedef GlobalOrdinal GO;
2281  typedef impl_scalar_type ST;
2282  typedef std::plus<Scalar> f_type;
2283  typedef typename ArrayView<GO>::size_type size_type;
2284 
2285  ArrayView<const ST> valsIn = av_reinterpret_cast<const ST> (values);
2286  if (! isFillActive ()) {
2287  // Fill must be active in order to call this method.
2288  return Teuchos::OrdinalTraits<LO>::invalid ();
2289  }
2290  else if (! this->hasColMap ()) {
2291  // There is no such thing as local column indices without a column Map.
2292  return Teuchos::OrdinalTraits<LO>::invalid ();
2293  }
2294  else if (values.size () != indices.size ()) {
2295  // The sizes of values and indices must match.
2296  return Teuchos::OrdinalTraits<LO>::invalid ();
2297  }
2298  const bool isLocalRow = getRowMap ()->isNodeLocalElement (localRow);
2299  if (! isLocalRow) {
2300  // The calling process doesn't own the local row, so we can't
2301  // insert into it.
2302  return static_cast<LO> (0);
2303  }
2304 
2305  if (indices.size () == 0) {
2306  return static_cast<LO> (0);
2307  }
2308  else {
2309  RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
2310  ArrayView<ST> curVals = this->getViewNonConst (rowInfo);
2311  if (isLocallyIndexed ()) {
2312  return staticGraph_->template transformLocalValues<ST, f_type> (rowInfo,
2313  curVals,
2314  indices,
2315  valsIn,
2316  f_type ());
2317  }
2318  else if (isGloballyIndexed ()) {
2319  // Convert the given local indices to global indices.
2320  //
2321  // FIXME (mfh 27 Jun 2014) Why can't we ask the graph to do
2322  // that? It could do the conversions in place, so that we
2323  // wouldn't need temporary storage.
2324  const map_type& colMap = * (this->getColMap ());
2325  const size_type numInds = indices.size ();
2326 
2327  // mfh 27 Jun 2014: Some of the given local indices might be
2328  // invalid. That's OK, though, since the graph ignores them
2329  // and their corresponding values in transformGlobalValues.
2330  // Thus, we don't have to count how many indices are valid.
2331  // We do so just as a sanity check.
2332  Array<GO> gblInds (numInds);
2333  size_type numValid = 0; // sanity check count of # valid indices
2334  for (size_type k = 0; k < numInds; ++k) {
2335  const GO gid = colMap.getGlobalElement (indices[k]);
2336  gblInds[k] = gid;
2337  if (gid != Teuchos::OrdinalTraits<GO>::invalid ()) {
2338  ++numValid; // sanity check count of # valid indices
2339  }
2340  }
2341  const LO numXformed =
2342  staticGraph_->template transformGlobalValues<ST, f_type> (rowInfo,
2343  curVals, // target
2344  gblInds,
2345  valsIn, // source
2346  f_type ());
2347  if (static_cast<size_type> (numXformed) != numValid) {
2348  return Teuchos::OrdinalTraits<LO>::invalid ();
2349  } else {
2350  return numXformed;
2351  }
2352  }
2353  // NOTE (mfh 26 Jun 2014) In the current version of CrsMatrix,
2354  // it's possible for a matrix (or graph) to be neither locally
2355  // nor globally indexed on a process. This means that the graph
2356  // or matrix has no entries on that process. Epetra also works
2357  // like this. It's related to lazy allocation (on first
2358  // insertion, not at graph / matrix construction). Lazy
2359  // allocation will go away because it is not thread scalable.
2360  return static_cast<LO> (0);
2361  }
2362  }
2363 
2364  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2365  Teuchos::ArrayView<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type>
2367  getView (RowInfo rowinfo) const
2368  {
2369  using Kokkos::MemoryUnmanaged;
2370  using Kokkos::View;
2371  using Teuchos::ArrayView;
2372  typedef impl_scalar_type ST;
2373  typedef std::pair<size_t, size_t> range_type;
2374 
2375  if (k_values1D_.dimension_0 () != 0 && rowinfo.allocSize > 0) {
2376 #ifdef HAVE_TPETRA_DEBUG
2377  TEUCHOS_TEST_FOR_EXCEPTION(
2378  rowinfo.offset1D + rowinfo.allocSize > k_values1D_.dimension_0 (),
2379  std::range_error, "Tpetra::CrsMatrix::getView: Invalid access "
2380  "to 1-D storage of values." << std::endl << "rowinfo.offset1D (" <<
2381  rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize <<
2382  ") > k_values1D_.dimension_0() (" << k_values1D_.dimension_0 () << ").");
2383 #endif // HAVE_TPETRA_DEBUG
2384  range_type range (rowinfo.offset1D, rowinfo.offset1D + rowinfo.allocSize);
2385  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
2386  subview_type sv = Kokkos::subview (k_values1D_, range);
2387 
2388  const ST* const sv_raw = (rowinfo.allocSize == 0) ? NULL : sv.ptr_on_device ();
2389  return ArrayView<const ST> (sv_raw, rowinfo.allocSize);
2390  }
2391  else if (values2D_ != null) {
2392  return values2D_[rowinfo.localRow] ();
2393  }
2394  else {
2395  return ArrayView<impl_scalar_type> ();
2396  }
2397  }
2398 
// Non-const counterpart of getView(): returns a writable view of one
// row's values.
//
// NOTE(review): this listing's embedded numbering jumps from 2400 to
// 2403, so the lines carrying the class qualifier, function name and
// parameter list are not present here.  The body reads a parameter named
// `rowinfo`; its exact type and the method's const-qualification should
// be confirmed against the class declaration.
2399  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2400  Teuchos::ArrayView<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type>
2403  {
// Reuse the const accessor and cast away constness of the element type,
// so the row-lookup logic lives in one place.
2404  return Teuchos::av_const_cast<impl_scalar_type> (this->getView (rowinfo));
2405  }
2406 
2407  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2408  void
2410  getLocalRowCopy (LocalOrdinal localRow,
2411  const Teuchos::ArrayView<LocalOrdinal>& indices,
2412  const Teuchos::ArrayView<Scalar>& values,
2413  size_t& numEntries) const
2414  {
2415  using Teuchos::ArrayView;
2416  using Teuchos::av_reinterpret_cast;
2417  typedef LocalOrdinal LO;
2418  typedef GlobalOrdinal GO;
2419 
2420  TEUCHOS_TEST_FOR_EXCEPTION(
2421  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2422  "Tpetra::CrsMatrix::getLocalRowCopy: The matrix is globally indexed and "
2423  "does not have a column Map yet. That means we don't have local indices "
2424  "for columns yet, so it doesn't make sense to call this method. If the "
2425  "matrix doesn't have a column Map yet, you should call fillComplete on "
2426  "it first.");
2427  TEUCHOS_TEST_FOR_EXCEPTION(
2428  ! staticGraph_->hasRowInfo (), std::runtime_error,
2429  "Tpetra::CrsMatrix::getLocalRowCopy: The graph's row information was "
2430  "deleted at fillComplete().");
2431 
2432  if (! this->getRowMap ()->isNodeLocalElement (localRow)) {
2433  // The calling process owns no entries in this row.
2434  numEntries = 0;
2435  return;
2436  }
2437 
2438  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
2439  const size_t theNumEntries = rowinfo.numEntries;
2440 
2441  TEUCHOS_TEST_FOR_EXCEPTION(
2442  static_cast<size_t> (indices.size ()) < theNumEntries ||
2443  static_cast<size_t> (values.size ()) < theNumEntries,
2444  std::runtime_error,
2445  "Tpetra::CrsMatrix::getLocalRowCopy: The given row " << localRow
2446  << " has " << theNumEntries << " entries. One or both of the given "
2447  "ArrayViews are not long enough to store that many entries. indices "
2448  "can store " << indices.size() << " entries and values can store "
2449  << values.size() << " entries.");
2450 
2451  numEntries = theNumEntries;
2452 
2453  if (staticGraph_->isLocallyIndexed ()) {
2454  ArrayView<const LO> indrowview = staticGraph_->getLocalView (rowinfo);
2455  ArrayView<const Scalar> valrowview =
2456  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2457  std::copy (indrowview.begin (), indrowview.begin () + numEntries, indices.begin ());
2458  std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ());
2459  }
2460  else if (staticGraph_->isGloballyIndexed ()) {
2461  ArrayView<const GO> indrowview = staticGraph_->getGlobalView (rowinfo);
2462  ArrayView<const Scalar> valrowview =
2463  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2464  std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ());
2465 
2466  const map_type& colMap = * (this->getColMap ());
2467  for (size_t j=0; j < numEntries; ++j) {
2468  indices[j] = colMap.getLocalElement (indrowview[j]);
2469  }
2470  }
2471  else {
2472  numEntries = 0;
2473  }
2474  }
2475 
2476  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2477  void
2479  getGlobalRowCopy (GlobalOrdinal globalRow,
2480  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2481  const Teuchos::ArrayView<Scalar>& values,
2482  size_t& numEntries) const
2483  {
2484  using Teuchos::ArrayView;
2485  using Teuchos::av_reinterpret_cast;
2486  typedef LocalOrdinal LO;
2487  typedef GlobalOrdinal GO;
2488 
2489  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2490  const LocalOrdinal lrow = getRowMap ()->getLocalElement (globalRow);
2491  if (lrow == OTL::invalid ()) {
2492  // The calling process owns no entries in this row.
2493  numEntries = 0;
2494  return;
2495  }
2496 
2497  const RowInfo rowinfo = staticGraph_->getRowInfo (lrow);
2498  const size_t theNumEntries = rowinfo.numEntries;
2499 
2500  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2501  static_cast<size_t> (indices.size ()) < theNumEntries ||
2502  static_cast<size_t> (values.size ()) < theNumEntries,
2503  std::runtime_error,
2504  "The given row " << globalRow << ", corresponding to local row " << lrow
2505  << ", has " << theNumEntries << " entries. One or both of the given "
2506  "ArrayView input arguments are not long enough to store that many "
2507  "entries. indices.size() = " << indices.size() << ", values.size() = "
2508  << values.size () << ", but the number of entries in the row is "
2509  << theNumEntries << ".");
2510 
2511  // Don't "commit" the value until we know that the input arrays are valid.
2512  numEntries = theNumEntries;
2513 
2514  if (staticGraph_->isGloballyIndexed ()) {
2515  ArrayView<const GO> indrowview = staticGraph_->getGlobalView (rowinfo);
2516  ArrayView<const Scalar> valrowview =
2517  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2518  std::copy (indrowview.begin (), indrowview.begin () + numEntries, indices.begin ());
2519  std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ());
2520  }
2521  else if (staticGraph_->isLocallyIndexed ()) {
2522  ArrayView<const LO> indrowview = staticGraph_->getLocalView(rowinfo);
2523  ArrayView<const Scalar> valrowview =
2524  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2525  std::copy (valrowview.begin (), valrowview.begin () + numEntries, values.begin ());
2526  for (size_t j = 0; j < numEntries; ++j) {
2527  indices[j] = getColMap ()->getGlobalElement (indrowview[j]);
2528  }
2529  }
2530  else {
2531 #ifdef HAVE_TPETRA_DEBUG
2532  // should have fallen in one of the above if indices are allocated
2533  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2534  staticGraph_->indicesAreAllocated (), std::logic_error,
2535  "Internal logic error. Please contact Tpetra team.");
2536 #endif // HAVE_TPETRA_DEBUG
2537  numEntries = 0;
2538  }
2539  }
2540 
2541  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2542  void
2544  getLocalRowView (LocalOrdinal localRow,
2545  Teuchos::ArrayView<const LocalOrdinal>& indices,
2546  Teuchos::ArrayView<const Scalar>& values) const
2547  {
2548  using Teuchos::ArrayView;
2549  using Teuchos::av_reinterpret_cast;
2550  typedef LocalOrdinal LO;
2551 
2552  const char tfecfFuncName[] = "getLocalRowView: ";
2553  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2554  isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
2555  "its indices as global indices, so you cannot get a view with local "
2556  "column indices. If the matrix has a column Map, you may call "
2557  "getLocalRowCopy() to get local column indices; otherwise, you may get "
2558  "a view with global column indices by calling getGlobalRowCopy().");
2559  indices = Teuchos::null;
2560  values = Teuchos::null;
2561 #ifdef HAVE_TPETRA_DEBUG
2562  size_t numEntries = 0;
2563 #endif // HAVE_TPETRA_DEBUG
2564  if (getRowMap ()->isNodeLocalElement (localRow)) {
2565  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
2566 #ifdef HAVE_TPETRA_DEBUG
2567  numEntries = rowinfo.numEntries;
2568 #endif // HAVE_TPETRA_DEBUG
2569  if (rowinfo.numEntries > 0) {
2570  ArrayView<const LO> indTmp = staticGraph_->getLocalView (rowinfo);
2571  ArrayView<const Scalar> valTmp =
2572  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2573  indices = indTmp (0, rowinfo.numEntries);
2574  values = valTmp (0, rowinfo.numEntries);
2575  }
2576  }
2577 
2578 #ifdef HAVE_TPETRA_DEBUG
2579  const char suffix[] = ". This should never happen. Please report this "
2580  "bug to the Tpetra developers.";
2581  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2582  static_cast<size_t>(indices.size ()) != static_cast<size_t>(values.size ()), std::logic_error,
2583  "At the end of this method, for local row " << localRow << ", "
2584  "indices.size() = " << indices.size () << " != values.size () = "
2585  << values.size () << suffix);
2586  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2587  static_cast<size_t>(indices.size ()) != static_cast<size_t>(numEntries), std::logic_error,
2588  "At the end of this method, for local row " << localRow << ", "
2589  "indices.size() = " << indices.size () << " != numEntries = "
2590  << numEntries << suffix);
2591  const size_t expectedNumEntries = this->getNumEntriesInLocalRow (localRow);
2592  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2593  numEntries != expectedNumEntries, std::logic_error,
2594  "At the end of this method, for local row " << localRow << ", numEntries"
2595  " = " << numEntries << " != getNumEntriesInLocalRow(localRow)"
2596  " = "<< expectedNumEntries << suffix);
2597 #endif // HAVE_TPETRA_DEBUG
2598  }
2599 
2600  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2601  void
2603  getGlobalRowView (GlobalOrdinal globalRow,
2604  Teuchos::ArrayView<const GlobalOrdinal>& indices,
2605  Teuchos::ArrayView<const Scalar>& values) const
2606  {
2607  using Teuchos::ArrayView;
2608  using Teuchos::av_reinterpret_cast;
2609  typedef GlobalOrdinal GO;
2610  const char tfecfFuncName[] = "getGlobalRowView: ";
2611 
2612  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2613  isLocallyIndexed (), std::runtime_error,
2614  "The matrix is locally indexed, so we cannot return a view of the row "
2615  "with global column indices. Use getGlobalRowCopy() instead.");
2616  indices = Teuchos::null;
2617  values = Teuchos::null;
2618  const LocalOrdinal lrow = getRowMap ()->getLocalElement (globalRow);
2619  if (lrow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2620  // getRowInfo() requires a local row index, whether or not
2621  // storage has been optimized.
2622  const RowInfo rowinfo = staticGraph_->getRowInfo(lrow);
2623  if (rowinfo.numEntries > 0) {
2624  ArrayView<const GO> indTmp = staticGraph_->getGlobalView (rowinfo);
2625  ArrayView<const Scalar> valTmp =
2626  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2627  indices = indTmp (0, rowinfo.numEntries);
2628  values = valTmp (0, rowinfo.numEntries);
2629  }
2630  }
2631 #ifdef HAVE_TPETRA_DEBUG
2632  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2633  static_cast<size_t> (indices.size ()) != this->getNumEntriesInGlobalRow (globalRow) ||
2634  indices.size () != values.size (),
2635  std::logic_error,
2636  "Violated stated post-conditions. Please contact Tpetra team.");
2637 #endif // HAVE_TPETRA_DEBUG
2638  }
2639 
2640  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2641  void
2643  scale (const Scalar& alpha)
2644  {
2645  typedef LocalOrdinal LO;
2646  typedef Kokkos::SparseRowView<local_matrix_type> row_view_type;
2647  typedef typename Teuchos::Array<Scalar>::size_type size_type;
2648  const char tfecfFuncName[] = "scale: ";
2649  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
2650 
2651  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2652  ! isFillActive (), std::runtime_error,
2653  "Fill must be active before you may call this method. "
2654  "Please call resumeFill() to make fill active.");
2655 
2656  const size_t nlrs = staticGraph_->getNodeNumRows ();
2657  const size_t numAlloc = staticGraph_->getNodeAllocationSize ();
2658  const size_t numEntries = staticGraph_->getNodeNumEntries ();
2659  if (! staticGraph_->indicesAreAllocated () || nlrs == 0 ||
2660  numAlloc == 0 || numEntries == 0) {
2661  // do nothing
2662  }
2663  else {
2664  if (staticGraph_->getProfileType () == StaticProfile) {
2665  const LO lclNumRows = lclMatrix_.numRows ();
2666  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
2667  row_view_type row_i = lclMatrix_.template row<typename row_view_type::size_type> (lclRow);
2668  for (LO k = 0; k < row_i.length; ++k) {
2669  // FIXME (mfh 02 Jan 2015) This assumes CUDA UVM.
2670  row_i.value (k) *= theAlpha;
2671  }
2672  }
2673  }
2674  else if (staticGraph_->getProfileType () == DynamicProfile) {
2675  for (size_t row = 0; row < nlrs; ++row) {
2676  const size_type numEnt = getNumEntriesInLocalRow (row);
2677  Teuchos::ArrayView<impl_scalar_type> rowVals = values2D_[row] ();
2678  for (size_type k = 0; k < numEnt; ++k) {
2679  rowVals[k] *= theAlpha;
2680  }
2681  }
2682  }
2683  }
2684  }
2685 
2686  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2687  void
2689  setAllToScalar (const Scalar& alpha)
2690  {
2691  const char tfecfFuncName[] = "setAllToScalar: ";
2692  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
2693  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2694  ! isFillActive (), std::runtime_error,
2695  "Fill must be active before you may call this method. "
2696  "Please call resumeFill() to make fill active.");
2697 
2698  // replace all values in the matrix
2699  // it is easiest to replace all allocated values, instead of replacing only the ones with valid entries
2700  // however, if there are no valid entries, we can short-circuit
2701  // furthermore, if the values aren't allocated, we can short-circuit (no entry have been inserted so far)
2702  const size_t nlrs = staticGraph_->getNodeNumRows(),
2703  numAlloc = staticGraph_->getNodeAllocationSize(),
2704  numEntries = staticGraph_->getNodeNumEntries();
2705  if (! staticGraph_->indicesAreAllocated () || numAlloc == 0 || numEntries == 0) {
2706  // do nothing
2707  }
2708  else {
2709  const ProfileType profType = staticGraph_->getProfileType ();
2710  if (profType == StaticProfile) {
2711  // FIXME (mfh 24 Dec 2014) Once CrsMatrix implements DualView
2712  // semantics, this would be the place to mark memory as
2713  // modified.
2714  typedef typename local_matrix_type::values_type values_type;
2715  Kokkos::Impl::ViewFill<values_type> (k_values1D_, theAlpha);
2716  }
2717  else if (profType == DynamicProfile) {
2718  for (size_t row = 0; row < nlrs; ++row) {
2719  std::fill (values2D_[row].begin (), values2D_[row].end (), theAlpha);
2720  }
2721  }
2722  }
2723  }
2724 
2725  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2726  void
2728  setAllValues (const typename local_matrix_type::row_map_type& rowPointers,
2729  const typename local_graph_type::entries_type::non_const_type& columnIndices,
2730  const typename local_matrix_type::values_type& values)
2731  {
2732  const char tfecfFuncName[] = "setAllValues";
2733  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2734  columnIndices.size () != values.size (), std::runtime_error,
2735  ": columnIndices and values must have the same size. columnIndices.size() = "
2736  << columnIndices.size () << " != values.size() = " << values.size () << ".");
2737  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2738  myGraph_.is_null (), std::runtime_error, ": myGraph_ must not be null.");
2739 
2740  try {
2741  myGraph_->setAllIndices (rowPointers, columnIndices);
2742  }
2743  catch (std::exception &e) {
2744  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2745  true, std::runtime_error, ": Caught exception while calling myGraph_->"
2746  "setAllIndices(): " << e.what ());
2747  }
2748  k_values1D_ = values;
2749  checkInternalState ();
2750  }
2751 
2752  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2753  void
2755  setAllValues (const Teuchos::ArrayRCP<size_t>& rowPointers,
2756  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices,
2757  const Teuchos::ArrayRCP<Scalar>& values)
2758  {
2759  const char tfecfFuncName[] = "setAllValues: ";
2760  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2761  columnIndices.size () != values.size (), std::runtime_error,
2762  "columnIndices.size() = " << columnIndices.size () << " != "
2763  "values.size() = " << values.size () << ".");
2764  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2765  myGraph_.is_null (), std::runtime_error, "myGraph_ must not be null.");
2766 
2767  try {
2768  myGraph_->setAllIndices (rowPointers, columnIndices);
2769  }
2770  catch (std::exception &e) {
2771  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2772  true, std::runtime_error, "Caught exception while calling myGraph_->"
2773  "setAllIndices(): " << e.what ());
2774  }
2775  Teuchos::ArrayRCP<impl_scalar_type> vals =
2776  Teuchos::arcp_reinterpret_cast<impl_scalar_type> (values);
2777  k_values1D_ = Kokkos::Compat::getKokkosViewDeepCopy<device_type> (vals ());
2778  checkInternalState ();
2779  }
2780 
2781  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2782  void
2784  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
2785  {
2786  using Teuchos::ArrayRCP;
2787  using Teuchos::ArrayView;
2788  const char tfecfFuncName[] = "getLocalDiagOffsets";
2789 
2790  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2791  ! hasColMap (), std::runtime_error,
2792  ": This method requires that the matrix have a column Map.");
2793  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2794  staticGraph_.is_null (), std::runtime_error,
2795  ": This method requires that the matrix have a graph.");
2796 
2797  const map_type& rowMap = * (this->getRowMap ());
2798  const map_type& colMap = * (this->getColMap ());
2799 
2800  const size_t myNumRows = getNodeNumRows ();
2801  if (static_cast<size_t> (offsets.size ()) != myNumRows) {
2802  offsets.resize (static_cast<size_t> (myNumRows));
2803  }
2804 
2805 #ifdef HAVE_TPETRA_DEBUG
2806  bool allRowMapDiagEntriesInColMap = true;
2807  bool allDiagEntriesFound = true;
2808 #endif // HAVE_TPETRA_DEBUG
2809 
2810  for (size_t r = 0; r < myNumRows; ++r) {
2811  const GlobalOrdinal rgid = rowMap.getGlobalElement (r);
2812  const LocalOrdinal rlid = colMap.getLocalElement (rgid);
2813 
2814 #ifdef HAVE_TPETRA_DEBUG
2815  if (rlid == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2816  allRowMapDiagEntriesInColMap = false;
2817  }
2818 #endif // HAVE_TPETRA_DEBUG
2819 
2820  if (rlid != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2821  RowInfo rowinfo = staticGraph_->getRowInfo (r);
2822  if (rowinfo.numEntries > 0) {
2823  offsets[r] = staticGraph_->findLocalIndex (rowinfo, rlid);
2824  }
2825  else {
2826  offsets[r] = Teuchos::OrdinalTraits<size_t>::invalid ();
2827 #ifdef HAVE_TPETRA_DEBUG
2828  allDiagEntriesFound = false;
2829 #endif // HAVE_TPETRA_DEBUG
2830  }
2831  }
2832  }
2833 
2834 #ifdef HAVE_TPETRA_DEBUG
2835  using Teuchos::reduceAll;
2836  using std::endl;
2837 
2838  const bool localSuccess =
2839  allRowMapDiagEntriesInColMap && allDiagEntriesFound;
2840  int localResults[3];
2841  localResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
2842  localResults[1] = allDiagEntriesFound ? 1 : 0;
2843  // min-all-reduce will compute least rank of all the processes
2844  // that didn't succeed.
2845  localResults[2] =
2846  ! localSuccess ? getComm ()->getRank () : getComm ()->getSize ();
2847  int globalResults[3];
2848  globalResults[0] = 0;
2849  globalResults[1] = 0;
2850  globalResults[2] = 0;
2851  reduceAll<int, int> (* (getComm ()), Teuchos::REDUCE_MIN,
2852  3, localResults, globalResults);
2853  if (globalResults[0] == 0 || globalResults[1] == 0) {
2854  std::ostringstream os; // build error message
2855  const bool both =
2856  globalResults[0] == 0 && globalResults[1] == 0;
2857  os << ": At least one process (including Process " << globalResults[2]
2858  << ") had the following issue" << (both ? "s" : "") << ":" << endl;
2859  if (globalResults[0] == 0) {
2860  os << " - The column Map does not contain at least one diagonal entry "
2861  "of the matrix." << endl;
2862  }
2863  if (globalResults[1] == 0) {
2864  os << " - There is a row on that / those process(es) that does not "
2865  "contain a diagonal entry." << endl;
2866  }
2867  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
2868  }
2869 #endif // HAVE_TPETRA_DEBUG
2870  }
2871 
// Copy the diagonal entry of every local row into the Vector `dvec`.
//
// For each local row r, the row's global index is looked up in the column
// Map; if present, the graph's findLocalIndex() locates the entry and its
// value is stored in element r of the Vector's host data.  Rows with no
// diagonal entry get STS::zero().  The host data is marked modified at
// the start and synced to execution_space at the end.
//
// Throws std::runtime_error if the matrix has no column Map or no graph.
// In a HAVE_TPETRA_DEBUG build, also throws std::runtime_error if dvec's
// Map is not compatible with the row Map.
//
// NOTE(review): this listing's embedded numbering skips 2874-2875 and
// 2881, so the class qualifier, the function name / parameter list, and
// what is presumably the `vec_type` typedef are not present here.  The
// body reads a parameter named `dvec` -- confirm its type against the
// class declaration.
2872  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2873  void
2876  {
2877  using Teuchos::ArrayRCP;
2878  using Teuchos::ArrayView;
2879  using Teuchos::av_reinterpret_cast;
2880  const char tfecfFuncName[] = "getLocalDiagCopy: ";
2882  typedef typename vec_type::dual_view_type dual_view_type;
2883  typedef typename dual_view_type::host_mirror_space::execution_space host_execution_space;
2884 
2885  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2886  ! hasColMap (), std::runtime_error,
2887  "This method requires that the matrix have a column Map.");
2888  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2889  staticGraph_.is_null (), std::runtime_error,
2890  "This method requires that the matrix have a graph.");
2891  const map_type& rowMap = * (this->getRowMap ());
2892  const map_type& colMap = * (this->getColMap ());
2893 
2894 #ifdef HAVE_TPETRA_DEBUG
2895  // isCompatible() requires an all-reduce, and thus this check
2896  // should only be done in debug mode.
// NOTE(review): this message starts with ": " although tfecfFuncName
// above already ends with ": "; the other messages in this method do not.
// Presumably the emitted text has a doubled separator -- confirm against
// the macro's definition.
2897  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2898  ! dvec.getMap ()->isCompatible (rowMap), std::runtime_error,
2899  ": The input Vector's Map must be compatible with the CrsMatrix's row "
2900  "Map.  You may check this by using Map's isCompatible method: "
2901  "dvec.getMap ()->isCompatible (A.getRowMap ());");
2902 #endif // HAVE_TPETRA_DEBUG
2903 
2904  // For now, we fill the Vector on the host and sync to device.
2905  // Later, we may write a parallel kernel that works entirely on
2906  // device.
2907  dual_view_type lclVec = dvec.getDualView ();
2908  lclVec.template modify<host_execution_space> ();
2909  typedef typename dual_view_type::t_host host_view_type;
2910  host_view_type lclVecHost = lclVec.h_view;
2911 
2912  // 1-D subview of lclVecHost.  All the "typename" stuff ensures
2913  // that we get the same layout and memory traits as the original
2914  // 2-D view.
2915  typedef typename Kokkos::View<impl_scalar_type*,
2916  typename host_view_type::array_layout,
2917  typename host_view_type::device_type,
2918  typename host_view_type::memory_traits>
2919  host_view_1d_type;
// Column 0 of the host view is the one that gets filled.
2920  host_view_1d_type lclVecHost1d =
2921  Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
2922 
2923  // Find the diagonal entries and put them in lclVecHost1d.
2924  const size_t myNumRows = getNodeNumRows ();
2925  for (size_t r = 0; r < myNumRows; ++r) {
2926  lclVecHost1d(r) = STS::zero (); // default value if no diag entry
// The diagonal entry of row r is the entry whose column has the same
// global index as the row, so translate row -> global -> local column.
2927  const GlobalOrdinal rgid = rowMap.getGlobalElement (r);
2928  const LocalOrdinal rlid = colMap.getLocalElement (rgid);
2929 
2930  if (rlid != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2931  RowInfo rowinfo = staticGraph_->getRowInfo (r);
2932  if (rowinfo.numEntries > 0) {
2933  const size_t j = staticGraph_->findLocalIndex (rowinfo, rlid);
2934  if (j != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2935  // NOTE (mfh 02 Jan 2015) This technically does not assume
2936  // UVM, since getView and getViewNonConst are supposed to
2937  // return views of host data.
2938  ArrayView<const impl_scalar_type> view = this->getView (rowinfo);
2939  lclVecHost1d(r) = view[j];
2940  }
2941  }
2942  }
2943  }
2944  lclVec.template sync<execution_space> (); // sync changes back to device
2945  }
2946 
// Copy the diagonal entry of every local row into the Vector `diag`,
// using precomputed per-row offsets instead of searching each row.
//
// offsets: for each local row i, the position of the diagonal entry
//   within the row, or Teuchos::OrdinalTraits<size_t>::invalid() if the
//   row has none.
//
// For each local row i, element i of the Vector's host data is set to
// STS::zero(), then overwritten with the row's value at offsets[i] when
// that offset is not invalid.  The host data is marked modified at the
// start and synced to execution_space at the end.
//
// In a HAVE_TPETRA_DEBUG build, throws std::runtime_error if diag's Map
// is not compatible with the row Map.
//
// NOTE(review): this listing's embedded numbering skips 2949-2950 and
// 2955, so the class qualifier, the function name with its first
// parameter, and what is presumably the `vec_type` typedef are not
// present here.  The body reads a parameter named `diag` -- confirm its
// type against the class declaration.
//
// NOTE(review): offsets[i] and val[offsets[i]] are indexed without an
// explicit size check in this method; it appears to rely on `offsets`
// holding at least getNodeNumRows() elements that match the current
// structure -- confirm what the callers guarantee.
2947  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2948  void
2951  const Teuchos::ArrayView<const size_t>& offsets) const
2952  {
2953  using Teuchos::ArrayRCP;
2954  using Teuchos::ArrayView;
2956  typedef typename vec_type::dual_view_type dual_view_type;
2957  typedef typename dual_view_type::host_mirror_space::execution_space host_execution_space;
2958 
2959 #ifdef HAVE_TPETRA_DEBUG
2960  const char tfecfFuncName[] = "getLocalDiagCopy: ";
2961  const map_type& rowMap = * (this->getRowMap ());
2962  // isCompatible() requires an all-reduce, and thus this check
2963  // should only be done in debug mode.
2964  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2965  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
2966  "The input Vector's Map must be compatible with (in the sense of Map::"
2967  "isCompatible) the CrsMatrix's row Map.");
2968 #endif // HAVE_TPETRA_DEBUG
2969 
2970  // For now, we fill the Vector on the host and sync to device.
2971  // Later, we may write a parallel kernel that works entirely on
2972  // device.
2973  dual_view_type lclVec = diag.getDualView ();
2974  lclVec.template modify<host_execution_space> ();
2975  typedef typename dual_view_type::t_host host_view_type;
2976  host_view_type lclVecHost = lclVec.h_view;
2977 
2978  // 1-D subview of lclVecHost.  All the "typename" stuff ensures
2979  // that we get the same layout and memory traits as the original
2980  // 2-D view.
2981  typedef typename Kokkos::View<impl_scalar_type*,
2982  typename host_view_type::array_layout,
2983  typename host_view_type::device_type,
2984  typename host_view_type::memory_traits>
2985  host_view_1d_type;
// Column 0 of the host view is the one that gets filled.
2986  host_view_1d_type lclVecHost1d =
2987  Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
2988 
2989  // Find the diagonal entries and put them in lclVecHost1d.
2990  const size_t myNumRows = getNodeNumRows ();
2991  for (size_t i = 0; i < myNumRows; ++i) {
2992  lclVecHost1d(i) = STS::zero (); // default value if no diag entry
2993  if (offsets[i] != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2994  ArrayView<const LocalOrdinal> ind;
2995  ArrayView<const Scalar> val;
2996  // NOTE (mfh 02 Jan 2015) This technically does not assume
2997  // UVM, since the get{Global,Local}RowView methods are
2998  // supposed to return views of host data.
2999  this->getLocalRowView (i, ind, val);
3000  lclVecHost1d(i) = static_cast<impl_scalar_type> (val[offsets[i]]);
3001  }
3002  }
3003  lclVec.template sync<execution_space> (); // sync changes back to device
3004  }
3005 
3006 
// Scale the rows of the matrix in place: every valid entry of local row
// i is multiplied by element i of the scaling vector `x`.
//
// `x` must have the same Map as either the range Map or the row Map of
// the matrix.  If it matches the range Map and the graph has an exporter,
// `x` is first redistributed into a temporary vector on the row Map.
//
// Throws std::runtime_error if the matrix is not fill complete, and
// std::invalid_argument if x's Map matches neither the range Map nor the
// row Map.
//
// NOTE(review): this listing's embedded numbering skips 3009-3010 and
// 3018, so the class qualifier, the function name / parameter list, and
// what is presumably the `vec_type` typedef are not present here.  The
// body reads a parameter named `x` -- confirm its type against the class
// declaration.
3007  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3008  void
3011  {
3012  using Teuchos::ArrayRCP;
3013  using Teuchos::ArrayView;
3014  using Teuchos::null;
3015  using Teuchos::RCP;
3016  using Teuchos::rcp;
3017  using Teuchos::rcpFromRef;
3019  const char tfecfFuncName[] = "leftScale";
3020 
3021  // FIXME (mfh 06 Aug 2014) This doesn't make sense.  The matrix
3022  // should only be modified when it is not fill complete.
3023  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3024  ! isFillComplete (), std::runtime_error,
3025  ": matrix must be fill complete.");
// xp ends up pointing at a vector that is indexed by local row below.
3026  RCP<const vec_type> xp;
3027 
3028  if (getRangeMap ()->isSameAs (* (x.getMap ()))){
3029  // Take from Epetra: If we have a non-trivial exporter, we must
3030  // import elements that are permuted or are on other processors.
3031  // (We will use the exporter to perform the import ("reverse
3032  // mode").)
3033  if (getCrsGraph ()->getExporter () != null) {
3034  RCP<vec_type> tempVec = rcp (new vec_type (getRowMap ()));
3035  tempVec->doImport (x, * (getCrsGraph ()->getExporter ()), INSERT);
3036  xp = tempVec;
3037  }
3038  else {
// No exporter: x is used directly, without copying.
3039  xp = rcpFromRef (x);
3040  }
3041  }
3042  else if (getRowMap ()->isSameAs (* (x.getMap ()))) {
3043  xp = rcpFromRef (x);
3044  }
3045  else {
3046  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, ": The "
3047  "input scaling vector x's Map must be the same as either the row Map or "
3048  "the range Map of the CrsMatrix.");
3049  }
3050  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
3051  ArrayView<impl_scalar_type> rowValues = null;
3052 
3053  const size_t lclNumRows = this->getNodeNumRows ();
3054  for (size_t i = 0; i < lclNumRows; ++i) {
3055  const RowInfo rowinfo = staticGraph_->getRowInfo (static_cast<LocalOrdinal> (i));
3056  rowValues = this->getViewNonConst (rowinfo);
3057  const impl_scalar_type scaleValue = static_cast<impl_scalar_type> (vectorVals[i]);
// Only the first numEntries slots of the row view are scaled.
3058  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
3059  rowValues[j] *= scaleValue;
3060  }
3061  }
3062  }
3063 
3064  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3065  void
3068  {
3069  using Teuchos::ArrayRCP;
3070  using Teuchos::ArrayView;
3071  using Teuchos::null;
3072  using Teuchos::RCP;
3073  using Teuchos::rcp;
3074  using Teuchos::rcpFromRef;
3076  const char tfecfFuncName[] = "rightScale: ";
3077 
3078  // FIXME (mfh 06 Aug 2014) This doesn't make sense. The matrix
3079  // should only be modified when it is not fill complete.
3080  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3081  ! isFillComplete (), std::runtime_error, "Matrix must be fill complete.");
3082  RCP<const vec_type> xp;
3083  if (getDomainMap ()->isSameAs (* (x.getMap ()))) {
3084  // Take from Epetra: If we have a non-trivial exporter, we must
3085  // import elements that are permuted or are on other processors.
3086  // (We will use the exporter to perform the import.)
3087  if (getCrsGraph ()->getImporter () != null) {
3088  RCP<vec_type> tempVec = rcp (new vec_type (getColMap ()));
3089  tempVec->doImport (x, * (getCrsGraph ()->getImporter ()), INSERT);
3090  xp = tempVec;
3091  }
3092  else {
3093  xp = rcpFromRef (x);
3094  }
3095  }
3096  else if (getRowMap ()->isSameAs (* (x.getMap ()))) {
3097  xp = rcpFromRef (x);
3098  } else {
3099  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3100  true, std::runtime_error, "The vector x must have the same Map as "
3101  "either the row Map or the range Map.");
3102  }
3103 
3104  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
3105  ArrayView<impl_scalar_type> rowValues = null;
3106 
3107  const size_t lclNumRows = this->getNodeNumRows ();
3108  for (size_t i = 0; i < lclNumRows; ++i) {
3109  const RowInfo rowinfo = staticGraph_->getRowInfo (static_cast<LocalOrdinal> (i));
3110  rowValues = this->getViewNonConst (rowinfo);
3111  ArrayView<const LocalOrdinal> colInds;
3112  getCrsGraph ()->getLocalRowView (static_cast<LocalOrdinal> (i), colInds);
3113  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
3114  rowValues[j] *= static_cast<impl_scalar_type> (vectorVals[colInds[j]]);
3115  }
3116  }
3117  }
3118 
3119  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3123  {
3124  using Teuchos::outArg;
3125  using Teuchos::REDUCE_SUM;
3126  using Teuchos::reduceAll;
3127  typedef typename Teuchos::ArrayRCP<const impl_scalar_type>::size_type size_type;
3128 
3129  // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
3130  // local part of this computation. It could make sense to put
3131  // this operation in the Kokkos::CrsMatrix.
3132 
3133  // check the cache first
3134  mag_type frobNorm = frobNorm_;
3135  if (frobNorm == -STM::one ()) {
3136  mag_type mySum = STM::zero ();
3137  if (getNodeNumEntries() > 0) {
3138  if (isStorageOptimized ()) {
3139  // "Optimized" storage is packed storage. That means we can
3140  // iterate in one pass through the 1-D values array.
3141  const size_type numEntries =
3142  static_cast<size_type> (getNodeNumEntries ());
3143  for (size_type k = 0; k < numEntries; ++k) {
3144  // FIXME (mfh 05 Aug 2014) This assumes UVM.
3145  const impl_scalar_type val = k_values1D_(k);
3146  // Note (etp 06 Jan 2015) We need abs() here for composite types
3147  // (in general, if mag_type is on the left-hand-side, we need
3148  // abs() on the right-hand-side)
3149  const mag_type val_abs = STS::abs (val);
3150  mySum += val_abs * val_abs;
3151  }
3152  }
3153  else {
3154  const size_t numRows = getNodeNumRows ();
3155  for (size_t r = 0; r < numRows; ++r) {
3156  RowInfo rowInfo = myGraph_->getRowInfo (r);
3157  const size_type numEntries =
3158  static_cast<size_type> (rowInfo.numEntries);
3159  ArrayView<const impl_scalar_type> A_r =
3160  this->getView (rowInfo).view (0, numEntries);
3161  for (size_type k = 0; k < numEntries; ++k) {
3162  const impl_scalar_type val = A_r[k];
3163  const mag_type val_abs = STS::abs (val);
3164  mySum += val_abs * val_abs;
3165  }
3166  }
3167  }
3168  }
3169  mag_type totalSum = STM::zero ();
3170  reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3171  mySum, outArg (totalSum));
3172  frobNorm = STM::sqrt (totalSum);
3173  }
3174  if (isFillComplete ()) {
3175  // Only cache the result if the matrix is fill complete.
3176  // Otherwise, the values might still change. resumeFill clears
3177  // the cache.
3178  frobNorm_ = frobNorm;
3179  }
3180  return frobNorm;
3181  }
3182 
3183  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3184  void
3186  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3187  {
3188  const char tfecfFuncName[] = "replaceColMap: ";
3189  // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
3190  // Then replacing the column Map might mean that we need to
3191  // reindex the column indices.
3192  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3193  myGraph_.is_null (), std::runtime_error,
3194  "This method does not work if the matrix has a const graph. The whole "
3195  "idea of a const graph is that you are not allowed to change it, but "
3196  "this method necessarily must modify the graph, since the graph owns "
3197  "the matrix's column Map.");
3198  myGraph_->replaceColMap (newColMap);
3199  }
3200 
3201  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3202  void
3205  const Teuchos::RCP<const map_type>& newColMap,
3206  const Teuchos::RCP<const import_type>& newImport,
3207  const bool sortEachRow)
3208  {
3209  const char tfecfFuncName[] = "reindexColumns: ";
3210  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3211  graph == NULL && myGraph_.is_null (), std::invalid_argument,
3212  "The input graph is NULL, but the matrix does not own its graph.");
3213 
3214  crs_graph_type& theGraph = (graph == NULL) ? *myGraph_ : *graph;
3215  const bool sortGraph = false; // we'll sort graph & matrix together below
3216  theGraph.reindexColumns (newColMap, newImport, sortGraph);
3217  if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3218  // We can't just call sortEntries() here, because that fails if
3219  // the matrix has a const graph. We want to use the given graph
3220  // in that case.
3221  const size_t lclNumRows = theGraph.getNodeNumRows ();
3222  for (size_t row = 0; row < lclNumRows; ++row) {
3223  RowInfo rowInfo = theGraph.getRowInfo (row);
3224  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
3225  theGraph.template sortRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
3226  }
3227  theGraph.indicesAreSorted_ = true;
3228  }
3229  }
3230 
3231  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3232  void
3234  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
3235  Teuchos::RCP<const import_type>& newImporter)
3236  {
3237  const char tfecfFuncName[] = "replaceDomainMapAndImporter: ";
3238  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3239  myGraph_.is_null (), std::runtime_error,
3240  "This method does not work if the matrix has a const graph. The whole "
3241  "idea of a const graph is that you are not allowed to change it, but this"
3242  " method necessarily must modify the graph, since the graph owns the "
3243  "matrix's domain Map and Import objects.");
3244  myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
3245  }
3246 
3247  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3248  void
3250  insertNonownedGlobalValues (const GlobalOrdinal globalRow,
3251  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
3252  const Teuchos::ArrayView<const Scalar>& values)
3253  {
3254  using Teuchos::Array;
3255  typedef GlobalOrdinal GO;
3256  typedef typename Array<GO>::size_type size_type;
3257 
3258  const size_type numToInsert = indices.size ();
3259  // Add the new data to the list of nonlocals.
3260  // This creates the arrays if they don't exist yet.
3261  std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
3262  Array<GO>& curRowInds = curRow.first;
3263  Array<Scalar>& curRowVals = curRow.second;
3264  const size_type newCapacity = curRowInds.size () + numToInsert;
3265  curRowInds.reserve (newCapacity);
3266  curRowVals.reserve (newCapacity);
3267  for (size_type k = 0; k < numToInsert; ++k) {
3268  curRowInds.push_back (indices[k]);
3269  curRowVals.push_back (values[k]);
3270  }
3271  }
3272 
3273  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3274  void
3277  {
3278  using Teuchos::arcp;
3279  using Teuchos::Array;
3280  using Teuchos::ArrayRCP;
3281  using Teuchos::ArrayView;
3282  using Teuchos::CommRequest;
3283  using Teuchos::gatherAll;
3284  using Teuchos::isend;
3285  using Teuchos::ireceive;
3286  using Teuchos::null;
3287  using Teuchos::outArg;
3288  using Teuchos::RCP;
3289  using Teuchos::rcpFromRef;
3290  using Teuchos::REDUCE_MAX;
3291  using Teuchos::reduceAll;
3292  using Teuchos::SerialDenseMatrix;
3293  using Teuchos::tuple;
3294  using Teuchos::waitAll;
3295  using std::make_pair;
3296  using std::pair;
3297  typedef GlobalOrdinal GO;
3298  typedef typename Array<GO>::size_type size_type;
3299  // nonlocals_ contains the entries stored by previous calls to
3300  // insertGlobalValues() for nonowned rows.
3301  typedef std::map<GO, pair<Array<GO>, Array<Scalar> > > nonlocals_map_type;
3302  typedef typename nonlocals_map_type::const_iterator nonlocals_iter_type;
3303 
3304  const char tfecfFuncName[] = "globalAssemble";
3305  const Teuchos::Comm<int>& comm = * (getComm ());
3306  const int numImages = comm.getSize ();
3307  const int myImageID = comm.getRank ();
3308 
3309  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3310  ! isFillActive (), std::runtime_error, ": requires that fill is active.");
3311 
3312  // Determine (via a global all-reduce) if any processes have
3313  // nonlocal entries to share. This is necessary even if the
3314  // matrix has a static graph, because insertGlobalValues allows
3315  // nonlocal entries in that case.
3316  size_t MyNonlocals = static_cast<size_t> (nonlocals_.size ());
3317  size_t MaxGlobalNonlocals = 0;
3318  reduceAll<int, size_t> (comm, REDUCE_MAX, MyNonlocals,
3319  outArg (MaxGlobalNonlocals));
3320  if (MaxGlobalNonlocals == 0) {
3321  return; // no entries to share
3322  }
3323 
3324  // FIXME (mfh 14 Dec 2012) The code below reimplements an Export
3325  // operation. It would be better just to use an Export. See
3326  // Comment #34 in discussion of Bug 5782.
3327  //
3328  // mfh 24 Feb 2014: On the other hand, this is not technically an
3329  // Export, since the row Map might not necessarily be one-to-one.
3330 
3331  // compute a list of NLRs from nonlocals_ and use it to compute:
3332  // IdsAndRows: a vector of (id,row) pairs
3333  // NLR2Id: a map from NLR to the Id that owns it
3334  // globalNeighbors: a global graph of connectivity between images:
3335  // globalNeighbors(i,j) indicates that j sends to i
3336  // sendIDs: a list of all images I send to
3337  // recvIDs: a list of all images I receive from (constructed later)
3338  Array<pair<int,GlobalOrdinal> > IdsAndRows;
3339  std::map<GlobalOrdinal,int> NLR2Id;
3340  SerialDenseMatrix<int,char> globalNeighbors;
3341  Array<int> sendIDs, recvIDs;
3342  {
3343  // Construct the set of all nonowned rows encountered by this
3344  // process in insertGlobalValues() or sumIntoGlobalValues().
3345  std::set<GlobalOrdinal> setOfRows;
3346  for (nonlocals_iter_type iter = nonlocals_.begin ();
3347  iter != nonlocals_.end (); ++iter) {
3348  setOfRows.insert (iter->first);
3349  }
3350  // Copy the resulting set of nonowned rows into an Array.
3351  Array<GlobalOrdinal> NLRs (setOfRows.size ());
3352  std::copy (setOfRows.begin (), setOfRows.end (), NLRs.begin ());
3353 
3354  // get a list of ImageIDs for the non-local rows (NLRs)
3355  Array<int> NLRIds (NLRs.size ());
3356  {
3357  const LookupStatus stat =
3358  getRowMap ()->getRemoteIndexList (NLRs (), NLRIds ());
3359  const int lclerr = (stat == IDNotPresent ? 1 : 0);
3360  int gblerr;
3361  reduceAll<int, int> (comm, REDUCE_MAX, lclerr, outArg (gblerr));
3362  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3363  gblerr, std::runtime_error, ": non-local entries correspond to "
3364  "invalid rows.");
3365  }
3366 
3367  // build up a list of neighbors, as well as a map between NLRs and Ids
3368  // localNeighbors[i] != 0 iff I have data to send to image i
3369  // put NLRs,Ids into an array of pairs
3370  IdsAndRows.reserve (NLRs.size ());
3371  Array<char> localNeighbors (numImages, 0);
3372  typename Array<GO>::const_iterator nlr;
3373  typename Array<int>::const_iterator id;
3374  for (nlr = NLRs.begin (), id = NLRIds.begin ();
3375  nlr != NLRs.end (); ++nlr, ++id) {
3376  NLR2Id[*nlr] = *id;
3377  localNeighbors[*id] = 1;
3378  IdsAndRows.push_back (make_pair (*id, *nlr));
3379  }
3380  for (int j = 0; j < numImages; ++j) {
3381  if (localNeighbors[j]) {
3382  sendIDs.push_back (j);
3383  }
3384  }
3385  // sort IdsAndRows, by Ids first, then rows
3386  std::sort (IdsAndRows.begin (), IdsAndRows.end ());
3387  // gather from other nodes to form the full graph
3388  //
3389  // FIXME (mfh 24 Feb 2014) Ugh, this is awful!!! It's making a
3390  // P x P matrix which is the full graph of process connectivity.
3391  // Neither Export nor Import does this! It would probably be
3392  // more efficient to do the following:
3393  //
3394  // 1. Form the one-to-one version of the row Map, tgtMap
3395  // 2. Form the (possibly overlapping) Map srcMap, with the
3396  // global row indices which are the keys of nonlocals_ on
3397  // each process
3398  // 3. Construct an Export from srcMap to tgtMap
3399  // 4. Execute the Export with Tpetra::ADD
3400  globalNeighbors.shapeUninitialized (numImages, numImages);
3401  gatherAll (comm, numImages, localNeighbors.getRawPtr (),
3402  numImages*numImages, globalNeighbors.values ());
3403  // globalNeighbors at this point contains (on all images) the
3404  // connectivity between the images.
3405  // globalNeighbors(i,j) != 0 means that j sends to i/that i receives from j
3406  }
3407 
3409  // FIGURE OUT WHO IS SENDING TO WHOM AND HOW MUCH
3410  // DO THIS IN THE PROCESS OF PACKING ALL OUTGOING DATA ACCORDING TO DESTINATION ID
3412 
3413  // loop over all columns to know from which images I can expect to receive something
3414  for (int j=0; j<numImages; ++j) {
3415  if (globalNeighbors (myImageID, j)) {
3416  recvIDs.push_back (j);
3417  }
3418  }
3419  const size_t numRecvs = recvIDs.size ();
3420 
3421  // we know how many we're sending to already
3422  // form a contiguous list of all data to be sent
3423  // track the number of entries for each ID
3424  Array<Details::CrsIJV<GlobalOrdinal, Scalar> > IJVSendBuffer;
3425  Array<size_t> sendSizes (sendIDs.size(), 0);
3426  size_t numSends = 0;
3427  for (typename Array<pair<int, GlobalOrdinal> >::const_iterator IdAndRow = IdsAndRows.begin();
3428  IdAndRow != IdsAndRows.end(); ++IdAndRow)
3429  {
3430  const int id = IdAndRow->first;
3431  const GO row = IdAndRow->second;
3432 
3433  // have we advanced to a new send?
3434  if (sendIDs[numSends] != id) {
3435  numSends++;
3436  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3437  sendIDs[numSends] != id, std::logic_error,
3438  ": internal logic error. Contact Tpetra team.");
3439  }
3440 
3441  // copy data for row into contiguous storage
3442  pair<Array<GO>, Array<Scalar> >& nonlocalsRow = nonlocals_[row];
3443  ArrayView<const GO> nonlocalsRow_colInds = nonlocalsRow.first ();
3444  ArrayView<const Scalar> nonlocalsRow_values = nonlocalsRow.second ();
3445  const size_type numNonlocalsRow = nonlocalsRow_colInds.size ();
3446 
3447  for (size_type k = 0; k < numNonlocalsRow; ++k) {
3448  const Scalar val = nonlocalsRow_values[k];
3449  const GO col = nonlocalsRow_colInds[k];
3450  IJVSendBuffer.push_back (Details::CrsIJV<GO, Scalar> (row, col, val));
3451  sendSizes[numSends]++;
3452  }
3453  }
3454  if (IdsAndRows.size () > 0) {
3455  numSends++; // one last increment, to make it a count instead of an index
3456  }
3457  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3458  static_cast<size_type> (numSends) != sendIDs.size(),
3459  std::logic_error, ": internal logic error. Contact Tpetra team.");
3460 
3461  // don't need this data anymore
3462  // clear it before we start allocating a bunch of new memory
3463  nonlocals_.clear ();
3464 
3466  // TRANSMIT SIZE INFO BETWEEN SENDERS AND RECEIVERS
3468  // perform non-blocking sends: send sizes to our recipients
3469  Array<RCP<CommRequest<int> > > sendRequests;
3470  for (size_t s = 0; s < numSends ; ++s) {
3471  // we'll fake the memory management, because all communication will be local to this method and the scope of our data
3472  sendRequests.push_back (isend<int, size_t> (comm, rcpFromRef (sendSizes[s]), sendIDs[s]));
3473  }
3474  // perform non-blocking receives: receive sizes from our senders
3475  Array<RCP<CommRequest<int> > > recvRequests;
3476  Array<size_t> recvSizes (numRecvs);
3477  for (size_t r = 0; r < numRecvs; ++r) {
3478  // we'll fake the memory management, because all communication
3479  // will be local to this method and the scope of our data
3480  recvRequests.push_back (ireceive<int, size_t> (comm, rcpFromRef (recvSizes[r]), recvIDs[r]));
3481  }
3482  // wait on all
3483  if (! sendRequests.empty ()) {
3484  waitAll (comm, sendRequests ());
3485  }
3486  if (! recvRequests.empty ()) {
3487  waitAll (comm, recvRequests ());
3488  }
3489  comm.barrier ();
3490  sendRequests.clear ();
3491  recvRequests.clear ();
3492 
3494  // NOW SEND/RECEIVE ALL ROW DATA
3496  // from the size info, build the ArrayViews into IJVSendBuffer
3497  Array<ArrayView<Details::CrsIJV<GO, Scalar> > > sendBuffers (numSends, null);
3498  {
3499  size_t cur = 0;
3500  for (size_t s=0; s<numSends; ++s) {
3501  sendBuffers[s] = IJVSendBuffer (cur, sendSizes[s]);
3502  cur += sendSizes[s];
3503  }
3504  }
3505  // perform non-blocking sends
3506  for (size_t s = 0; s < numSends; ++s) {
3507  // we'll fake the memory management, because all communication
3508  // will be local to this method and the scope of our data
3509  ArrayRCP<Details::CrsIJV<GO, Scalar> > tmparcp =
3510  arcp (sendBuffers[s].getRawPtr (), 0, sendBuffers[s].size (), false);
3511  sendRequests.push_back (isend<int, Details::CrsIJV<GlobalOrdinal,Scalar> > (comm, tmparcp, sendIDs[s]));
3512  }
3513  // calculate amount of storage needed for receives
3514  // setup pointers for the receives as well
3515  size_t totalRecvSize = std::accumulate (recvSizes.begin (), recvSizes.end (), 0);
3516  Array<Details::CrsIJV<GO, Scalar> > IJVRecvBuffer (totalRecvSize);
3517  // from the size info, build the ArrayViews into IJVRecvBuffer
3518  Array<ArrayView<Details::CrsIJV<GO, Scalar> > > recvBuffers (numRecvs, null);
3519  {
3520  size_t cur = 0;
3521  for (size_t r = 0; r < numRecvs; ++r) {
3522  recvBuffers[r] = IJVRecvBuffer (cur, recvSizes[r]);
3523  cur += recvSizes[r];
3524  }
3525  }
3526  // perform non-blocking recvs
3527  for (size_t r = 0; r < numRecvs ; ++r) {
3528  // we'll fake the memory management, because all communication
3529  // will be local to this method and the scope of our data
3530  ArrayRCP<Details::CrsIJV<GO, Scalar> > tmparcp =
3531  arcp (recvBuffers[r].getRawPtr (), 0, recvBuffers[r].size (), false);
3532  recvRequests.push_back (ireceive (comm, tmparcp, recvIDs[r]));
3533  }
3534  // perform waits
3535  if (! sendRequests.empty ()) {
3536  waitAll (comm, sendRequests ());
3537  }
3538  if (! recvRequests.empty ()) {
3539  waitAll (comm, recvRequests ());
3540  }
3541  comm.barrier ();
3542  sendRequests.clear ();
3543  recvRequests.clear ();
3544 
3546  // NOW PROCESS THE RECEIVED ROW DATA
3548  // TODO: instead of adding one entry at a time, add one row at a time.
3549  // this requires resorting; they arrived sorted by sending node, so that entries could be non-contiguous if we received
3550  // multiple entries for a particular row from different processors.
3551  // it also requires restoring the data, which may make it not worth the trouble.
3552 
3553  typedef typename Array<Details::CrsIJV<GO, Scalar> >::const_iterator ijv_iter_type;
3554  if (this->isStaticGraph ()) {
3555  for (ijv_iter_type ijv = IJVRecvBuffer.begin ();
3556  ijv != IJVRecvBuffer.end (); ++ijv) {
3557  sumIntoGlobalValues (ijv->i, tuple (ijv->j), tuple (ijv->v));
3558  }
3559  }
3560  else { // Dynamic graph; can use insertGlobalValues ()
3561  for (ijv_iter_type ijv = IJVRecvBuffer.begin ();
3562  ijv != IJVRecvBuffer.end (); ++ijv) {
3563  try {
3564  insertGlobalValues (ijv->i, tuple (ijv->j), tuple (ijv->v));
3565  }
3566  catch (std::runtime_error &e) {
3567  std::ostringstream outmsg;
3568  outmsg << e.what() << std::endl
3569  << "caught in globalAssemble() in " << __FILE__ << ":" << __LINE__
3570  << std::endl ;
3571  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, outmsg.str());
3572  }
3573  }
3574  }
3575 
3576  // WHEW! THAT WAS TIRING!
3577  }
3578 
3579  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3580  void
3582  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3583  {
3584  if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph.
3585  myGraph_->resumeFill (params);
3586  }
3588  fillComplete_ = false;
3589  }
3590 
3591  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3592  void
3595  {
3596  // This method doesn't do anything. The analogous method in
3597  // CrsGraph does actually compute something.
3598  //
3599  // Oddly enough, clearGlobalConstants() clears frobNorm_ (by
3600  // setting it to -1), but computeGlobalConstants() does _not_
3601  // compute the Frobenius norm; this is done on demand in
3602  // getFrobeniusNorm(), and the result is cached there.
3603  }
3604 
3605  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3606  void
3609  // We use -1 to indicate that the Frobenius norm needs to be
3610  // recomputed, since the values might change between now and the
3611  // next fillComplete call.
3612  //
3613  // Oddly enough, clearGlobalConstants() clears frobNorm_, but
3614  // computeGlobalConstants() does _not_ compute the Frobenius norm;
3615  // this is done on demand in getFrobeniusNorm(), and the result is
3616  // cached there.
3617  frobNorm_ = -STM::one ();
3618  }
3619 
3620  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3621  void
3623  fillComplete (const RCP<ParameterList>& params)
3624  {
3625  TEUCHOS_TEST_FOR_EXCEPTION(
3626  getCrsGraph ().is_null (), std::logic_error, "Tpetra::CrsMatrix::"
3627  "fillComplete(params): getCrsGraph() returns null. "
3628  "This should not happen at this point. "
3629  "Please report this bug to the Tpetra developers.");
3630 
3631  if (isStaticGraph () && getCrsGraph ()->isFillComplete ()) {
3633  getCrsGraph ()->getRangeMap (), params);
3634  } else {
3635  fillComplete (getRowMap (), getRowMap (), params);
3636  }
3637  }
3638 
3639  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3640  void
3642  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3643  const Teuchos::RCP<const map_type>& rangeMap,
3644  const Teuchos::RCP<Teuchos::ParameterList>& params)
3645  {
3646  using Teuchos::ArrayRCP;
3647  using Teuchos::RCP;
3648  using Teuchos::rcp;
3649  const char tfecfFuncName[] = "fillComplete";
3650 
3651  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3652  ! isFillActive () || isFillComplete (),
3653  std::runtime_error, ": Matrix fill state must be active (isFillActive() "
3654  "must be true) before you may call fillComplete().");
3655  const int numProcs = getComm ()->getSize ();
3656 
3657  //
3658  // Read parameters from the input ParameterList.
3659  //
3660 
3661  // If true, the caller promises that no process did nonlocal
3662  // changes since the last call to fillComplete.
3663  bool assertNoNonlocalInserts = false;
3664  // If true, makeColMap sorts remote GIDs (within each remote
3665  // process' group).
3666  bool sortGhosts = true;
3667 
3668  if (! params.is_null ()) {
3669  assertNoNonlocalInserts = params->get ("No Nonlocal Changes",
3670  assertNoNonlocalInserts);
3671  if (params->isParameter ("sort column map ghost gids")) {
3672  sortGhosts = params->get ("sort column map ghost gids", sortGhosts);
3673  }
3674  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3675  sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts);
3676  }
3677  }
3678  // We also don't need to do global assembly if there is only one
3679  // process in the communicator.
3680  const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3681  // This parameter only matters if this matrix owns its graph.
3682  if (! myGraph_.is_null ()) {
3683  myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
3684  }
3685 
3686  if (! getCrsGraph()->indicesAreAllocated()) {
3687  if (hasColMap ()) {
3688  // We have a column Map, so use local indices.
3689  allocateValues (LocalIndices, GraphNotYetAllocated);
3690  } else {
3691  // We don't have a column Map, so use global indices.
3692  allocateValues (GlobalIndices, GraphNotYetAllocated);
3693  }
3694  }
3695  // Global assemble, if we need to. This call only costs a single
3696  // all-reduce if we didn't need global assembly after all.
3697  if (needGlobalAssemble) {
3698  globalAssemble ();
3699  }
3700  else {
3701  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3702  numProcs == 1 && nonlocals_.size() > 0,
3703  std::runtime_error, ": cannot have nonlocal entries on a serial run. "
3704  "An invalid entry (i.e., with row index not in the row Map) must have "
3705  "been submitted to the CrsMatrix.");
3706  }
3707 
3708  if (isStaticGraph ()) {
3709  // FIXME (mfh 18 Jun 2014) This check for correctness of the
3710  // input Maps incurs a penalty of two all-reduces for the
3711  // otherwise optimal const graph case.
3712  //
3713  // We could turn these (max) 2 all-reduces into (max) 1, by
3714  // fusing them. We could do this by adding a "locallySameAs"
3715  // method to Map, which would return one of four states:
3716  //
3717  // a. Certainly globally the same
3718  // b. Certainly globally not the same
3719  // c. Locally the same
3720  // d. Locally not the same
3721  //
3722  // The first two states don't require further communication.
3723  // The latter two states require an all-reduce to communicate
3724  // globally, but we only need one all-reduce, since we only need
3725  // to check whether at least one of the Maps is wrong.
3726  const bool domainMapsMatch = staticGraph_->getDomainMap ()->isSameAs (*domainMap);
3727  const bool rangeMapsMatch = staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
3728 
3729  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3730  ! domainMapsMatch, std::runtime_error,
3731  ": The CrsMatrix's domain Map does not match the graph's domain Map. "
3732  "The graph cannot be changed because it was given to the CrsMatrix "
3733  "constructor as const. You can fix this by passing in the graph's "
3734  "domain Map and range Map to the matrix's fillComplete call.");
3735 
3736  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3737  ! rangeMapsMatch, std::runtime_error,
3738  ": The CrsMatrix's range Map does not match the graph's range Map. "
3739  "The graph cannot be changed because it was given to the CrsMatrix "
3740  "constructor as const. You can fix this by passing in the graph's "
3741  "domain Map and range Map to the matrix's fillComplete call.");
3742  }
3743  else {
3744  // Set the graph's domain and range Maps. This will clear the
3745  // Import if the domain Map has changed (is a different
3746  // pointer), and the Export if the range Map has changed (is a
3747  // different pointer).
3748  myGraph_->setDomainRangeMaps (domainMap, rangeMap);
3749 
3750  // Make the graph's column Map, if necessary.
3751  if (! myGraph_->hasColMap ()) {
3752  myGraph_->makeColMap ();
3753  }
3754 
3755  // Make indices local, if necessary. The method won't do
3756  // anything if the graph is already locally indexed.
3757  myGraph_->makeIndicesLocal ();
3758 
3759  if (! myGraph_->isSorted ()) {
3760  sortEntries ();
3761  }
3762  if (! myGraph_->isMerged ()) {
3764  }
3765  // Make the Import and Export, if they haven't been made already.
3766  myGraph_->makeImportExport ();
3767  myGraph_->computeGlobalConstants ();
3768  myGraph_->fillComplete_ = true;
3769  myGraph_->checkInternalState ();
3770  }
3772  // fill local objects; will fill and finalize local graph if appropriate
3773  if (myGraph_.is_null ()) {
3774  // The matrix does _not_ own the graph, and the graph's
3775  // structure is already fixed, so just fill the local matrix.
3776  fillLocalMatrix (params);
3777  } else {
3778  // The matrix _does_ own the graph, so fill the local graph at
3779  // the same time as the local matrix.
3780  fillLocalGraphAndMatrix (params);
3781  }
3782 
3783  // Once we've initialized the sparse kernels, we're done with the
3784  // local objects. We may now release them and their memory, since
3785  // they will persist in the local sparse ops if necessary. We
3786  // keep the local graph if the parameters tell us to do so.
3787 
3788  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
3789 
3790  fillComplete_ = true; // Now we're fill complete!
3791  checkInternalState ();
3792  }
3793 
3794  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3795  void
3797  expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap,
3798  const Teuchos::RCP<const map_type> & rangeMap,
3799  const Teuchos::RCP<const import_type>& importer,
3800  const Teuchos::RCP<const export_type>& exporter,
3801  const Teuchos::RCP<Teuchos::ParameterList> &params)
3802  {
3803 #ifdef HAVE_TPETRA_MMM_TIMINGS
3804  std::string label;
3805  if(!params.is_null())
3806  label = params->get("Timer Label",label);
3807  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3808  using Teuchos::TimeMonitor;
3809  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-Graph"))));
3810 #endif
3811 
3812  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3813  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
3814  std::runtime_error, "Matrix fill state must be active (isFillActive() "
3815  "must be true) before calling fillComplete().");
3816  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3817  myGraph_.is_null (), std::logic_error, "myGraph_ is null. This is not allowed.");
3818 
3819 
3820  // We will presume globalAssemble is not needed, so we do the ESFC on the graph
3821  myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
3822 
3823 #ifdef HAVE_TPETRA_MMM_TIMINGS
3824  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cGC"))));
3825 #endif
3826 
3828 
3829 #ifdef HAVE_TPETRA_MMM_TIMINGS
3830  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-fLGAM"))));
3831 #endif
3832 
3833  // Fill the local graph and matrix
3834  fillLocalGraphAndMatrix (params);
3835 
3836  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
3837 
3838  // Now we're fill complete!
3839  fillComplete_ = true;
3840 
3841  // Sanity checks at the end.
3842 #ifdef HAVE_TPETRA_DEBUG
3843  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
3844  ": We're at the end of fillComplete(), but isFillActive() is true. "
3845  "Please report this bug to the Tpetra developers.");
3846  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
3847  ": We're at the end of fillComplete(), but isFillActive() is true. "
3848  "Please report this bug to the Tpetra developers.");
3849 #endif // HAVE_TPETRA_DEBUG
3850 
3851 #ifdef HAVE_TPETRA_MMM_TIMINGS
3852  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cIS"))));
3853 #endif
3854 
3856  }
3857 
// Sort the entries of every local row of the matrix.
//
// Throws std::runtime_error if isStaticGraph() is true.  If the graph
// does not already report its indices as sorted, each local row is
// handed to myGraph_->sortRowIndicesAndValues together with a
// nonconst view of that row's values, and the graph's
// indicesAreSorted_ flag is set afterwards.
//
// NOTE(review): the declarator lines (original lines 3860-3861) are
// absent from this listing; the name sortEntries is taken from the
// exception message below.
3858  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3859  void
3862  {
3863  TEUCHOS_TEST_FOR_EXCEPTION(
3864  isStaticGraph (), std::runtime_error, "Tpetra::CrsMatrix::sortEntries: "
3865  "Cannot sort with static graph.");
3866  if (! myGraph_->isSorted ()) {
3867  const size_t lclNumRows = this->getNodeNumRows ();
3868  for (size_t row = 0; row < lclNumRows; ++row) {
3869  RowInfo rowInfo = myGraph_->getRowInfo (row);
      // Nonconst view of this row's values, passed alongside the row's
      // RowInfo so the graph can sort indices and values together.
3870  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
3871  myGraph_->template sortRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
3872  }
3873  // We just sorted every row, so record that on the graph.
3874  myGraph_->indicesAreSorted_ = true;
3875  }
3876  }
3877 
// Merge redundant entries in every local row of the matrix.
//
// Throws std::runtime_error if isStaticGraph() is true.  If the graph
// does not already report itself as merged, each local row is handed
// to myGraph_->mergeRowIndicesAndValues together with a nonconst view
// of that row's values, and the graph's noRedundancies_ flag is set
// afterwards.
//
// NOTE(review): the declarator lines (original lines 3880-3881) are
// absent from this listing; the name mergeRedundantEntries is taken
// from the exception message below.
3878  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3879  void
3882  {
3883  TEUCHOS_TEST_FOR_EXCEPTION(
3884  isStaticGraph (), std::runtime_error, "Tpetra::CrsMatrix::"
3885  "mergeRedundantEntries: Cannot merge with static graph.");
3886  if (! myGraph_->isMerged ()) {
3887  const size_t lclNumRows = this->getNodeNumRows ();
3888  for (size_t row = 0; row < lclNumRows; ++row) {
3889  RowInfo rowInfo = myGraph_->getRowInfo (row);
      // Nonconst view of this row's values, passed alongside the row's
      // RowInfo so the graph can merge indices and values together.
3890  Teuchos::ArrayView<impl_scalar_type> rv = this->getViewNonConst (rowInfo);
3891  myGraph_->template mergeRowIndicesAndValues<impl_scalar_type> (rowInfo, rv);
3892  }
3893  myGraph_->noRedundancies_ = true; // we just merged every row
3894  }
3895  }
3896 
3897  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3898  void
3902  Scalar alpha,
3903  Scalar beta) const
3904  {
3905  using Teuchos::null;
3906  using Teuchos::RCP;
3907  using Teuchos::rcp;
3908  using Teuchos::rcp_const_cast;
3909  using Teuchos::rcpFromRef;
3910  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
3911  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
3912 
3913  // mfh 05 Jun 2014: Special case for alpha == 0. I added this to
3914  // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was
3915  // failing only for the Kokkos refactor version of Tpetra. It's a
3916  // good idea regardless to have the bypass.
3917  if (alpha == ZERO) {
3918  if (beta == ZERO) {
3919  Y_in.putScalar (ZERO);
3920  } else if (beta != ONE) {
3921  Y_in.scale (beta);
3922  }
3923  return;
3924  }
3925 
3926  // It's possible that X is a view of Y or vice versa. We don't
3927  // allow this (apply() requires that X and Y not alias one
3928  // another), but it's helpful to detect and work around this case.
3929  // We don't try to to detect the more subtle cases (e.g., one is a
3930  // subview of the other, but their initial pointers differ). We
3931  // only need to do this if this matrix's Import is trivial;
3932  // otherwise, we don't actually apply the operator from X into Y.
3933 
3934  RCP<const import_type> importer = this->getGraph ()->getImporter ();
3935  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
3936 
3937  // If beta == 0, then the output MV will be overwritten; none of
3938  // its entries should be read. (Sparse BLAS semantics say that we
3939  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
3940  // This matters if we need to do an Export operation; see below.
3941  const bool Y_is_overwritten = (beta == ZERO);
3942 
3943  // We treat the case of a replicated MV output specially.
3944  const bool Y_is_replicated = ! Y_in.isDistributed ();
3945 
3946  // This is part of the special case for replicated MV output.
3947  // We'll let each process do its thing, but do an all-reduce at
3948  // the end to sum up the results. Setting beta=0 on all processes
3949  // but Proc 0 makes the math work out for the all-reduce. (This
3950  // assumes that the replicated data is correctly replicated, so
3951  // that the data are the same on all processes.)
3952  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
3953  beta = ZERO;
3954  }
3955 
3956  // Temporary MV for Import operation. After the block of code
3957  // below, this will be an (Imported if necessary) column Map MV
3958  // ready to give to localMultiply().
3959  RCP<const MV> X_colMap;
3960  if (importer.is_null ()) {
3961  if (! X_in.isConstantStride ()) {
3962  // Not all sparse mat-vec kernels can handle an input MV with
3963  // nonconstant stride correctly, so we have to copy it in that
3964  // case into a constant stride MV. To make a constant stride
3965  // copy of X_in, we force creation of the column (== domain)
3966  // Map MV (if it hasn't already been created, else fetch the
3967  // cached copy). This avoids creating a new MV each time.
3968  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
3969  Tpetra::deep_copy (*X_colMapNonConst, X_in);
3970  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
3971  }
3972  else {
3973  // The domain and column Maps are the same, so do the local
3974  // multiply using the domain Map input MV X_in.
3975  X_colMap = rcpFromRef (X_in);
3976  }
3977  }
3978  else {
3979  // We're doing an Import anyway, which will copy the relevant
3980  // elements of the domain Map MV X_in into a separate column Map
3981  // MV. Thus, we don't have to worry whether X_in is constant
3982  // stride.
3983  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
3984 
3985  // Import from the domain Map MV to the column Map MV.
3986  X_colMapNonConst->doImport (X_in, *importer, INSERT);
3987  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
3988  }
3989 
3990  // Temporary MV for Export operation, or for copying a nonconstant
3991  // stride output MV into a constant stride MV.
3992  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
3993 
3994  // If we have a nontrivial Export object, we must perform an
3995  // Export. In that case, the local multiply result will go into
3996  // the row Map multivector. We don't have to make a
3997  // constant-stride version of Y_in in this case, because we had to
3998  // make a constant stride Y_rowMap MV and do an Export anyway.
3999  if (! exporter.is_null ()) {
4000  this->template localMultiply<Scalar, Scalar> (*X_colMap, *Y_rowMap,
4001  Teuchos::NO_TRANS,
4002  alpha, ZERO);
4003  // If we're overwriting the output MV Y_in completely (beta ==
4004  // 0), then make sure that it is filled with zeros before we do
4005  // the Export. Otherwise, the ADD combine mode will use data in
4006  // Y_in, which is supposed to be zero.
4007  if (Y_is_overwritten) {
4008  Y_in.putScalar (ZERO);
4009  }
4010  else {
4011  // Scale the output MV by beta, so that the Export sums in the
4012  // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
4013  Y_in.scale (beta);
4014  }
4015  // Do the Export operation.
4016  Y_in.doExport (*Y_rowMap, *exporter, ADD);
4017  }
4018  else { // Don't do an Export: row Map and range Map are the same.
4019  //
4020  // If Y_in does not have constant stride, or if the column Map
4021  // MV aliases Y_in, then we can't let the kernel write directly
4022  // to Y_in. Instead, we have to use the cached row (== range)
4023  // Map MV as temporary storage.
4024  //
4025  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
4026  // the user passed in the same MultiVector for both X and Y. It
4027  // won't detect whether one MultiVector views the other. We
4028  // should also check the MultiVectors' raw data pointers.
4029  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
4030  // Force creating the MV if it hasn't been created already.
4031  // This will reuse a previously created cached MV.
4032  Y_rowMap = getRowMapMultiVector (Y_in, true);
4033 
4034  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
4035  // since we're overwriting it anyway.
4036  if (beta != ZERO) {
4037  Tpetra::deep_copy (*Y_rowMap, Y_in);
4038  }
4039  this->template localMultiply<Scalar, Scalar> (*X_colMap,
4040  *Y_rowMap,
4041  Teuchos::NO_TRANS,
4042  alpha, beta);
4043  Tpetra::deep_copy (Y_in, *Y_rowMap);
4044  }
4045  else {
4046  this->template localMultiply<Scalar, Scalar> (*X_colMap, Y_in,
4047  Teuchos::NO_TRANS,
4048  alpha, beta);
4049  }
4050  }
4051 
4052  // If the range Map is a locally replicated Map, sum up
4053  // contributions from each process. We set beta = 0 on all
4054  // processes but Proc 0 initially, so this will handle the scaling
4055  // factor beta correctly.
4056  if (Y_is_replicated) {
4057  Y_in.reduce ();
4058  }
4059  }
4060 
// Transposed sparse matrix-multivector product.  The transpose mode
// is forwarded unchanged to localMultiply() as 'mode'.
//
// Compared with the non-transposed case, the roles of the graph's
// communication objects are swapped: the Export object is used to
// Import X_in into exportMV_ (a row Map multivector), and the Import
// object is used to Export the local result importMV_ (a column Map
// multivector) into Y_in with the ADD combine mode.
//
// Outline of the steps below:
//   1. alpha == 0: only scale (or zero out) Y_in and return.
//   2. Make a constant stride copy of X_in if needed, and (re)allocate
//      the cached importMV_ / exportMV_ if their number of columns
//      does not match X_in.
//   3. Bring X_in into exportMV_ if there is an Export object.
//   4. Multiply, either into importMV_ followed by an Export into
//      Y_in, or into Y_in (through a deep copy if Y_in is not constant
//      stride or is the same object as the input).
//   5. If Y_in is not distributed, finish with Y_in.reduce().
//
// NOTE(review): the declarator lines (original lines 4063-4065),
// which declare X_in and Y_in, are absent from this listing; the
// name applyTranspose is taken from the call site in apply().
4061  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4062  void
4066  const Teuchos::ETransp mode,
4067  Scalar alpha,
4068  Scalar beta) const
4069  {
4070  using Teuchos::null;
4071  using Teuchos::RCP;
4072  using Teuchos::rcp;
4073  using Teuchos::rcp_const_cast;
4074  using Teuchos::rcpFromRef;
4075  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4076 
4077  // Take shortcuts for alpha == 0.
4078  if (alpha == ZERO) {
4079  // Follow the Sparse BLAS convention by ignoring both the matrix
4080  // and X_in, in this case.
4081  if (beta == ZERO) {
4082  // Follow the Sparse BLAS convention by overwriting any Inf or
4083  // NaN values in Y_in, in this case.
4084  Y_in.putScalar (ZERO);
4085  }
4086  else {
4087  Y_in.scale (beta);
4088  }
4089  return;
4090  }
4091 
4092  const size_t numVectors = X_in.getNumVectors ();
4093 
4094  // We don't allow X_in and Y_in to alias one another. It's hard
4095  // to check this, because advanced users could create views from
4096  // raw pointers. However, if X_in and Y_in reference the same
4097  // object, we will do the user a favor by copying X into new
4098  // storage (with a warning). We only need to do this if we have
4099  // trivial importers; otherwise, we don't actually apply the
4100  // operator from X into Y.
4101  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4102  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4103  // access X indirectly, in case we need to create temporary storage
4104  RCP<const MV> X;
4105 
4106  // some parameters for below
4107  const bool Y_is_replicated = ! Y_in.isDistributed ();
      // Captured before beta is possibly reset to zero just below.
4108  const bool Y_is_overwritten = (beta == ZERO);
      // For a replicated output, only Proc 0 keeps its beta; the
      // reduce() at the end sums the per-process contributions.
4109  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4110  beta = ZERO;
4111  }
4112 
4113  // The kernels do not allow input or output with nonconstant stride.
4114  if (! X_in.isConstantStride () && importer.is_null ()) {
4115  X = rcp (new MV (X_in, Teuchos::Copy)); // Constant-stride copy of X_in
4116  } else {
4117  X = rcpFromRef (X_in); // Reference to X_in
4118  }
4119 
4120  // Set up temporary multivectors for Import and/or Export.
      // A cached temporary is discarded and reallocated when its number
      // of columns does not match numVectors.
4121  if (importer != null) {
4122  if (importMV_ != null && importMV_->getNumVectors() != numVectors) {
4123  importMV_ = null;
4124  }
4125  if (importMV_ == null) {
4126  importMV_ = rcp (new MV (this->getColMap (), numVectors));
4127  }
4128  }
4129  if (exporter != null) {
4130  if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) {
4131  exportMV_ = null;
4132  }
4133  if (exportMV_ == null) {
4134  exportMV_ = rcp (new MV (this->getRowMap (), numVectors));
4135  }
4136  }
4137 
4138  // If we have a non-trivial exporter, we must import elements that
4139  // are permuted or are on other processors.
4140  if (! exporter.is_null ()) {
4141  exportMV_->doImport (X_in, *exporter, INSERT);
4142  X = exportMV_; // multiply out of exportMV_
4143  }
4144 
4145  // If we have a non-trivial importer, we must export elements that
4146  // are permuted or belong to other processors. We will compute
4147  // solution into the to-be-exported MV; get a view.
4148  if (importer != null) {
4149  // FIXME (mfh 18 Apr 2015) Temporary fix suggested by Clark
4150  // Dohrmann on Fri 17 Apr 2015. At some point, we need to go
4151  // back and figure out why this helps. importMV_ SHOULD be
4152  // completely overwritten in the localMultiply() call below,
4153  // because beta == ZERO there.
4154  importMV_->putScalar (ZERO);
4155  // Do the local computation.
4156  this->template localMultiply<Scalar, Scalar> (*X, *importMV_, mode,
4157  alpha, ZERO);
      // Apply beta to Y_in here, since the ADD Export below sums the
      // local result into whatever Y_in currently holds.
4158  if (Y_is_overwritten) {
4159  Y_in.putScalar (ZERO);
4160  } else {
4161  Y_in.scale (beta);
4162  }
4163  Y_in.doExport (*importMV_, *importer, ADD);
4164  }
4165  // otherwise, multiply into Y
4166  else {
4167  // can't multiply in-situ; can't multiply into non-strided multivector
4168  //
4169  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
4170  // the user passed in the same MultiVector for both X and Y. It
4171  // won't detect whether one MultiVector views the other. We
4172  // should also check the MultiVectors' raw data pointers.
4173  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
4174  // Make a deep copy of Y_in, into which to write the multiply result.
4175  MV Y (Y_in, Teuchos::Copy);
4176  this->template localMultiply<Scalar, Scalar> (*X, Y, mode, alpha, beta);
4177  Tpetra::deep_copy (Y_in, Y);
4178  } else {
4179  this->template localMultiply<Scalar, Scalar> (*X, Y_in, mode, alpha, beta);
4180  }
4181  }
4182 
4183  // If the range Map is a locally replicated map, sum the
4184  // contributions from each process. (That's why we set beta=0
4185  // above for all processes but Proc 0.)
4186  if (Y_is_replicated) {
4187  Y_in.reduce ();
4188  }
4189  }
4190 
// Apply the matrix (or its transpose) to X, combining the result
// into Y with the scaling factors alpha and beta.
//
// Throws std::runtime_error if the matrix is not fill complete.
// mode == Teuchos::NO_TRANS dispatches to applyNonTranspose(); any
// other mode dispatches to applyTranspose(), after first zeroing Y
// when beta is zero.
//
// NOTE(review): the declarator lines (original lines 4193-4195),
// which declare X and Y, are absent from this listing; the name
// apply is taken from the exception message below.
4191  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4192  void
4196  Teuchos::ETransp mode,
4197  Scalar alpha,
4198  Scalar beta) const
4199  {
4200  TEUCHOS_TEST_FOR_EXCEPTION(
4201  ! isFillComplete (), std::runtime_error,
4202  "Tpetra::CrsMatrix::apply(): Cannot call apply() until fillComplete() "
4203  "has been called.");
4204 
4205  if (mode == Teuchos::NO_TRANS) {
4206  applyNonTranspose (X, Y, alpha, beta);
4207  } else {
4208  // Thyra was implicitly assuming that Y gets set to zero, or is
4209  // overwritten, when beta==0. This was not the case with transpose in a
4210  // multithreaded environment where a multiplication with subsequent
4211  // atomic_adds is used, since 0 is effectively not special-cased. Doing
4212  // the explicit set to zero here catches cases where Y is NaN or Inf.
4213  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4214  if(beta == ZERO)
4215  Y.putScalar (ZERO);
4216  applyTranspose (X, Y, mode, alpha, beta);
4217  }
4218  }
4219 
// Convenience overload: forwards to reorderedGaussSeidel() with
// Teuchos::null in the row-indices argument position, passing B, X,
// D, dampingFactor, direction, and numSweeps through unchanged.
//
// NOTE(review): the declarator lines (original lines 4222-4225),
// which declare B, X, and D, are absent from this listing; this is
// presumably CrsMatrix::gaussSeidel -- confirm against the upstream
// file.
4220  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4221  void
4226  const Scalar& dampingFactor,
4227  const ESweepDirection direction,
4228  const int numSweeps) const
4229  {
4230  reorderedGaussSeidel (B, X, D, Teuchos::null, dampingFactor, direction, numSweeps);
4231  }
4232 
4233  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4234  void
4239  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
4240  const Scalar& dampingFactor,
4241  const ESweepDirection direction,
4242  const int numSweeps) const
4243  {
4244  using Teuchos::null;
4245  using Teuchos::RCP;
4246  using Teuchos::rcp;
4247  using Teuchos::rcp_const_cast;
4248  using Teuchos::rcpFromRef;
4249  typedef Scalar ST;
4250 
4251  TEUCHOS_TEST_FOR_EXCEPTION(
4252  isFillComplete() == false, std::runtime_error,
4253  "Tpetra::CrsMatrix::gaussSeidel: cannot call this method until "
4254  "fillComplete() has been called.");
4255  TEUCHOS_TEST_FOR_EXCEPTION(
4256  numSweeps < 0,
4257  std::invalid_argument,
4258  "Tpetra::CrsMatrix::gaussSeidel: The number of sweeps must be , "
4259  "nonnegative but you provided numSweeps = " << numSweeps << " < 0.");
4260 
4261  // Translate from global to local sweep direction.
4262  // While doing this, validate the input.
4263  KokkosClassic::ESweepDirection localDirection;
4264  if (direction == Forward) {
4265  localDirection = KokkosClassic::Forward;
4266  }
4267  else if (direction == Backward) {
4268  localDirection = KokkosClassic::Backward;
4269  }
4270  else if (direction == Symmetric) {
4271  // We'll control local sweep direction manually.
4272  localDirection = KokkosClassic::Forward;
4273  }
4274  else {
4275  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
4276  "Tpetra::CrsMatrix::gaussSeidel: The 'direction' enum does not have "
4277  "any of its valid values: Forward, Backward, or Symmetric.");
4278  }
4279 
4280  if (numSweeps == 0) {
4281  return; // Nothing to do.
4282  }
4283 
4284  // We don't need the Export object because this method assumes
4285  // that the row, domain, and range Maps are the same. We do need
4286  // the Import object, if there is one, though.
4287  RCP<const import_type> importer = this->getGraph()->getImporter();
4288  RCP<const export_type> exporter = this->getGraph()->getExporter();
4289  TEUCHOS_TEST_FOR_EXCEPTION(
4290  ! exporter.is_null (), std::runtime_error,
4291  "Tpetra's gaussSeidel implementation requires that the row, domain, "
4292  "and range Maps be the same. This cannot be the case, because the "
4293  "matrix has a nontrivial Export object.");
4294 
4295  RCP<const map_type> domainMap = this->getDomainMap ();
4296  RCP<const map_type> rangeMap = this->getRangeMap ();
4297  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
4298  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
4299 
4300 #ifdef HAVE_TEUCHOS_DEBUG
4301  {
4302  // The relation 'isSameAs' is transitive. It's also a
4303  // collective, so we don't have to do a "shared" test for
4304  // exception (i.e., a global reduction on the test value).
4305  TEUCHOS_TEST_FOR_EXCEPTION(
4306  ! X.getMap ()->isSameAs (*domainMap),
4307  std::runtime_error,
4308  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4309  "multivector X be in the domain Map of the matrix.");
4310  TEUCHOS_TEST_FOR_EXCEPTION(
4311  ! B.getMap ()->isSameAs (*rangeMap),
4312  std::runtime_error,
4313  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4314  "B be in the range Map of the matrix.");
4315  TEUCHOS_TEST_FOR_EXCEPTION(
4316  ! D.getMap ()->isSameAs (*rowMap),
4317  std::runtime_error,
4318  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4319  "D be in the row Map of the matrix.");
4320  TEUCHOS_TEST_FOR_EXCEPTION(
4321  ! rowMap->isSameAs (*rangeMap),
4322  std::runtime_error,
4323  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
4324  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
4325  TEUCHOS_TEST_FOR_EXCEPTION(
4326  ! domainMap->isSameAs (*rangeMap),
4327  std::runtime_error,
4328  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
4329  "the range Map of the matrix be the same.");
4330  }
4331 #else
4332  // Forestall any compiler warnings for unused variables.
4333  (void) rangeMap;
4334  (void) rowMap;
4335 #endif // HAVE_TEUCHOS_DEBUG
4336 
4337  // If B is not constant stride, copy it into a constant stride
4338  // multivector. We'l handle the right-hand side B first and deal
4339  // with X right before the sweeps, to improve locality of the
4340  // first sweep. (If the problem is small enough, then that will
4341  // hopefully keep more of the entries of X in cache. This
4342  // optimizes for the typical case of a small number of sweeps.)
4343  RCP<const MV> B_in;
4344  if (B.isConstantStride()) {
4345  B_in = rcpFromRef (B);
4346  }
4347  else {
4348  // The range Map and row Map are the same in this case, so we
4349  // can use the (possibly cached) row Map multivector to store a
4350  // constant stride copy of B. We don't have to copy back, since
4351  // Gauss-Seidel won't modify B.
4352  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
4353  deep_copy (*B_in_nonconst, B); // Copy from B into B_in(_nonconst).
4354  B_in = rcp_const_cast<const MV> (B_in_nonconst);
4355 
4357  ! B.isConstantStride (),
4358  std::runtime_error,
4359  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
4360  "requires that X and B both have constant stride. Since B does not "
4361  "have constant stride, we had to make a copy. This is a limitation of "
4362  "the current implementation and not your fault, but we still report it "
4363  "as an efficiency warning for your information.");
4364  }
4365 
4366  // If X is not constant stride, copy it into a constant stride
4367  // multivector. Also, make the column Map multivector X_colMap,
4368  // and its domain Map view X_domainMap. (X actually must be a
4369  // domain Map view of a column Map multivector; exploit this, if X
4370  // has constant stride.)
4371 
4372  RCP<MV> X_domainMap;
4373  RCP<MV> X_colMap;
4374  bool copiedInput = false;
4375 
4376  if (importer.is_null ()) { // Domain and column Maps are the same.
4377  if (X.isConstantStride ()) {
4378  X_domainMap = rcpFromRef (X);
4379  X_colMap = X_domainMap;
4380  copiedInput = false;
4381  }
4382  else {
4383  // Get a temporary column Map multivector, make a domain Map
4384  // view of it, and copy X into the domain Map view. We have
4385  // to copy here because we won't be doing Import operations.
4386  X_colMap = getColumnMapMultiVector (X, true);
4387  X_domainMap = X_colMap; // Domain and column Maps are the same.
4388  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
4389  copiedInput = true;
4391  ! X.isConstantStride (), std::runtime_error,
4392  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
4393  "Gauss-Seidel kernel requires that X and B both have constant "
4394  "stride. Since X does not have constant stride, we had to make a "
4395  "copy. This is a limitation of the current implementation and not "
4396  "your fault, but we still report it as an efficiency warning for "
4397  "your information.");
4398  }
4399  }
4400  else { // We will be doing Import operations in the sweeps.
4401  if (X.isConstantStride ()) {
4402  X_domainMap = rcpFromRef (X);
4403  // This kernel assumes that X is a domain Map view of a column
4404  // Map multivector. We will only check if this is valid if
4405  // the CMake configure Teuchos_ENABLE_DEBUG is ON.
4406  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
4407 
4408  // FIXME (mfh 19 Mar 2013) Do we need to fill the remote
4409  // entries of X_colMap with zeros? Do we need to fill all of
4410  // X_domainMap initially with zeros? Ifpack
4411  // (Ifpack_PointRelaxation.cpp, line 906) creates an entirely
4412  // new MultiVector each time.
4413 
4414  // Do the first Import for the first sweep. This simplifies
4415  // the logic in the sweeps.
4416  X_colMap->doImport (X, *importer, INSERT);
4417  copiedInput = false;
4418  }
4419  else {
4420  // Get a temporary column Map multivector X_colMap, and make a
4421  // domain Map view X_domainMap of it. Instead of copying, we
4422  // do an Import from X into X_domainMap. This saves us a
4423  // copy, since the Import has to copy the data anyway.
4424  X_colMap = getColumnMapMultiVector (X, true);
4425  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
4426  X_colMap->doImport (X, *importer, INSERT);
4427  copiedInput = true;
4429  ! X.isConstantStride (), std::runtime_error,
4430  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
4431  "Gauss-Seidel kernel requires that X and B both have constant stride. "
4432  "Since X does not have constant stride, we had to make a copy. "
4433  "This is a limitation of the current implementation and not your fault, "
4434  "but we still report it as an efficiency warning for your information.");
4435  }
4436  }
4437 
4438  for (int sweep = 0; sweep < numSweeps; ++sweep) {
4439  if (! importer.is_null () && sweep > 0) {
4440  // We already did the first Import for the zeroth sweep.
4441  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4442  }
4443 
4444  // Do local Gauss-Seidel.
4445  if (direction != Symmetric) {
4446  if (rowIndices.is_null ()) {
4447  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4448  dampingFactor,
4449  localDirection);
4450  }
4451  else {
4452  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4453  D, rowIndices,
4454  dampingFactor,
4455  localDirection);
4456  }
4457  }
4458  else { // direction == Symmetric
4459  const bool doImportBetweenDirections = false;
4460  if (rowIndices.is_null ()) {
4461  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4462  dampingFactor,
4463  KokkosClassic::Forward);
4464  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
4465  // Gauss-Seidel" does _not_ do an Import between the forward
4466  // and backward sweeps. This makes sense, because Aztec
4467  // considers "symmetric Gauss-Seidel" a subdomain solver.
4468  if (doImportBetweenDirections) {
4469  // Communicate again before the Backward sweep.
4470  if (! importer.is_null ()) {
4471  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4472  }
4473  }
4474  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4475  dampingFactor,
4476  KokkosClassic::Backward);
4477  }
4478  else {
4479  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4480  D, rowIndices,
4481  dampingFactor,
4482  KokkosClassic::Forward);
4483  if (doImportBetweenDirections) {
4484  // Communicate again before the Backward sweep.
4485  if (! importer.is_null ()) {
4486  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4487  }
4488  }
4489  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4490  D, rowIndices,
4491  dampingFactor,
4492  KokkosClassic::Backward);
4493  }
4494  }
4495  }
4496 
4497  if (copiedInput) {
4498  deep_copy (X, *X_domainMap); // Copy back from X_domainMap to X.
4499  }
4500  }
4501 
// Convenience overload: forwards to reorderedGaussSeidelCopy() with
// Teuchos::null in the row-indices argument position, passing X, B,
// D, dampingFactor, direction, numSweeps, and zeroInitialGuess
// through unchanged.
//
// NOTE(review): the declarator lines (original lines 4504-4507),
// which declare X, B, and D, are absent from this listing; this is
// presumably CrsMatrix::gaussSeidelCopy -- confirm against the
// upstream file.
4502  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4503  void
4508  const Scalar& dampingFactor,
4509  const ESweepDirection direction,
4510  const int numSweeps,
4511  const bool zeroInitialGuess) const
4512  {
4513  reorderedGaussSeidelCopy (X, B, D, Teuchos::null, dampingFactor, direction,
4514  numSweeps, zeroInitialGuess);
4515  }
4516 
4517  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4518  void
4523  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
4524  const Scalar& dampingFactor,
4525  const ESweepDirection direction,
4526  const int numSweeps,
4527  const bool zeroInitialGuess) const
4528  {
4529  using Teuchos::null;
4530  using Teuchos::RCP;
4531  using Teuchos::rcp;
4532  using Teuchos::rcpFromRef;
4533  using Teuchos::rcp_const_cast;
4534  typedef Scalar ST;
4535  const char prefix[] = "Tpetra::CrsMatrix::(reordered)gaussSeidelCopy: ";
4536  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4537 
4538  TEUCHOS_TEST_FOR_EXCEPTION(
4539  ! isFillComplete (), std::runtime_error,
4540  prefix << "The matrix is not fill complete.");
4541  TEUCHOS_TEST_FOR_EXCEPTION(
4542  numSweeps < 0, std::invalid_argument,
4543  prefix << "The number of sweeps must be nonnegative, "
4544  "but you provided numSweeps = " << numSweeps << " < 0.");
4545 
4546  // Translate from global to local sweep direction.
4547  // While doing this, validate the input.
4548  KokkosClassic::ESweepDirection localDirection;
4549  if (direction == Forward) {
4550  localDirection = KokkosClassic::Forward;
4551  }
4552  else if (direction == Backward) {
4553  localDirection = KokkosClassic::Backward;
4554  }
4555  else if (direction == Symmetric) {
4556  // We'll control local sweep direction manually.
4557  localDirection = KokkosClassic::Forward;
4558  }
4559  else {
4560  TEUCHOS_TEST_FOR_EXCEPTION(
4561  true, std::invalid_argument,
4562  prefix << "The 'direction' enum does not have any of its valid "
4563  "values: Forward, Backward, or Symmetric.");
4564  }
4565 
4566  if (numSweeps == 0) {
4567  return;
4568  }
4569 
4570  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4571  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4572  TEUCHOS_TEST_FOR_EXCEPTION(
4573  ! exporter.is_null (), std::runtime_error,
4574  "This method's implementation currently requires that the matrix's row, "
4575  "domain, and range Maps be the same. This cannot be the case, because "
4576  "the matrix has a nontrivial Export object.");
4577 
4578  RCP<const map_type> domainMap = this->getDomainMap ();
4579  RCP<const map_type> rangeMap = this->getRangeMap ();
4580  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
4581  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
4582 
4583 #ifdef HAVE_TEUCHOS_DEBUG
4584  {
4585  // The relation 'isSameAs' is transitive. It's also a
4586  // collective, so we don't have to do a "shared" test for
4587  // exception (i.e., a global reduction on the test value).
4588  TEUCHOS_TEST_FOR_EXCEPTION(
4589  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
4590  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4591  "multivector X be in the domain Map of the matrix.");
4592  TEUCHOS_TEST_FOR_EXCEPTION(
4593  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
4594  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4595  "B be in the range Map of the matrix.");
4596  TEUCHOS_TEST_FOR_EXCEPTION(
4597  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
4598  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
4599  "D be in the row Map of the matrix.");
4600  TEUCHOS_TEST_FOR_EXCEPTION(
4601  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
4602  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
4603  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
4604  TEUCHOS_TEST_FOR_EXCEPTION(
4605  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
4606  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
4607  "the range Map of the matrix be the same.");
4608  }
4609 #else
4610  // Forestall any compiler warnings for unused variables.
4611  (void) rangeMap;
4612  (void) rowMap;
4613 #endif // HAVE_TEUCHOS_DEBUG
4614 
4615  // Fetch a (possibly cached) temporary column Map multivector
4616  // X_colMap, and a domain Map view X_domainMap of it. Both have
4617  // constant stride by construction. We know that the domain Map
4618  // must include the column Map, because our Gauss-Seidel kernel
4619  // requires that the row Map, domain Map, and range Map are all
4620  // the same, and that each process owns all of its own diagonal
4621  // entries of the matrix.
4622 
4623  RCP<MV> X_colMap;
4624  RCP<MV> X_domainMap;
4625  bool copyBackOutput = false;
4626  if (importer.is_null ()) {
4627  if (X.isConstantStride ()) {
4628  X_colMap = rcpFromRef (X);
4629  X_domainMap = rcpFromRef (X);
4630  // Column Map and domain Map are the same, so there are no
4631  // remote entries. Thus, if we are not setting the initial
4632  // guess to zero, we don't have to worry about setting remote
4633  // entries to zero, even though we are not doing an Import in
4634  // this case.
4635  if (zeroInitialGuess) {
4636  X_colMap->putScalar (ZERO);
4637  }
4638  // No need to copy back to X at end.
4639  }
4640  else { // We must copy X into a constant stride multivector.
4641  // Just use the cached column Map multivector for that.
4642  // force=true means fill with zeros, so no need to fill
4643  // remote entries (not in domain Map) with zeros.
4644  X_colMap = getColumnMapMultiVector (X, true);
4645  // X_domainMap is always a domain Map view of the column Map
4646  // multivector. In this case, the domain and column Maps are
4647  // the same, so X_domainMap _is_ X_colMap.
4648  X_domainMap = X_colMap;
4649  if (! zeroInitialGuess) { // Don't copy if zero initial guess
4650  try {
4651  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
4652  } catch (std::exception& e) {
4653  std::ostringstream os;
4654  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
4655  "deep_copy(*X_domainMap, X) threw an exception: "
4656  << e.what () << ".";
4657  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
4658  }
4659  }
4660  copyBackOutput = true; // Don't forget to copy back at end.
4662  ! X.isConstantStride (),
4663  std::runtime_error,
4664  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
4665  "kernel requires that X and B both have constant stride. Since X "
4666  "does not have constant stride, we had to make a copy. This is a "
4667  "limitation of the current implementation and not your fault, but we "
4668  "still report it as an efficiency warning for your information.");
4669  }
4670  }
4671  else { // Column Map and domain Map are _not_ the same.
4672  X_colMap = getColumnMapMultiVector (X);
4673  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
4674 
4675 #ifdef HAVE_TPETRA_DEBUG
4676  typename MV::dual_view_type X_colMap_view = X_colMap->getDualView ();
4677  typename MV::dual_view_type X_domainMap_view = X_domainMap->getDualView ();
4678 
4679  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
4680  TEUCHOS_TEST_FOR_EXCEPTION(
4681  X_colMap_view.h_view.ptr_on_device () != X_domainMap_view.h_view.ptr_on_device (),
4682  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
4683  "Pointer to start of column Map view of X is not equal to pointer to "
4684  "start of (domain Map view of) X. This may mean that "
4685  "Tpetra::MultiVector::offsetViewNonConst is broken. "
4686  "Please report this bug to the Tpetra developers.");
4687  }
4688 
4689  TEUCHOS_TEST_FOR_EXCEPTION(
4690  X_colMap_view.dimension_0 () < X_domainMap_view.dimension_0 () ||
4691  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
4692  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
4693  "X_colMap has fewer local rows than X_domainMap. "
4694  "X_colMap_view.dimension_0() = " << X_colMap_view.dimension_0 ()
4695  << ", X_domainMap_view.dimension_0() = "
4696  << X_domainMap_view.dimension_0 ()
4697  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
4698  << ", and X_domainMap->getLocalLength() = "
4699  << X_domainMap->getLocalLength ()
4700  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
4701  "is broken. Please report this bug to the Tpetra developers.");
4702 
4703  TEUCHOS_TEST_FOR_EXCEPTION(
4704  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
4705  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
4706  "X_colMap has a different number of columns than X_domainMap. "
4707  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
4708  << " != X_domainMap->getNumVectors() = "
4709  << X_domainMap->getNumVectors ()
4710  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
4711  "is broken. Please report this bug to the Tpetra developers.");
4712 #endif // HAVE_TPETRA_DEBUG
4713 
4714  if (zeroInitialGuess) {
4715  // No need for an Import, since we're filling with zeros.
4716  X_colMap->putScalar (ZERO);
4717  } else {
4718  // We could just copy X into X_domainMap. However, that
4719  // wastes a copy, because the Import also does a copy (plus
4720  // communication). Since the typical use case for
4721  // Gauss-Seidel is a small number of sweeps (2 is typical), we
4722  // don't want to waste that copy. Thus, we do the Import
4723  // here, and skip the first Import in the first sweep.
4724  // Importing directly from X effects the copy into X_domainMap
4725  // (which is a view of X_colMap).
4726  X_colMap->doImport (X, *importer, INSERT);
4727  }
4728  copyBackOutput = true; // Don't forget to copy back at end.
4729  } // if column and domain Maps are (not) the same
4730 
4731  // The Gauss-Seidel / SOR kernel expects multivectors of constant
4732  // stride. X_colMap is by construction, but B might not be. If
4733  // it's not, we have to make a copy.
4734  RCP<const MV> B_in;
4735  if (B.isConstantStride ()) {
4736  B_in = rcpFromRef (B);
4737  }
4738  else {
4739  // Range Map and row Map are the same in this case, so we can
4740  // use the cached row Map multivector to store a constant stride
4741  // copy of B.
4742  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
4743  try {
4744  deep_copy (*B_in_nonconst, B);
4745  } catch (std::exception& e) {
4746  std::ostringstream os;
4747  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
4748  "deep_copy(*B_in_nonconst, B) threw an exception: "
4749  << e.what () << ".";
4750  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
4751  }
4752  B_in = rcp_const_cast<const MV> (B_in_nonconst);
4753 
4755  ! B.isConstantStride (),
4756  std::runtime_error,
4757  "gaussSeidelCopy: The current implementation requires that B have "
4758  "constant stride. Since B does not have constant stride, we had to "
4759  "copy it into a separate constant-stride multivector. This is a "
4760  "limitation of the current implementation and not your fault, but we "
4761  "still report it as an efficiency warning for your information.");
4762  }
4763 
4764  for (int sweep = 0; sweep < numSweeps; ++sweep) {
4765  if (! importer.is_null () && sweep > 0) {
4766  // We already did the first Import for the zeroth sweep above,
4767  // if it was necessary.
4768  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4769  }
4770 
4771  // Do local Gauss-Seidel.
4772  if (direction != Symmetric) {
4773  if (rowIndices.is_null ()) {
4774  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4775  dampingFactor,
4776  localDirection);
4777  }
4778  else {
4779  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4780  D, rowIndices,
4781  dampingFactor,
4782  localDirection);
4783  }
4784  }
4785  else { // direction == Symmetric
4786  if (rowIndices.is_null ()) {
4787  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4788  dampingFactor,
4789  KokkosClassic::Forward);
4790  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
4791  // Gauss-Seidel" does _not_ do an Import between the forward
4792  // and backward sweeps. This makes symmetric Gauss-Seidel a
4793  // symmetric preconditioner if the matrix A is symmetric. We
4794  // imitate Aztec's behavior here.
4795  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4796  dampingFactor,
4797  KokkosClassic::Backward);
4798  }
4799  else {
4800  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4801  D, rowIndices,
4802  dampingFactor,
4803  KokkosClassic::Forward);
4804  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4805  D, rowIndices,
4806  dampingFactor,
4807  KokkosClassic::Backward);
4808 
4809  }
4810  }
4811  }
4812 
4813  if (copyBackOutput) {
4814  try {
4815  deep_copy (X , *X_domainMap); // Copy result back into X.
4816  } catch (std::exception& e) {
4817  TEUCHOS_TEST_FOR_EXCEPTION(
4818  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
4819  "threw an exception: " << e.what ());
4820  }
4821  }
4822  }
4823 
// convert<T>: build and return a new matrix whose stored values are
// this matrix's values converted from Scalar to T with static_cast.
//
// Precondition: this matrix must be fill complete; otherwise the check
// at the top of the body reports std::runtime_error.
//
// Two paths, chosen by isStaticGraph():
//  - static (const) graph: the new matrix is constructed from this
//    matrix's graph, the converted values are stored directly into its
//    local matrix, and fillComplete is called with this matrix's
//    domain and range Maps.
//  - otherwise: a new StaticProfile matrix is constructed from the row
//    and column Maps and the per-row entry counts, then given copies of
//    the row offsets and column indices plus the converted values via
//    setAllValues, and finished with expertStaticFillComplete using
//    this matrix's domain/range Maps and the graph's Import/Export.
//
// Returns: the new matrix (newmat).
//
// NOTE(review): this listing appears to omit some lines of the
// definition (the class qualifier before the function name and the
// typedef of out_mat_type). out_mat_type is presumably the pointee of
// the return type -- confirm against the full file.
4824  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4825  template<class T>
4826  Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node, classic> >
4828  convert () const
4829  {
4830  using Teuchos::ArrayRCP;
4831  using Teuchos::RCP;
4832  using Teuchos::rcp;
4834  typedef typename out_mat_type::local_matrix_type out_lcl_mat_type;
4835  typedef typename out_lcl_mat_type::values_type out_vals_type;
4836  typedef ArrayRCP<size_t>::size_type size_type;
4837  const char tfecfFuncName[] = "convert";
4838 
4839  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4840  ! isFillComplete (), std::runtime_error, "This matrix (the source of "
4841  "the conversion) is not fill complete. You must first call "
4842  "fillComplete() (possibly with the domain and range Map) without an "
4843  "intervening call to resumeFill(), before you may call this method.");
4844 
4845  // mfh 27 Feb 2014: It seems reasonable that if this matrix has a
4846  // const graph, then the returned matrix should also. However, if
4847  // this matrix does not have a const graph, then neither should
4848  // the returned matrix. The code below implements this strategy.
4849 
4850  RCP<out_mat_type> newmat; // the matrix to return
4851 
4852  if (this->isStaticGraph ()) {
4853  // This matrix has a const graph, so the returned matrix should too.
4854  newmat = rcp (new out_mat_type (this->getCrsGraph ()));
4855 
4856  // Convert the values from Scalar to T, and stuff them directly
4857  // into the matrix to return.
4858  const size_type numVals =
4859  static_cast<size_type> (this->lclMatrix_.values.dimension_0 ());
4860 
4861  // FIXME (mfh 05 Aug 2014) Write a copy kernel (impl_scalar_type and
4862  // T differ, so we can't use Kokkos::deep_copy).
4863  //
4864  // FIXME (mfh 05 Aug 2014) This assumes UVM.
4865  out_vals_type newVals1D ("Tpetra::CrsMatrix::val", numVals);
  // Element-by-element conversion; reads k_values1D_ and writes the
  // new view from this (host) loop, hence the UVM assumption above.
4866  for (size_type k = 0; k < numVals; ++k) {
4867  newVals1D(k) = static_cast<T> (this->k_values1D_(k));
4868  }
  // The new local matrix reuses this matrix's local graph and column
  // count; only the values differ.
4869  newmat->lclMatrix_ =
4870  out_lcl_mat_type ("Tpetra::CrsMatrix::lclMatrix_",
4871  this->lclMatrix_.numCols (), newVals1D,
4872  this->lclMatrix_.graph);
4873  newmat->k_values1D_ = newVals1D;
4874  // Since newmat has a static (const) graph, the graph already
4875  // has a column Map, and Import and Export objects already exist
4876  // (if applicable). Thus, calling fillComplete is cheap.
4877  newmat->fillComplete (this->getDomainMap (), this->getRangeMap ());
4878  }
4879  else {
4880  // This matrix has a nonconst graph, so the returned matrix
4881  // should also have a nonconst graph. However, it's fine for
4882  // the returned matrix to have static profile. This will
4883  // certainly speed up its fillComplete.
4884 
4885  //
4886  // FIXME (mfh 05 Aug 2014) Instead of the slow stuff below, we
4887  // should copy the values and existing graph into a new local
4888  // matrix (lclMatrix), and then use the Tpetra::CrsMatrix
4889  // constructor that takes (rowMap, colMap, lclMatrix, params).
4890  //
4891 
4892  // Get this matrix's local data.
4893  ArrayRCP<const size_t> ptr;
4894  ArrayRCP<const LocalOrdinal> ind;
4895  ArrayRCP<const Scalar> oldVal;
4896  this->getAllValues (ptr, ind, oldVal);
  // NOTE(review): oldVal is fetched here but the conversion loop below
  // reads k_values1D_ directly rather than oldVal.
4897 
4898  RCP<const map_type> rowMap = this->getRowMap ();
4899  RCP<const map_type> colMap = this->getColMap ();
4900 
4901  // Get an array of the number of entries in each (locally owned)
4902  // row, so that we can make the new matrix with static profile.
4903  const size_type numLocalRows =
4904  static_cast<size_type> (rowMap->getNodeNumElements ());
4905  ArrayRCP<size_t> numEntriesPerRow (numLocalRows);
4906  for (size_type localRow = 0; localRow < numLocalRows; ++localRow) {
4907  numEntriesPerRow[localRow] =
4908  static_cast<size_type> (getNumEntriesInLocalRow (localRow));
4909  }
4910 
4911  newmat = rcp (new out_mat_type (rowMap, colMap, numEntriesPerRow,
4912  StaticProfile));
4913 
4914  // Convert this matrix's values from Scalar to T.
4915  const size_type numVals = this->lclMatrix_.values.dimension_0 ();
4916  ArrayRCP<T> newVals1D (numVals);
4917  // FIXME (mfh 05 Aug 2014) This assumes UVM.
4918  for (size_type k = 0; k < numVals; ++k) {
4919  newVals1D[k] = static_cast<T> (this->k_values1D_(k));
4920  }
4921 
4922  // Give this matrix all of its local data. We can call this
4923  // method because newmat was _not_ created with a const graph.
4924  // The data must be passed in as nonconst, so we have to copy it
4925  // first.
4926  ArrayRCP<size_t> newPtr (ptr.size ());
4927  std::copy (ptr.begin (), ptr.end (), newPtr.begin ());
4928  ArrayRCP<LocalOrdinal> newInd (ind.size ());
4929  std::copy (ind.begin (), ind.end (), newInd.begin ());
4930  newmat->setAllValues (newPtr, newInd, newVals1D);
4931 
4932  // We already have the Import and Export (if applicable) objects
4933  // from the graph, so we can save a lot of time by passing them
4934  // in to expertStaticFillComplete.
4935  RCP<const map_type> domainMap = this->getDomainMap ();
4936  RCP<const map_type> rangeMap = this->getRangeMap ();
4937  RCP<const import_type> importer = this->getCrsGraph ()->getImporter ();
4938  RCP<const export_type> exporter = this->getCrsGraph ()->getExporter ();
4939  newmat->expertStaticFillComplete (domainMap, rangeMap, importer, exporter);
4940  }
4941 
4942  return newmat;
4943  }
4944 
4945 
// checkInternalState: debug-only consistency check of this matrix's
// internal state.
//
// When HAVE_TPETRA_DEBUG is defined, each test below reports
// std::logic_error if the stated invariant is violated. The invariants
// relate the two graph pointers (staticGraph_, myGraph_), the fill
// state of the matrix and its graph, and the 1-D (k_values1D_) and 2-D
// (values2D_) value allocations. When HAVE_TPETRA_DEBUG is not defined
// the body is empty and the function does nothing.
//
// NOTE(review): the lines carrying the class qualifier and function
// name are not present in this listing; the name is taken from
// tfecfFuncName below -- confirm against the full file.
4946  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4947  void
4950  {
4951 #ifdef HAVE_TPETRA_DEBUG
4952  const char tfecfFuncName[] = "checkInternalState: ";
  // Common message text for every failed check; some checks append
  // more specific detail.
4953  const char err[] = "Internal state is not consistent. "
4954  "Please report this bug to the Tpetra developers.";
4955 
4956  // This version of the graph (RCP<const crs_graph_type>) must
4957  // always be nonnull.
4958  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4959  staticGraph_.is_null (),
4960  std::logic_error, err);
4961  // myGraph == null means that the matrix has a const ("static")
4962  // graph. Otherwise, the matrix has a dynamic graph (it owns its
4963  // graph).
  // If myGraph_ is nonnull, it must be the same graph as staticGraph_.
4964  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4965  ! myGraph_.is_null () && myGraph_ != staticGraph_,
4966  std::logic_error, err);
4967  // if matrix is fill complete, then graph must be fill complete
4968  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4969  isFillComplete () && ! staticGraph_->isFillComplete (),
4970  std::logic_error, err << " Specifically, the matrix is fill complete, "
4971  "but its graph is NOT fill complete.");
4972  // if matrix is storage optimized, it should have a 1D allocation
  // (checked here as: no 2-D allocation may be present)
4973  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4974  isStorageOptimized () && ! values2D_.is_null (),
4975  std::logic_error, err);
4976  // if matrix/graph are static profile, then 2D allocation should not be present
4977  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4978  getProfileType() == StaticProfile && values2D_ != null,
4979  std::logic_error, err);
4980  // if matrix/graph are dynamic profile, then 1D allocation should not be present
4981  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4982  getProfileType() == DynamicProfile && k_values1D_.dimension_0 () > 0,
4983  std::logic_error, err);
4984  // if values are allocated and they are non-zero in number, then
4985  // one of the allocations should be present
4986  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4987  staticGraph_->indicesAreAllocated () &&
4988  staticGraph_->getNodeAllocationSize() > 0 &&
4989  staticGraph_->getNodeNumRows() > 0
4990  && values2D_.is_null () &&
4991  k_values1D_.dimension_0 () == 0,
4992  std::logic_error, err);
4993  // we cannot have both a 1D and 2D allocation
4994  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4995  k_values1D_.dimension_0 () > 0 && values2D_ != null,
4996  std::logic_error, err << " Specifically, k_values1D_ is allocated (has "
4997  "size " << k_values1D_.dimension_0 () << " > 0) and values2D_ is also "
4998  "allocated. CrsMatrix is not suppose to have both a 1-D and a 2-D "
4999  "allocation at the same time.");
5000 #endif
5001  }
5002 
// Build and return a one-line, brace-delimited summary of this matrix:
// the class name, the object label (only if nonempty), whether the
// matrix is fill complete, its global dimensions, and -- only when it
// is fill complete -- the global number of entries.
//
// NOTE(review): the lines carrying the class qualifier and function
// name are not present in this listing; this is presumably the
// description() method -- confirm against the full file.
5003  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5004  std::string
5007  {
5008  std::ostringstream os;
5009 
5010  os << "Tpetra::CrsMatrix (Kokkos refactor): {";
  // Only mention the label if one has been set.
5011  if (this->getObjectLabel () != "") {
5012  os << "Label: \"" << this->getObjectLabel () << "\", ";
5013  }
5014  if (isFillComplete ()) {
5015  os << "isFillComplete: true"
5016  << ", global dimensions: [" << getGlobalNumRows () << ", "
5017  << getGlobalNumCols () << "]"
5018  << ", global number of entries: " << getGlobalNumEntries ()
5019  << "}";
5020  }
5021  else {
  // The global entry count is omitted when not fill complete.
5022  os << "isFillComplete: false"
5023  << ", global dimensions: [" << getGlobalNumRows () << ", "
5024  << getGlobalNumCols () << "]}";
5025  }
5026  return os.str ();
5027  }
5028 
// describe: print information about this matrix to out, with the
// amount of detail controlled by verbLevel.
//
//  out        output stream to print to
//  verbLevel  requested verbosity; VERB_DEFAULT is treated as VERB_LOW
//
// What is printed at each level (each level includes the lower ones):
//  - VERB_NONE: nothing; return immediately.
//  - VERB_LOW: on rank 0 only, the class name, label (if nonempty),
//    template parameter names, fill-complete state and global sizes.
//  - VERB_MEDIUM: the row, column, domain and range Maps (a Map that
//    compares equal to an earlier one is reported as such instead of
//    being described again), then per-process entry counts.
//  - VERB_HIGH: one line per local row with its entry count.
//  - VERB_EXTREME: additionally each row's (index, value) pairs.
//
// At VERB_MEDIUM and above, every process executes comm->barrier()
// inside the per-rank loops, so all processes in the communicator are
// expected to call this method together.
//
// NOTE(review): the line carrying the class qualifier before the
// function name is not present in this listing.
5029  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5030  void
5032  describe (Teuchos::FancyOStream &out,
5033  const Teuchos::EVerbosityLevel verbLevel) const
5034  {
5035  using std::endl;
5036  using std::setw;
5037  using Teuchos::Comm;
5038  using Teuchos::RCP;
5039  using Teuchos::TypeNameTraits;
5040  using Teuchos::VERB_DEFAULT;
5041  using Teuchos::VERB_NONE;
5042  using Teuchos::VERB_LOW;
5043  using Teuchos::VERB_MEDIUM;
5044  using Teuchos::VERB_HIGH;
5045  using Teuchos::VERB_EXTREME;
5046 
5047  const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5048 
5049  if (vl == VERB_NONE) {
5050  return; // Don't print anything at all
5051  }
5052  // By convention, describe() always begins with a tab.
5053  Teuchos::OSTab tab0 (out);
5054 
5055  RCP<const Comm<int> > comm = this->getComm();
5056  const int myRank = comm->getRank();
5057  const int numProcs = comm->getSize();
  // Column width for the per-row table below: one more than the number
  // of powers of ten (starting at 10) that are below the global row
  // count, but at least 11, plus 2.
  // NOTE(review): 11 is presumably chosen to fit the widest column
  // heading -- confirm.
5058  size_t width = 1;
5059  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5060  ++width;
5061  }
5062  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5063 
5064  // none: print nothing
5065  // low: print O(1) info from node 0
5066  // medium: print O(P) info, num entries per process
5067  // high: print O(N) info, num entries per row
5068  // extreme: print O(NNZ) info: print indices and values
5069  //
5070  // for medium and higher, print constituent objects at specified verbLevel
5071  if (myRank == 0) {
5072  out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5073  }
5074  Teuchos::OSTab tab1 (out);
5075 
5076  if (myRank == 0) {
5077  if (this->getObjectLabel () != "") {
5078  out << "Label: \"" << this->getObjectLabel () << "\", ";
5079  }
5080  {
5081  out << "Template parameters:" << endl;
5082  Teuchos::OSTab tab2 (out);
5083  out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl
5084  << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5085  << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5086  << "Node: " << TypeNameTraits<Node>::name () << endl;
5087  }
5088  if (isFillComplete()) {
5089  out << "isFillComplete: true" << endl
5090  << "Global dimensions: [" << getGlobalNumRows () << ", "
5091  << getGlobalNumCols () << "]" << endl
5092  << "Global number of entries: " << getGlobalNumEntries () << endl
5093  << "Global number of diagonal entries: " << getGlobalNumDiags ()
5094  << endl << "Global max number of entries in a row: "
5095  << getGlobalMaxNumRowEntries () << endl;
5096  }
5097  else {
5098  out << "isFillComplete: false" << endl
5099  << "Global dimensions: [" << getGlobalNumRows () << ", "
5100  << getGlobalNumCols () << "]" << endl;
5101  }
5102  }
5103 
5104  if (vl < VERB_MEDIUM) {
5105  return; // all done!
5106  }
5107 
5108  // Describe the row Map.
5109  if (myRank == 0) {
5110  out << endl << "Row Map:" << endl;
5111  }
5112  if (getRowMap ().is_null ()) {
5113  if (myRank == 0) {
5114  out << "null" << endl;
5115  }
5116  }
5117  else {
5118  if (myRank == 0) {
5119  out << endl;
5120  }
  // Called on every process (not just rank 0), at this verbosity.
5121  getRowMap ()->describe (out, vl);
5122  }
5123 
5124  // Describe the column Map.
5125  if (myRank == 0) {
5126  out << "Column Map: ";
5127  }
5128  if (getColMap ().is_null ()) {
5129  if (myRank == 0) {
5130  out << "null" << endl;
5131  }
5132  } else if (getColMap () == getRowMap ()) {
  // Equal RCPs: avoid describing the same Map twice.
5133  if (myRank == 0) {
5134  out << "same as row Map" << endl;
5135  }
5136  } else {
5137  if (myRank == 0) {
5138  out << endl;
5139  }
5140  getColMap ()->describe (out, vl);
5141  }
5142 
5143  // Describe the domain Map.
5144  if (myRank == 0) {
5145  out << "Domain Map: ";
5146  }
5147  if (getDomainMap ().is_null ()) {
5148  if (myRank == 0) {
5149  out << "null" << endl;
5150  }
5151  } else if (getDomainMap () == getRowMap ()) {
5152  if (myRank == 0) {
5153  out << "same as row Map" << endl;
5154  }
5155  } else if (getDomainMap () == getColMap ()) {
5156  if (myRank == 0) {
5157  out << "same as column Map" << endl;
5158  }
5159  } else {
5160  if (myRank == 0) {
5161  out << endl;
5162  }
5163  getDomainMap ()->describe (out, vl);
5164  }
5165 
5166  // Describe the range Map.
5167  if (myRank == 0) {
5168  out << "Range Map: ";
5169  }
5170  if (getRangeMap ().is_null ()) {
5171  if (myRank == 0) {
5172  out << "null" << endl;
5173  }
5174  } else if (getRangeMap () == getDomainMap ()) {
5175  if (myRank == 0) {
5176  out << "same as domain Map" << endl;
5177  }
5178  } else if (getRangeMap () == getRowMap ()) {
5179  if (myRank == 0) {
5180  out << "same as row Map" << endl;
5181  }
5182  } else {
5183  if (myRank == 0) {
5184  out << endl;
5185  }
5186  getRangeMap ()->describe (out, vl);
5187  }
5188 
5189  // O(P) data
  // Each process prints only on its own turn (myRank == curRank); all
  // processes take part in the barriers after every turn.
5190  for (int curRank = 0; curRank < numProcs; ++curRank) {
5191  if (myRank == curRank) {
5192  out << "Process rank: " << curRank << endl;
5193  Teuchos::OSTab tab2 (out);
5194  if (! staticGraph_->indicesAreAllocated ()) {
5195  out << "Graph indices not allocated" << endl;
5196  }
5197  else {
5198  out << "Number of allocated entries: "
5199  << staticGraph_->getNodeAllocationSize () << endl;
5200  }
5201  out << "Number of entries: " << getNodeNumEntries () << endl;
5202  if (isFillComplete ()) {
5203  out << "Number of diagonal entries: " << getNodeNumDiags () << endl;
5204  }
5205  out << "Max number of entries per row: " << getNodeMaxNumRowEntries ()
5206  << endl;
5207  }
5208  // Give output time to complete by executing some barriers.
5209  comm->barrier ();
5210  comm->barrier ();
5211  comm->barrier ();
5212  }
5213 
5214  if (vl < VERB_HIGH) {
5215  return; // all done!
5216  }
5217 
5218  // O(N) and O(NNZ) data
5219  for (int curRank = 0; curRank < numProcs; ++curRank) {
5220  if (myRank == curRank) {
  // Table heading; the (Index,Value) column exists only at
  // VERB_EXTREME.
5221  out << std::setw(width) << "Proc Rank"
5222  << std::setw(width) << "Global Row"
5223  << std::setw(width) << "Num Entries";
5224  if (vl == VERB_EXTREME) {
5225  out << std::setw(width) << "(Index,Value)";
5226  }
5227  out << endl;
5228  for (size_t r = 0; r < getNodeNumRows (); ++r) {
5229  const size_t nE = getNumEntriesInLocalRow(r);
5230  GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5231  out << std::setw(width) << myRank
5232  << std::setw(width) << gid
5233  << std::setw(width) << nE;
5234  if (vl == VERB_EXTREME) {
  // Print global column indices in both cases. If the matrix is
  // neither globally nor locally indexed, no entries are printed.
5235  if (isGloballyIndexed()) {
5236  ArrayView<const GlobalOrdinal> rowinds;
5237  ArrayView<const Scalar> rowvals;
5238  getGlobalRowView (gid, rowinds, rowvals);
5239  for (size_t j = 0; j < nE; ++j) {
5240  out << " (" << rowinds[j]
5241  << ", " << rowvals[j]
5242  << ") ";
5243  }
5244  }
5245  else if (isLocallyIndexed()) {
5246  ArrayView<const LocalOrdinal> rowinds;
5247  ArrayView<const Scalar> rowvals;
5248  getLocalRowView (r, rowinds, rowvals);
5249  for (size_t j=0; j < nE; ++j) {
  // Local column index -> global index via the column Map.
5250  out << " (" << getColMap()->getGlobalElement(rowinds[j])
5251  << ", " << rowvals[j]
5252  << ") ";
5253  }
5254  } // globally or locally indexed
5255  } // vl == VERB_EXTREME
5256  out << endl;
5257  } // for each row r on this process
5258  } // if (myRank == curRank)
5259 
5260  // Give output time to complete
5261  comm->barrier ();
5262  comm->barrier ();
5263  comm->barrier ();
5264  } // for each process curRank
5265  }
5266 
// Report whether source is an acceptable source object for this
// matrix: returns true exactly when source can be dynamic_cast to
// row_matrix_type, and false otherwise. No sizes are compared.
//
// NOTE(review): the lines carrying the class qualifier, the function
// name and parameter list, and the typedef of row_matrix_type are not
// present in this listing; this is presumably checkSizes(source) --
// confirm against the full file.
5267  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5268  bool
5271  {
5272  // It's not clear what kind of compatibility checks on sizes can
5273  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5274  // compatibility.
5275 
5276  // Currently, the source object must be a RowMatrix with the same
5277  // four template parameters as the target CrsMatrix. We might
5278  // relax this requirement later.
5280  const row_matrix_type* srcRowMat =
5281  dynamic_cast<const row_matrix_type*> (&source);
  // The pointer form of dynamic_cast yields NULL on failure instead of
  // throwing.
5282  return (srcRowMat != NULL);
5283  }
5284 
// copyAndPermute: copy rows of the source matrix into this (target)
// matrix.
//
//  numSameIDs       number of leading local rows that are copied to
//                   the row with the same global index in the target
//  permuteToLIDs    local row indices in the target (this matrix)
//  permuteFromLIDs  local row indices in the source; entry p of this
//                   list is copied into row permuteToLIDs[p]
//
// For every copied row the entries are obtained with global column
// indices (by copy if the source is locally indexed, by view
// otherwise) and combined into the target with combineGlobalValues,
// using REPLACE if this matrix has a static graph and INSERT
// otherwise.
//
// Reports std::invalid_argument if the two permute lists differ in
// length. The dynamic_cast of source to a reference type throws if
// source is not a RowMatrix<Scalar, LO, GO, NT>.
//
// NOTE(review): the lines carrying the class qualifier, the function
// name and the first parameter (source, used in the body) are not
// present in this listing -- confirm against the full file.
5285  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5286  void
5289  size_t numSameIDs,
5290  const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
5291  const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
5292  {
5293  using Teuchos::Array;
5294  using Teuchos::ArrayView;
5295  typedef LocalOrdinal LO;
5296  typedef GlobalOrdinal GO;
5297  typedef node_type NT;
5298  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
5299  const char tfecfFuncName[] = "copyAndPermute: ";
5300 
  // NOTE(review): the message below has no space before the != text,
  // so the first size and the operator run together when printed.
5301  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5302  permuteToLIDs.size() != permuteFromLIDs.size(),
5303  std::invalid_argument, "permuteToLIDs.size() = " << permuteToLIDs.size()
5304  << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size() << ".");
5305 
5306  // This dynamic cast should succeed, because we've already tested
5307  // it in checkSizes().
5308  typedef RowMatrix<Scalar, LO, GO, NT> row_matrix_type;
5309  const row_matrix_type& srcMat = dynamic_cast<const row_matrix_type&> (source);
5310 
5311  const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5312  //
5313  // Copy the first numSameIDs rows from source to target (this matrix).
5314  // This involves copying rows corresponding to LIDs [0, numSameIDs-1].
5315  //
5316  const map_type& srcRowMap = * (srcMat.getRowMap ());
  // Scratch buffers reused for every row in both loops below; they
  // only ever grow.
5317  Array<GO> rowInds;
5318  Array<Scalar> rowVals;
5319  const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
5320  for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5321  // Global ID for the current row index in the source matrix.
5322  // The first numSameIDs GIDs in the two input lists are the
5323  // same, so sourceGID == targetGID in this case.
5324  const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5325  const GO targetGID = sourceGID;
5326 
5327  // Input views for the combineGlobalValues() call below.
5328  ArrayView<const GO> rowIndsConstView;
5329  ArrayView<const Scalar> rowValsConstView;
5330 
5331  if (sourceIsLocallyIndexed) {
5332  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5333  if (rowLength > static_cast<size_t> (rowInds.size())) {
5334  rowInds.resize (rowLength);
5335  rowVals.resize (rowLength);
5336  }
5337  // Resizing invalidates an Array's views, so we must make new
5338  // ones, even if rowLength hasn't changed.
5339  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
5340  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
5341 
5342  // The source matrix is locally indexed, so we have to get a
5343  // copy. Really it's the GIDs that have to be copied (because
5344  // they have to be converted from LIDs).
5345  size_t checkRowLength = 0;
5346  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
5347 
  // Debug builds only: the length reported by the copy must match the
  // length queried above.
5348 #ifdef HAVE_TPETRA_DEBUG
5349  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
5350  std::logic_error, "For global row index " << sourceGID << ", the source"
5351  " matrix's getNumEntriesInGlobalRow() method returns a row length of "
5352  << rowLength << ", but the getGlobalRowCopy() method reports that "
5353  "the row length is " << checkRowLength << ". Please report this bug "
5354  "to the Tpetra developers.");
5355 #endif // HAVE_TPETRA_DEBUG
5356 
5357  rowIndsConstView = rowIndsView.view (0, rowLength);
5358  rowValsConstView = rowValsView.view (0, rowLength);
5359  }
5360  else { // source matrix is globally indexed.
5361  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
5362  }
5363 
5364  // Combine the data into the target matrix.
5365  if (isStaticGraph()) {
5366  // Applying a permutation to a matrix with a static graph
5367  // means REPLACE-ing entries.
5368  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, REPLACE);
5369  }
5370  else {
5371  // Applying a permutation to a matrix with a dynamic graph
5372  // means INSERT-ing entries. This has the same effect as
5373  // ADD, if the target graph already has an entry there.
5374  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, INSERT);
5375  }
5376  } // For each of the consecutive source and target IDs that are the same
5377 
5378  //
5379  // Permute the remaining rows.
5380  //
5381  const map_type& tgtRowMap = * (this->getRowMap ());
5382  const size_t numPermuteToLIDs = static_cast<size_t> (permuteToLIDs.size ());
5383  for (size_t p = 0; p < numPermuteToLIDs; ++p) {
  // Source and target rows may have different global indices here;
  // each is looked up in its own row Map.
5384  const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5385  const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5386 
5387  // Input views for the combineGlobalValues() call below.
5388  ArrayView<const GO> rowIndsConstView;
5389  ArrayView<const Scalar> rowValsConstView;
5390 
5391  if (sourceIsLocallyIndexed) {
5392  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5393  if (rowLength > static_cast<size_t> (rowInds.size ())) {
5394  rowInds.resize (rowLength);
5395  rowVals.resize (rowLength);
5396  }
5397  // Resizing invalidates an Array's views, so we must make new
5398  // ones, even if rowLength hasn't changed.
5399  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
5400  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
5401 
5402  // The source matrix is locally indexed, so we have to get a
5403  // copy. Really it's the GIDs that have to be copied (because
5404  // they have to be converted from LIDs).
5405  size_t checkRowLength = 0;
5406  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
5407 
5408 #ifdef HAVE_TPETRA_DEBUG
5409  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
5410  std::logic_error, "For the source matrix's global row index "
5411  << sourceGID << ", the source matrix's getNumEntriesInGlobalRow() "
5412  "method returns a row length of " << rowLength << ", but the "
5413  "getGlobalRowCopy() method reports that the row length is "
5414  << checkRowLength << ". Please report this bug to the Tpetra "
5415  "developers.");
5416 #endif // HAVE_TPETRA_DEBUG
5417 
5418  rowIndsConstView = rowIndsView.view (0, rowLength);
5419  rowValsConstView = rowValsView.view (0, rowLength);
5420  }
5421  else {
  // Source matrix is globally indexed: a view suffices, no copy.
5422  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
5423  }
5424 
5425  // Combine the data into the target matrix.
  // Same REPLACE / INSERT rule as in the first loop.
5426  if (isStaticGraph()) {
5427  this->combineGlobalValues (targetGID, rowIndsConstView,
5428  rowValsConstView, REPLACE);
5429  }
5430  else {
5431  this->combineGlobalValues (targetGID, rowIndsConstView,
5432  rowValsConstView, INSERT);
5433  }
5434  } // For each ID to permute
5435  }
5436 
// packAndPrepare: pack the source object's data for communication by
// delegating to the source's own pack() method.
//
//  source              source object; must be castable to
//                      RowMatrix<Scalar, LO, GO, Node>
//  exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor
//                      forwarded unchanged to source's pack()
//
// Reports std::invalid_argument if source is not such a RowMatrix.
//
// In debug builds (HAVE_TPETRA_DEBUG defined), a std::exception thrown
// by pack() is caught locally, the failure flag is max-reduced over
// this matrix's communicator, each failing process prints its message
// to std::cerr in rank order, and then every process reports
// std::logic_error. In non-debug builds pack() is called directly and
// any exception it throws propagates to the caller.
//
// NOTE(review): the line carrying the class qualifier before the
// function name is not present in this listing. The using-declarations
// and the GO typedef below are not referenced in the visible body.
5437  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5438  void
5440  packAndPrepare (const SrcDistObject& source,
5441  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5442  Teuchos::Array<char>& exports,
5443  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5444  size_t& constantNumPackets,
5445  Distributor& distor)
5446  {
5447  using Teuchos::Array;
5448  using Teuchos::ArrayView;
5449  using Teuchos::av_reinterpret_cast;
5450  typedef LocalOrdinal LO;
5451  typedef GlobalOrdinal GO;
5452  const char tfecfFuncName[] = "packAndPrepare: ";
5453 
5454  // Attempt to cast the source object to RowMatrix. If the cast
5455  // succeeds, use the source object's pack method to pack its data
5456  // for communication. If the source object is really a CrsMatrix,
5457  // this will pick up the CrsMatrix's more efficient override. If
5458  // the RowMatrix cast fails, then the source object doesn't have
5459  // the right type.
5460  //
5461  // FIXME (mfh 30 Jun 2013) We don't even need the RowMatrix to
5462  // have the same Node type. Unfortunately, we don't have a way to
5463  // ask if the RowMatrix is "a RowMatrix with any Node type," since
5464  // RowMatrix doesn't have a base class. A hypothetical
5465  // RowMatrixBase<Scalar, LO, GO> class, which does not currently
5466  // exist, would satisfy this requirement.
5467  //
5468  // Why RowMatrixBase<Scalar, LO, GO>? The source object's Scalar
5469  // type doesn't technically need to match the target object's
5470  // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO
5471  // and GO need not be the same, as long as there is no overflow of
5472  // the indices. However, checking for index overflow is global
5473  // and therefore undesirable.
5474  typedef RowMatrix<Scalar, LO, GO, Node> row_matrix_type;
5475  const row_matrix_type* srcRowMat =
5476  dynamic_cast<const row_matrix_type*> (&source);
5477  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5478  srcRowMat == NULL, std::invalid_argument,
5479  "The source object of the Import or Export operation is neither a "
5480  "CrsMatrix (with the same template parameters as the target object), "
5481  "nor a RowMatrix (with the same first four template parameters as the "
5482  "target object).");
5483 #ifdef HAVE_TPETRA_DEBUG
5484  {
5485  using Teuchos::reduceAll;
5486  std::ostringstream msg;
  // lclBad records whether pack() threw on this process.
5487  int lclBad = 0;
5488  try {
5489  srcRowMat->pack (exportLIDs, exports, numPacketsPerLID,
5490  constantNumPackets, distor);
5491  } catch (std::exception& e) {
5492  lclBad = 1;
5493  msg << e.what ();
5494  }
  // Max-reduce so that every process learns whether any process
  // failed.
5495  int gblBad = 0;
5496  const Teuchos::Comm<int>& comm = * (this->getComm ());
5497  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
5498  lclBad, Teuchos::outArg (gblBad));
5499  if (gblBad != 0) {
5500  const int myRank = comm.getRank ();
5501  const int numProcs = comm.getSize ();
  // Failing processes print in rank order; the message is assembled
  // in a local stream first and written to std::cerr in one insertion.
5502  for (int r = 0; r < numProcs; ++r) {
5503  if (r == myRank && lclBad != 0) {
5504  std::ostringstream os;
5505  os << "Proc " << myRank << ": " << msg.str () << std::endl;
5506  std::cerr << os.str ();
5507  }
5508  comm.barrier ();
5509  comm.barrier ();
5510  comm.barrier ();
5511  }
  // Reached on every process, since gblBad is the same everywhere
  // after the reduction.
5512  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5513  true, std::logic_error, "pack() threw an exception on one or "
5514  "more participating processes.");
5515  }
5516  }
5517 #else
5518  srcRowMat->pack (exportLIDs, exports, numPacketsPerLID,
5519  constantNumPackets, distor);
5520 #endif // HAVE_TPETRA_DEBUG
5521  }
5522 
5523  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5524  bool
5526  packRow (char* const numEntOut,
5527  char* const valOut,
5528  char* const indOut,
5529  const size_t numEnt,
5530  const LocalOrdinal lclRow) const
5531  {
5532  using Teuchos::ArrayView;
5533  typedef LocalOrdinal LO;
5534  typedef GlobalOrdinal GO;
5535 
5536  const LO numEntLO = static_cast<LO> (numEnt);
5537  memcpy (numEntOut, &numEntLO, sizeof (LO));
5538  if (this->isLocallyIndexed ()) {
5539  // If the matrix is locally indexed on the calling process, we
5540  // have to use its column Map (which it _must_ have in this
5541  // case) to convert to global indices.
5542  ArrayView<const LO> indIn;
5543  ArrayView<const Scalar> valIn;
5544  this->getLocalRowView (lclRow, indIn, valIn);
5545  const map_type& colMap = * (this->getColMap ());
5546  // Copy column indices one at a time, so that we don't need
5547  // temporary storage.
5548  for (size_t k = 0; k < numEnt; ++k) {
5549  const GO gblIndIn = colMap.getGlobalElement (indIn[k]);
5550  memcpy (indOut + k * sizeof (GO), &gblIndIn, sizeof (GO));
5551  }
5552  memcpy (valOut, valIn.getRawPtr (), numEnt * sizeof (Scalar));
5553  }
5554  else if (this->isGloballyIndexed ()) {
5555  // If the matrix is globally indexed on the calling process,
5556  // then we can use the column indices directly. However, we
5557  // have to get the global row index. The calling process must
5558  // have a row Map, since otherwise it shouldn't be participating
5559  // in packing operations.
5560  ArrayView<const GO> indIn;
5561  ArrayView<const Scalar> valIn;
5562  const map_type& rowMap = * (this->getRowMap ());
5563  const GO gblRow = rowMap.getGlobalElement (lclRow);
5564  this->getGlobalRowView (gblRow, indIn, valIn);
5565  memcpy (indOut, indIn.getRawPtr (), numEnt * sizeof (GO));
5566  memcpy (valOut, valIn.getRawPtr (), numEnt * sizeof (Scalar));
5567  }
5568  else {
5569  if (numEnt != 0) {
5570  return false;
5571  }
5572  }
5573  return true;
5574  }
5575 
5576 
5577  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5578  bool
5580  unpackRow (Scalar* const valInTmp,
5581  GlobalOrdinal* const indInTmp,
5582  const size_t tmpSize,
5583  const char* const valIn,
5584  const char* const indIn,
5585  const size_t numEnt,
5586  const LocalOrdinal lclRow,
5587  const Tpetra::CombineMode combineMode)
5588  {
5589  if (tmpSize < numEnt || (numEnt != 0 && (valInTmp == NULL || indInTmp == NULL))) {
5590  return false;
5591  }
5592  memcpy (valInTmp, valIn, numEnt * sizeof (Scalar));
5593  memcpy (indInTmp, indIn, numEnt * sizeof (GlobalOrdinal));
5594  const GlobalOrdinal gblRow = this->getRowMap ()->getGlobalElement (lclRow);
5595  Teuchos::ArrayView<Scalar> val ((numEnt == 0) ? NULL : valInTmp, numEnt);
5596  Teuchos::ArrayView<GlobalOrdinal> ind ((numEnt == 0) ? NULL : indInTmp, numEnt);
5597  this->combineGlobalValues (gblRow, ind, val, combineMode);
5598  return true;
5599  }
5600 
5601 
5602  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5603  void
5605  allocatePackSpace (Teuchos::Array<char>& exports,
5606  size_t& totalNumEntries,
5607  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs) const
5608  {
5609  typedef LocalOrdinal LO;
5610  typedef GlobalOrdinal GO;
5611  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
5612  //const char tfecfFuncName[] = "allocatePackSpace: ";
5613  const size_type numExportLIDs = exportLIDs.size ();
5614 
5615  // Count the total number of entries to send.
5616  totalNumEntries = 0;
5617  for (size_type i = 0; i < numExportLIDs; ++i) {
5618  const LO lclRow = exportLIDs[i];
5619  size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
5620  // FIXME (mfh 25 Jan 2015) We should actually report invalid row
5621  // indices as an error. Just consider them nonowned for now.
5622  if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
5623  curNumEntries = 0;
5624  }
5625  totalNumEntries += curNumEntries;
5626  }
5627 
5628  // FIXME (mfh 24 Feb 2013) This code is only correct if
5629  // sizeof(Scalar) is a meaningful representation of the amount of
5630  // data in a Scalar instance. (LO and GO are always built-in
5631  // integer types.)
5632  //
5633  // Allocate the exports array. It does NOT need padding for
5634  // alignment, since we use memcpy to write to / read from send /
5635  // receive buffers.
5636  const size_t allocSize =
5637  static_cast<size_t> (numExportLIDs) * sizeof (LO) +
5638  totalNumEntries * (sizeof (Scalar) + sizeof (GO));
5639  if (static_cast<size_t> (exports.size ()) < allocSize) {
5640  exports.resize (allocSize);
5641  }
5642  }
5643 
5644  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5645  void
5647  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5648  Teuchos::Array<char>& exports,
5649  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5650  size_t& constantNumPackets,
5651  Distributor& distor) const
5652  {
5653  using Teuchos::Array;
5654  using Teuchos::ArrayView;
5655  using Teuchos::av_reinterpret_cast;
5656  using Teuchos::RCP;
5657  typedef LocalOrdinal LO;
5658  typedef GlobalOrdinal GO;
5659  typedef typename ArrayView<const LO>::size_type size_type;
5660  const char tfecfFuncName[] = "pack: ";
5661 
5662  const size_type numExportLIDs = exportLIDs.size ();
5663  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5664  numExportLIDs != numPacketsPerLID.size (), std::invalid_argument,
5665  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5666  " = " << numPacketsPerLID.size () << ".");
5667 
5668  // Setting this to zero tells the caller to expect a possibly
5669  // different ("nonconstant") number of packets per local index
5670  // (i.e., a possibly different number of entries per row).
5671  constantNumPackets = 0;
5672 
5673  // The pack buffer 'exports' enters this method possibly
5674  // unallocated. Do the first two parts of "Count, allocate, fill,
5675  // compute."
5676  size_t totalNumEntries = 0;
5677  allocatePackSpace (exports, totalNumEntries, exportLIDs);
5678  const size_t bufSize = static_cast<size_t> (exports.size ());
5679 
5680  // Compute the number of "packets" (in this case, bytes) per
5681  // export LID (in this case, local index of the row to send), and
5682  // actually pack the data.
5683  //
5684  // FIXME (mfh 24 Feb 2013, 25 Jan 2015) This code is only correct
5685  // if sizeof(Scalar) is a meaningful representation of the amount
5686  // of data in a Scalar instance. (LO and GO are always built-in
5687  // integer types.)
5688 
5689  // Variables for error reporting in the loop.
5690  size_type firstBadIndex = 0; // only valid if outOfBounds == true.
5691  size_t firstBadOffset = 0; // only valid if outOfBounds == true.
5692  size_t firstBadNumBytes = 0; // only valid if outOfBounds == true.
5693  bool outOfBounds = false;
5694  bool packErr = false;
5695 
5696  char* const exportsRawPtr = exports.getRawPtr ();
5697  size_t offset = 0; // current index into 'exports' array.
5698  for (size_type i = 0; i < numExportLIDs; ++i) {
5699  const LO lclRow = exportLIDs[i];
5700  const size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
5701 
5702  // Only pad this row if it has a nonzero number of entries.
5703  if (numEnt == 0) {
5704  numPacketsPerLID[i] = 0;
5705  }
5706  else {
5707  char* const numEntBeg = exportsRawPtr + offset;
5708  char* const numEntEnd = numEntBeg + sizeof (LO);
5709  char* const valBeg = numEntEnd;
5710  char* const valEnd = valBeg + numEnt * sizeof (Scalar);
5711  char* const indBeg = valEnd;
5712  const size_t numBytes = sizeof (LO) +
5713  numEnt * (sizeof (Scalar) + sizeof (GO));
5714  if (offset > bufSize || offset + numBytes > bufSize) {
5715  firstBadIndex = i;
5716  firstBadOffset = offset;
5717  firstBadNumBytes = numBytes;
5718  outOfBounds = true;
5719  break;
5720  }
5721  packErr = ! packRow (numEntBeg, valBeg, indBeg, numEnt, lclRow);
5722  if (packErr) {
5723  firstBadIndex = i;
5724  firstBadOffset = offset;
5725  firstBadNumBytes = numBytes;
5726  break;
5727  }
5728  // numPacketsPerLID[i] is the number of "packets" in the
5729  // current local row i. Packet=char (really "byte") so use
5730  // the number of bytes of the packed data for that row.
5731  numPacketsPerLID[i] = numBytes;
5732  offset += numBytes;
5733  }
5734  }
5735 
5736  TEUCHOS_TEST_FOR_EXCEPTION(
5737  outOfBounds, std::logic_error, "First invalid offset into 'exports' "
5738  "pack buffer at index i = " << firstBadIndex << ". exportLIDs[i]: "
5739  << exportLIDs[firstBadIndex] << ", bufSize: " << bufSize << ", offset: "
5740  << firstBadOffset << ", numBytes: " << firstBadNumBytes << ".");
5741  TEUCHOS_TEST_FOR_EXCEPTION(
5742  packErr, std::logic_error, "First error in packRow() at index i = "
5743  << firstBadIndex << ". exportLIDs[i]: " << exportLIDs[firstBadIndex]
5744  << ", bufSize: " << bufSize << ", offset: " << firstBadOffset
5745  << ", numBytes: " << firstBadNumBytes << ".");
5746  }
5747 
5748  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5749  void
5751  combineGlobalValues (const GlobalOrdinal globalRowIndex,
5752  const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
5753  const Teuchos::ArrayView<const Scalar>& values,
5754  const Tpetra::CombineMode combineMode)
5755  {
5756  const char tfecfFuncName[] = "combineGlobalValues: ";
5757 
5758  if (isStaticGraph ()) {
5759  // INSERT doesn't make sense for a static graph, since you
5760  // aren't allowed to change the structure of the graph.
5761  // However, all the other combine modes work.
5762  if (combineMode == ADD) {
5763  sumIntoGlobalValues (globalRowIndex, columnIndices, values);
5764  }
5765  else if (combineMode == REPLACE) {
5766  replaceGlobalValues (globalRowIndex, columnIndices, values);
5767  }
5768  else if (combineMode == ABSMAX) {
5769  using Details::AbsMax;
5770  AbsMax<Scalar> f;
5771  this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
5772  columnIndices,
5773  values, f);
5774  }
5775  else if (combineMode == INSERT) {
5776  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5777  isStaticGraph () && combineMode == INSERT, std::invalid_argument,
5778  "INSERT combine mode is not allowed if the matrix has a static graph "
5779  "(i.e., was constructed with the CrsMatrix constructor that takes a "
5780  "const CrsGraph pointer).");
5781  }
5782  else {
5783  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5784  true, std::logic_error, "Invalid combine mode; should never get "
5785  "here! Please report this bug to the Tpetra developers.");
5786  }
5787  }
5788  else { // The matrix has a dynamic graph.
5789  if (combineMode == ADD || combineMode == INSERT) {
5790  // For a dynamic graph, all incoming column indices are
5791  // inserted into the target graph. Duplicate indices will
5792  // have their values summed. In this context, ADD and INSERT
5793  // are equivalent. We need to call insertGlobalValues()
5794  // anyway if the column indices don't yet exist in this row,
5795  // so we just call insertGlobalValues() for both cases.
5796  insertGlobalValuesFiltered (globalRowIndex, columnIndices, values);
5797  }
5798  // FIXME (mfh 14 Mar 2012):
5799  //
5800  // Implementing ABSMAX or REPLACE for a dynamic graph would
5801  // require modifying assembly to attach a possibly different
5802  // combine mode to each inserted (i, j, A_ij) entry. For
5803  // example, consider two different Export operations to the same
5804  // target CrsMatrix, the first with ABSMAX combine mode and the
5805  // second with REPLACE. This isn't a common use case, so we
5806  // won't mess with it for now.
5807  else if (combineMode == ABSMAX) {
5808  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5809  ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
5810  "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
5811  "implemented.");
5812  }
5813  else if (combineMode == REPLACE) {
5814  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5815  ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
5816  "REPLACE combine mode when the matrix has a dynamic graph is not yet "
5817  "implemented.");
5818  }
5819  else {
5820  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5821  true, std::logic_error, "Should never get here! Please report this "
5822  "bug to the Tpetra developers.");
5823  }
5824  }
5825  }
5826 
5827 
5828  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5829  void
5831  unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
5832  const Teuchos::ArrayView<const char>& imports,
5833  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5834  size_t constantNumPackets,
5835  Distributor& distor,
5836  CombineMode combineMode)
5837  {
5838 #ifdef HAVE_TPETRA_DEBUG
5839  const char tfecfFuncName[] = "unpackAndCombine: ";
5840  const CombineMode validModes[4] = {ADD, REPLACE, ABSMAX, INSERT};
5841  const char* validModeNames[4] = {"ADD", "REPLACE", "ABSMAX", "INSERT"};
5842  const int numValidModes = 4;
5843 
5844  if (std::find (validModes, validModes+numValidModes, combineMode) ==
5845  validModes+numValidModes) {
5846  std::ostringstream os;
5847  os << "Invalid combine mode. Valid modes are {";
5848  for (int k = 0; k < numValidModes; ++k) {
5849  os << validModeNames[k];
5850  if (k < numValidModes - 1) {
5851  os << ", ";
5852  }
5853  }
5854  os << "}.";
5855  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5856  true, std::invalid_argument, os.str ());
5857  }
5858 
5859  {
5860  using Teuchos::reduceAll;
5861  std::ostringstream msg;
5862  int lclBad = 0;
5863  try {
5864  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
5865  constantNumPackets, distor, combineMode);
5866  } catch (std::exception& e) {
5867  lclBad = 1;
5868  msg << e.what ();
5869  }
5870  int gblBad = 0;
5871  const Teuchos::Comm<int>& comm = * (this->getComm ());
5872  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
5873  lclBad, Teuchos::outArg (gblBad));
5874  if (gblBad != 0) {
5875  const int myRank = comm.getRank ();
5876  const int numProcs = comm.getSize ();
5877  for (int r = 0; r < numProcs; ++r) {
5878  if (r == myRank && lclBad != 0) {
5879  std::ostringstream os;
5880  os << "Proc " << myRank << ": " << msg.str () << std::endl;
5881  std::cerr << os.str ();
5882  }
5883  comm.barrier ();
5884  comm.barrier ();
5885  comm.barrier ();
5886  }
5887  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5888  true, std::logic_error, "unpackAndCombineImpl() threw an "
5889  "exception on one or more participating processes.");
5890  }
5891  }
5892 #else
5893  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
5894  constantNumPackets, distor, combineMode);
5895 #endif // HAVE_TPETRA_DEBUG
5896  }
5897 
5898  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5899  void
5901  unpackAndCombineImpl (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
5902  const Teuchos::ArrayView<const char>& imports,
5903  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5904  size_t constantNumPackets,
5905  Distributor & /* distor */,
5906  CombineMode combineMode)
5907  {
5908  typedef LocalOrdinal LO;
5909  typedef GlobalOrdinal GO;
5910  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
5911  const char tfecfFuncName[] = "unpackAndCombine: ";
5912 
5913 #ifdef HAVE_TPETRA_DEBUG
5914  const CombineMode validModes[4] = {ADD, REPLACE, ABSMAX, INSERT};
5915  const char* validModeNames[4] = {"ADD", "REPLACE", "ABSMAX", "INSERT"};
5916  const int numValidModes = 4;
5917 
5918  if (std::find (validModes, validModes+numValidModes, combineMode) ==
5919  validModes+numValidModes) {
5920  std::ostringstream os;
5921  os << "Invalid combine mode. Valid modes are {";
5922  for (int k = 0; k < numValidModes; ++k) {
5923  os << validModeNames[k];
5924  if (k < numValidModes - 1) {
5925  os << ", ";
5926  }
5927  }
5928  os << "}.";
5929  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5930  true, std::invalid_argument, os.str ());
5931  }
5932 #endif // HAVE_TPETRA_DEBUG
5933 
5934  const size_type numImportLIDs = importLIDs.size ();
5935  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5936  numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
5937  "importLIDs.size() = " << numImportLIDs << " != numPacketsPerLID.size()"
5938  << " = " << numPacketsPerLID.size () << ".");
5939 
5940  // If a sanity check fails, keep track of some state at the
5941  // "first" place where it fails. After the first failure, "run
5942  // through the motions" until the end of this method, then raise
5943  // an error with an informative message.
5944  size_type firstBadIndex = 0;
5945  size_t firstBadOffset = 0;
5946  size_t firstBadExpectedNumBytes = 0;
5947  size_t firstBadNumBytes = 0;
5948  LO firstBadNumEnt = 0;
5949  // We have sanity checks for three kinds of errors:
5950  //
5951  // 1. Offset into array of all the incoming data (for all rows)
5952  // is out of bounds
5953  // 2. Too few bytes of incoming data for a row, given the
5954  // reported number of entries in those incoming data
5955  // 3. Error in unpacking the row's incoming data
5956  //
5957  bool outOfBounds = false;
5958  bool wrongNumBytes = false;
5959  bool unpackErr = false;
5960 
5961  const size_t bufSize = static_cast<size_t> (imports.size ());
5962  const char* const importsRawPtr = imports.getRawPtr ();
5963  size_t offset = 0;
5964 
5965  // Temporary storage for incoming values and indices. We need
5966  // this because the receive buffer does not align storage; it's
5967  // just contiguous bytes. In order to avoid violating ANSI
5968  // aliasing rules, we memcpy each incoming row's data into these
5969  // temporary arrays. We double their size every time we run out
5970  // of storage.
5971  Array<Scalar> valInTmp;
5972  Array<GO> indInTmp;
5973  for (size_type i = 0; i < numImportLIDs; ++i) {
5974  const LO lclRow = importLIDs[i];
5975  const size_t numBytes = numPacketsPerLID[i];
5976 
5977  if (numBytes > 0) { // there is actually something in the row
5978  const char* const numEntBeg = importsRawPtr + offset;
5979  const char* const numEntEnd = numEntBeg + sizeof (LO);
5980 
5981  // Now we know how many entries to expect in the received data
5982  // for this row.
5983  LO numEnt = 0;
5984  memcpy (&numEnt, numEntBeg, sizeof (LO));
5985 
5986  const char* const valBeg = numEntEnd;
5987  const char* const valEnd =
5988  valBeg + static_cast<size_t> (numEnt) * sizeof (Scalar);
5989  const char* const indBeg = valEnd;
5990  const size_t expectedNumBytes = sizeof (LO) +
5991  static_cast<size_t> (numEnt) * (sizeof (Scalar) + sizeof (GO));
5992 
5993  if (expectedNumBytes > numBytes) {
5994  firstBadIndex = i;
5995  firstBadOffset = offset;
5996  firstBadExpectedNumBytes = expectedNumBytes;
5997  firstBadNumBytes = numBytes;
5998  firstBadNumEnt = numEnt;
5999  wrongNumBytes = true;
6000  break;
6001  }
6002  if (offset > bufSize || offset + numBytes > bufSize) {
6003  firstBadIndex = i;
6004  firstBadOffset = offset;
6005  firstBadExpectedNumBytes = expectedNumBytes;
6006  firstBadNumBytes = numBytes;
6007  firstBadNumEnt = numEnt;
6008  outOfBounds = true;
6009  break;
6010  }
6011  size_t tmpNumEnt = static_cast<size_t> (valInTmp.size ());
6012  if (tmpNumEnt < static_cast<size_t> (numEnt) ||
6013  static_cast<size_t> (indInTmp.size ()) < static_cast<size_t> (numEnt)) {
6014  // Double the size of the temporary arrays for incoming data.
6015  tmpNumEnt = std::max (static_cast<size_t> (numEnt), tmpNumEnt * 2);
6016  valInTmp.resize (tmpNumEnt);
6017  indInTmp.resize (tmpNumEnt);
6018  }
6019  unpackErr =
6020  ! unpackRow (valInTmp.getRawPtr (), indInTmp.getRawPtr (), tmpNumEnt,
6021  valBeg, indBeg, numEnt, lclRow, combineMode);
6022  if (unpackErr) {
6023  firstBadIndex = i;
6024  firstBadOffset = offset;
6025  firstBadExpectedNumBytes = expectedNumBytes;
6026  firstBadNumBytes = numBytes;
6027  firstBadNumEnt = numEnt;
6028  break;
6029  }
6030  offset += numBytes;
6031  }
6032  }
6033 
6034  if (wrongNumBytes || outOfBounds || unpackErr) {
6035  std::ostringstream os;
6036  os << " importLIDs[i]: " << importLIDs[firstBadIndex]
6037  << ", bufSize: " << bufSize
6038  << ", offset: " << firstBadOffset
6039  << ", numBytes: " << firstBadNumBytes
6040  << ", expectedNumBytes: " << firstBadExpectedNumBytes
6041  << ", numEnt: " << firstBadNumEnt;
6042  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6043  wrongNumBytes, std::logic_error, "At index i = " << firstBadIndex
6044  << ", expectedNumBytes > numBytes." << os.str ());
6045  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6046  outOfBounds, std::logic_error, "First invalid offset into 'imports' "
6047  "unpack buffer at index i = " << firstBadIndex << "." << os.str ());
6048  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6049  unpackErr, std::logic_error, "First error in unpackRow() at index i = "
6050  << firstBadIndex << "." << os.str ());
6051  }
6052  }
6053 
6054  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6055  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
6057  getColumnMapMultiVector (const MV& X_domainMap,
6058  const bool force) const
6059  {
6060  using Teuchos::null;
6061  using Teuchos::RCP;
6062  using Teuchos::rcp;
6063 
6064  TEUCHOS_TEST_FOR_EXCEPTION(
6065  ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn"
6066  "MapMultiVector: You may only call this method if the matrix has a "
6067  "column Map. If the matrix does not yet have a column Map, you should "
6068  "first call fillComplete (with domain and range Map if necessary).");
6069 
6070  // If the graph is not fill complete, then the Import object (if
6071  // one should exist) hasn't been constructed yet.
6072  TEUCHOS_TEST_FOR_EXCEPTION(
6073  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
6074  "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
6075  "this matrix's graph is fill complete.");
6076 
6077  const size_t numVecs = X_domainMap.getNumVectors ();
6078  RCP<const import_type> importer = this->getGraph ()->getImporter ();
6079  RCP<const map_type> colMap = this->getColMap ();
6080 
6081  RCP<MV> X_colMap; // null by default
6082 
6083  // If the Import object is trivial (null), then we don't need a
6084  // separate column Map multivector. Just return null in that
6085  // case. The caller is responsible for knowing not to use the
6086  // returned null pointer.
6087  //
6088  // If the Import is nontrivial, then we do need a separate
6089  // column Map multivector for the Import operation. Check in
6090  // that case if we have to (re)create the column Map
6091  // multivector.
6092  if (! importer.is_null () || force) {
6093  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
6094  X_colMap = rcp (new MV (colMap, numVecs));
6095 
6096  // Cache the newly created multivector for later reuse.
6097  importMV_ = X_colMap;
6098  }
6099  else { // Yay, we can reuse the cached multivector!
6100  X_colMap = importMV_;
6101  // mfh 09 Jan 2013: We don't have to fill with zeros first,
6102  // because the Import uses INSERT combine mode, which overwrites
6103  // existing entries.
6104  //
6105  //X_colMap->putScalar (ZERO);
6106  }
6107  }
6108  return X_colMap;
6109  }
6110 
6111  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6112  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
6115  const bool force) const
6116  {
6117  using Teuchos::null;
6118  using Teuchos::RCP;
6119  using Teuchos::rcp;
6120 
6121  // If the graph is not fill complete, then the Export object (if
6122  // one should exist) hasn't been constructed yet.
6123  TEUCHOS_TEST_FOR_EXCEPTION(
6124  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
6125  "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
6126  "matrix's graph is fill complete.");
6127 
6128  const size_t numVecs = Y_rangeMap.getNumVectors ();
6129  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
6130  // Every version of the constructor takes either a row Map, or a
6131  // graph (all of whose constructors take a row Map). Thus, the
6132  // matrix always has a row Map.
6133  RCP<const map_type> rowMap = this->getRowMap ();
6134 
6135  RCP<MV> Y_rowMap; // null by default
6136 
6137  // If the Export object is trivial (null), then we don't need a
6138  // separate row Map multivector. Just return null in that case.
6139  // The caller is responsible for knowing not to use the returned
6140  // null pointer.
6141  //
6142  // If the Export is nontrivial, then we do need a separate row
6143  // Map multivector for the Export operation. Check in that case
6144  // if we have to (re)create the row Map multivector.
6145  if (! exporter.is_null () || force) {
6146  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
6147  Y_rowMap = rcp (new MV (rowMap, numVecs));
6148  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse.
6149  }
6150  else { // Yay, we can reuse the cached multivector!
6151  Y_rowMap = exportMV_;
6152  }
6153  }
6154  return Y_rowMap;
6155  }
6156 
6157  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6158  void
6160  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6161  {
6162  TEUCHOS_TEST_FOR_EXCEPTION(
6163  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
6164  "removeEmptyProcessesInPlace: This method does not work when the matrix "
6165  "was created with a constant graph (that is, when it was created using "
6166  "the version of its constructor that takes an RCP<const CrsGraph>). "
6167  "This is because the matrix is not allowed to modify the graph in that "
6168  "case, but removing empty processes requires modifying the graph.");
6169  myGraph_->removeEmptyProcessesInPlace (newMap);
6170  // Even though CrsMatrix's row Map (as returned by getRowMap())
6171  // comes from its CrsGraph, CrsMatrix still implements DistObject,
6172  // so we also have to change the DistObject's Map.
6173  this->map_ = this->getRowMap ();
6174  // In the nonconst graph case, staticGraph_ is just a const
6175  // pointer to myGraph_. This assignment is probably redundant,
6176  // but it doesn't hurt.
6177  staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
6178  }
6179 
6180  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6181  Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
6183  add (const Scalar& alpha,
6185  const Scalar& beta,
6186  const Teuchos::RCP<const map_type>& domainMap,
6187  const Teuchos::RCP<const map_type>& rangeMap,
6188  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6189  {
6190  using Teuchos::Array;
6191  using Teuchos::ArrayRCP;
6192  using Teuchos::ParameterList;
6193  using Teuchos::RCP;
6194  using Teuchos::rcp;
6195  using Teuchos::rcp_implicit_cast;
6196  using Teuchos::sublist;
6197  typedef LocalOrdinal LO;
6198  typedef GlobalOrdinal GO;
6201 
6202  const crs_matrix_type& B = *this; // a convenient abbreviation
6203  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
6204  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
6205 
6206  // If the user didn't supply a domain or range Map, then try to
6207  // get one from B first (if it has them), then from A (if it has
6208  // them). If we don't have any domain or range Maps, scold the
6209  // user.
6210  RCP<const map_type> A_domainMap = A.getDomainMap ();
6211  RCP<const map_type> A_rangeMap = A.getRangeMap ();
6212  RCP<const map_type> B_domainMap = B.getDomainMap ();
6213  RCP<const map_type> B_rangeMap = B.getRangeMap ();
6214 
6215  RCP<const map_type> theDomainMap = domainMap;
6216  RCP<const map_type> theRangeMap = rangeMap;
6217 
6218  if (domainMap.is_null ()) {
6219  if (B_domainMap.is_null ()) {
6220  TEUCHOS_TEST_FOR_EXCEPTION(
6221  A_domainMap.is_null (), std::invalid_argument,
6222  "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
6223  "then you must supply a nonnull domain Map to this method.");
6224  theDomainMap = A_domainMap;
6225  } else {
6226  theDomainMap = B_domainMap;
6227  }
6228  }
6229  if (rangeMap.is_null ()) {
6230  if (B_rangeMap.is_null ()) {
6231  TEUCHOS_TEST_FOR_EXCEPTION(
6232  A_rangeMap.is_null (), std::invalid_argument,
6233  "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
6234  "then you must supply a nonnull range Map to this method.");
6235  theRangeMap = A_rangeMap;
6236  } else {
6237  theRangeMap = B_rangeMap;
6238  }
6239  }
6240 
6241 #ifdef HAVE_TPETRA_DEBUG
6242  // In a debug build, check that A and B have matching domain and
6243  // range Maps, if they have domain and range Maps at all. (If
6244  // they aren't fill complete, then they may not yet have them.)
6245  if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) {
6246  if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
6247  TEUCHOS_TEST_FOR_EXCEPTION(
6248  ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument,
6249  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a domain Map "
6250  "which is the same as (isSameAs) this RowMatrix's domain Map.");
6251  TEUCHOS_TEST_FOR_EXCEPTION(
6252  ! B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument,
6253  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a range Map "
6254  "which is the same as (isSameAs) this RowMatrix's range Map.");
6255  TEUCHOS_TEST_FOR_EXCEPTION(
6256  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
6257  std::invalid_argument,
6258  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
6259  "(isSameAs) this RowMatrix's domain Map.");
6260  TEUCHOS_TEST_FOR_EXCEPTION(
6261  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
6262  std::invalid_argument,
6263  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
6264  "(isSameAs) this RowMatrix's range Map.");
6265  }
6266  }
6267  else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
6268  TEUCHOS_TEST_FOR_EXCEPTION(
6269  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
6270  std::invalid_argument,
6271  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
6272  "(isSameAs) this RowMatrix's domain Map.");
6273  TEUCHOS_TEST_FOR_EXCEPTION(
6274  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
6275  std::invalid_argument,
6276  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
6277  "(isSameAs) this RowMatrix's range Map.");
6278  }
6279  else {
6280  TEUCHOS_TEST_FOR_EXCEPTION(
6281  domainMap.is_null () || rangeMap.is_null (), std::invalid_argument,
6282  "Tpetra::CrsMatrix::add: If neither A nor B have a domain and range "
6283  "Map, then you must supply a nonnull domain and range Map to this "
6284  "method.");
6285  }
6286 #endif // HAVE_TPETRA_DEBUG
6287 
6288  // What parameters do we pass to C's constructor? Do we call
6289  // fillComplete on C after filling it? And if so, what parameters
6290  // do we pass to C's fillComplete call?
6291  bool callFillComplete = true;
6292  RCP<ParameterList> constructorSublist;
6293  RCP<ParameterList> fillCompleteSublist;
6294  if (! params.is_null ()) {
6295  callFillComplete = params->get ("Call fillComplete", callFillComplete);
6296  constructorSublist = sublist (params, "Constructor parameters");
6297  fillCompleteSublist = sublist (params, "fillComplete parameters");
6298  }
6299 
6300  RCP<const map_type> A_rowMap = A.getRowMap ();
6301  RCP<const map_type> B_rowMap = B.getRowMap ();
6302  RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
6303  RCP<crs_matrix_type> C; // The result matrix.
6304 
6305  // If A and B's row Maps are the same, we can compute an upper
6306  // bound on the number of entries in each row of C, before
6307  // actually computing the sum. A reasonable upper bound is the
6308  // sum of the two entry counts in each row. If we choose this as
6309  // the actual per-row upper bound, we can use static profile.
6310  if (A_rowMap->isSameAs (*B_rowMap)) {
6311  const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
6312  ArrayRCP<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
6313 
6314  // Get the number of entries in each row of A.
6315  if (alpha != ZERO) {
6316  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
6317  const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
6318  C_maxNumEntriesPerRow[localRow] += A_numEntries;
6319  }
6320  }
6321  // Get the number of entries in each row of B.
6322  if (beta != ZERO) {
6323  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
6324  const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
6325  C_maxNumEntriesPerRow[localRow] += B_numEntries;
6326  }
6327  }
6328  // Construct the result matrix C.
6329  if (constructorSublist.is_null ()) {
6330  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
6331  StaticProfile));
6332  } else {
6333  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
6334  StaticProfile, constructorSublist));
6335  }
6336  // Since A and B have the same row Maps, we could add them
6337  // together all at once and merge values before we call
6338  // insertGlobalValues. However, we don't really need to, since
6339  // we've already allocated enough space in each row of C for C
6340  // to do the merge itself.
6341  }
6342  else { // the row Maps of A and B are not the same
6343  // Construct the result matrix C.
6344  if (constructorSublist.is_null ()) {
6345  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile));
6346  } else {
6347  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile,
6348  constructorSublist));
6349  }
6350  }
6351 
6352 #ifdef HAVE_TPETRA_DEBUG
6353  TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
6354  "Tpetra::RowMatrix::add: C should not be null at this point. "
6355  "Please report this bug to the Tpetra developers.");
6356 #endif // HAVE_TPETRA_DEBUG
6357  //
6358  // Compute C = alpha*A + beta*B.
6359  //
6360  Array<GO> ind;
6361  Array<Scalar> val;
6362 
6363  if (alpha != ZERO) {
6364  const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
6365  for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
6366  size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
6367  const GO globalRow = A_rowMap->getGlobalElement (localRow);
6368  if (A_numEntries > static_cast<size_t> (ind.size ())) {
6369  ind.resize (A_numEntries);
6370  val.resize (A_numEntries);
6371  }
6372  ArrayView<GO> indView = ind (0, A_numEntries);
6373  ArrayView<Scalar> valView = val (0, A_numEntries);
6374  A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
6375 
6376  if (alpha != ONE) {
6377  for (size_t k = 0; k < A_numEntries; ++k) {
6378  valView[k] *= alpha;
6379  }
6380  }
6381  C->insertGlobalValues (globalRow, indView, valView);
6382  }
6383  }
6384 
6385  if (beta != ZERO) {
6386  const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ());
6387  for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
6388  size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
6389  const GO globalRow = B_rowMap->getGlobalElement (localRow);
6390  if (B_numEntries > static_cast<size_t> (ind.size ())) {
6391  ind.resize (B_numEntries);
6392  val.resize (B_numEntries);
6393  }
6394  ArrayView<GO> indView = ind (0, B_numEntries);
6395  ArrayView<Scalar> valView = val (0, B_numEntries);
6396  B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
6397 
6398  if (beta != ONE) {
6399  for (size_t k = 0; k < B_numEntries; ++k) {
6400  valView[k] *= beta;
6401  }
6402  }
6403  C->insertGlobalValues (globalRow, indView, valView);
6404  }
6405  }
6406 
6407  if (callFillComplete) {
6408  if (fillCompleteSublist.is_null ()) {
6409  C->fillComplete (theDomainMap, theRangeMap);
6410  } else {
6411  C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
6412  }
6413  }
6414  return rcp_implicit_cast<row_matrix_type> (C);
6415  }
6416 
6417  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6418  void
6421  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6422  const Teuchos::RCP<const map_type>& domainMap,
6423  const Teuchos::RCP<const map_type>& rangeMap,
6424  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6425  {
6426  using Teuchos::ArrayView;
6427  using Teuchos::Comm;
6428  using Teuchos::ParameterList;
6429  using Teuchos::RCP;
6430  typedef LocalOrdinal LO;
6431  typedef GlobalOrdinal GO;
6432  typedef node_type NT;
6434  typedef Vector<int, LO, GO, NT> IntVectorType;
6435 
6436 #ifdef HAVE_TPETRA_MMM_TIMINGS
6437  std::string label;
6438  if(!params.is_null())
6439  label = params->get("Timer Label",label);
6440  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
6441  using Teuchos::TimeMonitor;
6442  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-1"))));
6443 #endif
6444 
6445  // Make sure that the input argument rowTransfer is either an
6446  // Import or an Export. Import and Export are the only two
6447  // subclasses of Transfer that we defined, but users might
6448  // (unwisely, for now at least) decide to implement their own
6449  // subclasses. Exclude this possibility.
6450  const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer);
6451  const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer);
6452  TEUCHOS_TEST_FOR_EXCEPTION(
6453  xferAsImport == NULL && xferAsExport == NULL, std::invalid_argument,
6454  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
6455  "argument must be either an Import or an Export, and its template "
6456  "parameters must match the corresponding template parameters of the "
6457  "CrsMatrix.");
6458 
6459  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6460  // if the source Map is not distributed but the target Map is?
6461  const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
6462 
6463  //
6464  // Get the caller's parameters
6465  //
6466 
6467  bool reverseMode = false; // Are we in reverse mode?
6468  bool restrictComm = false; // Do we need to restrict the communicator?
6469  RCP<ParameterList> matrixparams; // parameters for the destination matrix
6470  if (! params.is_null ()) {
6471  reverseMode = params->get ("Reverse Mode", reverseMode);
6472  restrictComm = params->get ("Restrict Communicator", restrictComm);
6473  matrixparams = sublist (params, "CrsMatrix");
6474  }
6475 
6476  // Get the new domain and range Maps. We need some of them for
6477  // error checking, now that we have the reverseMode parameter.
6478  RCP<const map_type> MyRowMap = reverseMode ?
6479  rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
6480  RCP<const map_type> MyColMap; // create this below
6481  RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
6482  domainMap : getDomainMap ();
6483  RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
6484  rangeMap : getRangeMap ();
6485  RCP<const map_type> BaseRowMap = MyRowMap;
6486  RCP<const map_type> BaseDomainMap = MyDomainMap;
6487 
6488  // If the user gave us a nonnull destMat, then check whether it's
6489  // "pristine." That means that it has no entries.
6490  //
6491  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6492  // then this exception test may hang. It would be better to
6493  // forward an error flag to the next communication phase.
6494  if (! destMat.is_null ()) {
6495  // FIXME (mfh 15 May 2014): The classic Petra idiom for checking
6496  // whether a graph or matrix has no entries on the calling
6497  // process, is that it is neither locally nor globally indexed.
6498  // This may change eventually with the Kokkos refactor version
6499  // of Tpetra, so it would be better just to check the quantity
6500  // of interest directly. Note that with the Kokkos refactor
6501  // version of Tpetra, asking for the total number of entries in
6502  // a graph or matrix that is not fill complete might require
6503  // computation (kernel launch), since it is not thread scalable
6504  // to update a count every time an entry is inserted.
6505  const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
6506  ! destMat->getGraph ()->isGloballyIndexed ();
6507  TEUCHOS_TEST_FOR_EXCEPTION(
6508  ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::"
6509  "transferAndFillComplete: The input argument 'destMat' is only allowed "
6510  "to be nonnull, if its graph is empty (neither locally nor globally "
6511  "indexed).");
6512  // FIXME (mfh 15 May 2014) At some point, we want to change
6513  // graphs and matrices so that their DistObject Map
6514  // (this->getMap()) may differ from their row Map. This will
6515  // make redistribution for 2-D distributions more efficient. I
6516  // hesitate to change this check, because I'm not sure how much
6517  // the code here depends on getMap() and getRowMap() being the
6518  // same.
6519  TEUCHOS_TEST_FOR_EXCEPTION(
6520  ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
6521  "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
6522  "input argument 'destMat' is not the same as the (row) Map specified "
6523  "by the input argument 'rowTransfer'.");
6524  TEUCHOS_TEST_FOR_EXCEPTION(
6525  ! destMat->checkSizes (*this), std::invalid_argument,
6526  "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
6527  "destination matrix, but checkSizes() indicates that it is not a legal "
6528  "legal target for redistribution from the source matrix (*this). This "
6529  "may mean that they do not have the same dimensions.");
6530  }
6531 
6532  // If forward mode (the default), then *this's (row) Map must be
6533  // the same as the source Map of the Transfer. If reverse mode,
6534  // then *this's (row) Map must be the same as the target Map of
6535  // the Transfer.
6536  //
6537  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6538  // and matrices so that their DistObject Map (this->getMap()) may
6539  // differ from their row Map. This will make redistribution for
6540  // 2-D distributions more efficient. I hesitate to change this
6541  // check, because I'm not sure how much the code here depends on
6542  // getMap() and getRowMap() being the same.
6543  TEUCHOS_TEST_FOR_EXCEPTION(
6544  ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
6545  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
6546  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6547  TEUCHOS_TEST_FOR_EXCEPTION(
6548  ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
6549  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
6550  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6551 
6552  // The basic algorithm here is:
6553  //
6554  // 1. Call the moral equivalent of "distor.do" to handle the import.
6555  // 2. Copy all the Imported and Copy/Permuted data into the raw
6556  // CrsMatrix / CrsGraphData pointers, still using GIDs.
6557  // 3. Call an optimized version of MakeColMap that avoids the
6558  // Directory lookups (since the importer knows who owns all the
6559  // GIDs) AND reindexes to LIDs.
6560  // 4. Call expertStaticFillComplete()
6561 
6562  // Get information from the Importer
6563  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6564  ArrayView<const LO> ExportLIDs = reverseMode ?
6565  rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
6566  ArrayView<const LO> RemoteLIDs = reverseMode ?
6567  rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
6568  ArrayView<const LO> PermuteToLIDs = reverseMode ?
6569  rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
6570  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6571  rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
6572  Distributor& Distor = rowTransfer.getDistributor ();
6573 
6574  // Owning PIDs
6575  Teuchos::Array<int> SourcePids;
6576  Teuchos::Array<int> TargetPids;
6577  int MyPID = getComm ()->getRank ();
6578 
6579  // Temp variables for sub-communicators
6580  RCP<const map_type> ReducedRowMap, ReducedColMap,
6581  ReducedDomainMap, ReducedRangeMap;
6582  RCP<const Comm<int> > ReducedComm;
6583 
6584  // If the user gave us a null destMat, then construct the new
6585  // destination matrix. We will replace its column Map later.
6586  if (destMat.is_null ()) {
6587  destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams));
6588  }
6589 
6590  /***************************************************/
6591  /***** 1) First communicator restriction phase ****/
6592  /***************************************************/
6593  if (restrictComm) {
6594  ReducedRowMap = MyRowMap->removeEmptyProcesses ();
6595  ReducedComm = ReducedRowMap.is_null () ?
6596  Teuchos::null :
6597  ReducedRowMap->getComm ();
6598  destMat->removeEmptyProcessesInPlace (ReducedRowMap);
6599 
6600  ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
6601  ReducedRowMap :
6602  MyDomainMap->replaceCommWithSubset (ReducedComm);
6603  ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
6604  ReducedRowMap :
6605  MyRangeMap->replaceCommWithSubset (ReducedComm);
6606 
6607  // Reset the "my" maps
6608  MyRowMap = ReducedRowMap;
6609  MyDomainMap = ReducedDomainMap;
6610  MyRangeMap = ReducedRangeMap;
6611 
6612  // Update my PID, if we've restricted the communicator
6613  if (! ReducedComm.is_null ()) {
6614  MyPID = ReducedComm->getRank ();
6615  }
6616  else {
6617  MyPID = -2; // For debugging
6618  }
6619  }
6620  else {
6621  ReducedComm = MyRowMap->getComm ();
6622  }
6623 
6624  /***************************************************/
6625  /***** 2) From Tpera::DistObject::doTransfer() ****/
6626  /***************************************************/
6627 #ifdef HAVE_TPETRA_MMM_TIMINGS
6628  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ImportSetup"))));
6629 #endif
6630  // Get the owning PIDs
6631  RCP<const import_type> MyImporter = getGraph ()->getImporter ();
6632 
6633  if (! restrictComm && ! MyImporter.is_null () &&
6634  BaseDomainMap->isSameAs (*getDomainMap ())) {
6635  // Same domain map as source matrix
6636  //
6637  // NOTE: This won't work for restrictComm (because the Import
6638  // doesn't know the restricted PIDs), though writing an
6639  // optimized version for that case would be easy (Import an
6640  // IntVector of the new PIDs). Might want to add this later.
6641  Import_Util::getPids (*MyImporter, SourcePids, false);
6642  }
6643  else if (MyImporter.is_null () && BaseDomainMap->isSameAs (*getDomainMap ())) {
6644  // Matrix has no off-process entries
6645  SourcePids.resize (getColMap ()->getNodeNumElements ());
6646  SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
6647  }
6648  else if (BaseDomainMap->isSameAs (*BaseRowMap) &&
6649  getDomainMap ()->isSameAs (*getRowMap ())) {
6650  // We can use the rowTransfer + SourceMatrix's Import to find out who owns what.
6651  IntVectorType TargetRow_pids (domainMap);
6652  IntVectorType SourceRow_pids (getRowMap ());
6653  IntVectorType SourceCol_pids (getColMap ());
6654 
6655  TargetRow_pids.putScalar (MyPID);
6656  if (! reverseMode && xferAsImport != NULL) {
6657  SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
6658  }
6659  else if (reverseMode && xferAsExport != NULL) {
6660  SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
6661  }
6662  else if (! reverseMode && xferAsExport != NULL) {
6663  SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
6664  }
6665  else if (reverseMode && xferAsImport != NULL) {
6666  SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
6667  }
6668  else {
6669  TEUCHOS_TEST_FOR_EXCEPTION(
6670  true, std::logic_error, "Tpetra::CrsMatrix::"
6671  "transferAndFillComplete: Should never get here! "
6672  "Please report this bug to a Tpetra developer.");
6673  }
6674  SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
6675  SourcePids.resize (getColMap ()->getNodeNumElements ());
6676  SourceCol_pids.get1dCopy (SourcePids ());
6677  }
6678  else {
6679  TEUCHOS_TEST_FOR_EXCEPTION(
6680  true, std::invalid_argument, "Tpetra::CrsMatrix::"
6681  "transferAndFillComplete: This method only allows either domainMap == "
6682  "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
6683  "getDomainMap () == getRowMap ()).");
6684  }
6685 #ifdef HAVE_TPETRA_MMM_TIMINGS
6686  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-2"))));
6687 #endif
6688 
6689  // Tpetra-specific stuff
6690  //
6691  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6692  // inherits from DistObject (in which case all arrays that get
6693  // resized here are Teuchos::Array), but it won't work if
6694  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6695  // that get resized here are Kokkos::View). In the latter case,
6696  // imports_ and numExportPacketsPerLID_ each have only a device
6697  // view, but numImportPacketsPerLID_ has a device view and a host
6698  // view (host_numImportPacketsPerLID_).
6699  //
6700  // Currently, CrsMatrix inherits from DistObject, not
6701  // DistObjectKA, so the code below should be fine for the Kokkos
6702  // refactor version of CrsMatrix.
6703  //
6704  // For this and for all other cases in this function that want to
6705  // resize the DistObject's communication arrays, it would make
6706  // sense to give DistObject (and DistObjectKA) methods for
6707  // resizing that don't expose the details of whether these are
6708  // Teuchos::Array or Kokkos::View.
6709  size_t constantNumPackets = destMat->constantNumberOfPackets ();
6710  if (constantNumPackets == 0) {
6711  destMat->numExportPacketsPerLID_old_.resize (ExportLIDs.size ());
6712  destMat->numImportPacketsPerLID_old_.resize (RemoteLIDs.size ());
6713  }
6714  else {
6715  // There are a constant number of packets per element. We
6716  // already know (from the number of "remote" (incoming)
6717  // elements) how many incoming elements we expect, so we can
6718  // resize the buffer accordingly.
6719  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6720  if (static_cast<size_t> (destMat->imports_old_.size ()) != rbufLen) {
6721  destMat->imports_old_.resize (rbufLen);
6722  }
6723  }
6724 
6725  // Pack & Prepare w/ owning PIDs
6726  //
6727  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6728  // inherits from DistObject (in which case all arrays that get
6729  // passed in here are Teuchos::Array), but it won't work if
6730  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6731  // that get passed in here are Kokkos::View). In the latter case,
6732  // exports_ and numExportPacketsPerLID_ each have only a device
6733  // view.
6734  //
6735  // Currently, CrsMatrix inherits from DistObject, not
6736  // DistObjectKA, so the code below should be fine for the Kokkos
6737  // refactor version of CrsMatrix.
6738 #ifdef HAVE_TPETRA_DEBUG
6739  {
6740  using Teuchos::outArg;
6741  using Teuchos::REDUCE_MAX;
6742  using Teuchos::reduceAll;
6743  using std::cerr;
6744  using std::endl;
6745  RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6746  const int myRank = comm->getRank ();
6747  const int numProcs = comm->getSize ();
6748 
6749  std::ostringstream os;
6750  int lclErr = 0;
6751  try {
6752  Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs,
6753  destMat->exports_old_,
6754  destMat->numExportPacketsPerLID_old_ (),
6755  constantNumPackets, Distor,
6756  SourcePids);
6757  }
6758  catch (std::exception& e) {
6759  os << "Proc " << myRank << ": " << e.what ();
6760  lclErr = 1;
6761  }
6762  int gblErr = 0;
6763  if (! comm.is_null ()) {
6764  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
6765  }
6766  if (gblErr != 0) {
6767  if (myRank == 0) {
6768  cerr << "packAndPrepareWithOwningPIDs threw an exception: " << endl;
6769  }
6770  std::ostringstream err;
6771  for (int r = 0; r < numProcs; ++r) {
6772  if (r == myRank && lclErr != 0) {
6773  cerr << os.str () << endl;
6774  }
6775  comm->barrier ();
6776  comm->barrier ();
6777  comm->barrier ();
6778  }
6779 
6780  TEUCHOS_TEST_FOR_EXCEPTION(
6781  true, std::logic_error, "packAndPrepareWithOwningPIDs threw an "
6782  "exception.");
6783  }
6784  }
6785 
6786 #else
6787  Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs,
6788  destMat->exports_old_,
6789  destMat->numExportPacketsPerLID_old_ (),
6790  constantNumPackets, Distor,
6791  SourcePids);
6792 #endif // HAVE_TPETRA_DEBUG
6793 
6794  // Do the exchange of remote data.
6795  //
6796  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6797  // inherits from DistObject (in which case all arrays that get
6798  // passed in here are Teuchos::Array), but it won't work if
6799  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6800  // that get passed in here are Kokkos::View).
6801  //
6802  // In the latter case, imports_, exports_, and
6803  // numExportPacketsPerLID_ each have only a device view.
6804  // numImportPacketsPerLIDs_ is a device view, and also has a host
6805  // view (host_numImportPacketsPerLID_).
6806 #ifdef HAVE_TPETRA_MMM_TIMINGS
6807  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Transfer"))));
6808 #endif
6809 
6810  if (communication_needed) {
6811  if (reverseMode) {
6812  if (constantNumPackets == 0) { // variable number of packets per LID
6813  Distor.doReversePostsAndWaits (destMat->numExportPacketsPerLID_old_ ().getConst (), 1,
6814  destMat->numImportPacketsPerLID_old_ ());
6815  size_t totalImportPackets = 0;
6816  for (Array_size_type i = 0; i < destMat->numImportPacketsPerLID_old_.size (); ++i) {
6817  totalImportPackets += destMat->numImportPacketsPerLID_old_[i];
6818  }
6819  destMat->imports_old_.resize (totalImportPackets);
6820  Distor.doReversePostsAndWaits (destMat->exports_old_ ().getConst (),
6821  destMat->numExportPacketsPerLID_old_ (),
6822  destMat->imports_old_ (),
6823  destMat->numImportPacketsPerLID_old_ ());
6824  }
6825  else { // constant number of packets per LID
6826  Distor.doReversePostsAndWaits (destMat->exports_old_ ().getConst (),
6827  constantNumPackets,
6828  destMat->imports_old_ ());
6829  }
6830  }
6831  else { // forward mode (the default)
6832  if (constantNumPackets == 0) { // variable number of packets per LID
6833  Distor.doPostsAndWaits (destMat->numExportPacketsPerLID_old_ ().getConst (), 1,
6834  destMat->numImportPacketsPerLID_old_ ());
6835  size_t totalImportPackets = 0;
6836  for (Array_size_type i = 0; i < destMat->numImportPacketsPerLID_old_.size (); ++i) {
6837  totalImportPackets += destMat->numImportPacketsPerLID_old_[i];
6838  }
6839  destMat->imports_old_.resize (totalImportPackets);
6840  Distor.doPostsAndWaits (destMat->exports_old_ ().getConst (),
6841  destMat->numExportPacketsPerLID_old_ (),
6842  destMat->imports_old_ (),
6843  destMat->numImportPacketsPerLID_old_ ());
6844  }
6845  else { // constant number of packets per LID
6846  Distor.doPostsAndWaits (destMat->exports_old_ ().getConst (),
6847  constantNumPackets,
6848  destMat->imports_old_ ());
6849  }
6850  }
6851  }
6852 
6853  /*********************************************************************/
6854  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
6855  /*********************************************************************/
6856 
6857  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6858  // inherits from DistObject (in which case all arrays that get
6859  // passed in here are Teuchos::Array), but it won't work if
6860  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6861  // that get passed in here are Kokkos::View).
6862  //
6863  // In the latter case, imports_ only has a device view.
6864  // numImportPacketsPerLIDs_ is a device view, and also has a host
6865  // view (host_numImportPacketsPerLID_).
6866 #ifdef HAVE_TPETRA_MMM_TIMINGS
6867  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-1"))));
6868 #endif
6869  size_t mynnz =
6870  Import_Util::unpackAndCombineWithOwningPIDsCount (*this, RemoteLIDs,
6871  destMat->imports_old_ (),
6872  destMat->numImportPacketsPerLID_old_ (),
6873  constantNumPackets, Distor, INSERT,
6874  NumSameIDs, PermuteToLIDs,
6875  PermuteFromLIDs);
6876  size_t N = BaseRowMap->getNodeNumElements ();
6877 
6878  // Allocations
6879  ArrayRCP<size_t> CSR_rowptr(N+1);
6880  ArrayRCP<GO> CSR_colind_GID;
6881  ArrayRCP<LO> CSR_colind_LID;
6882  ArrayRCP<Scalar> CSR_vals;
6883  CSR_colind_GID.resize (mynnz);
6884  CSR_vals.resize (mynnz);
6885 
6886  // If LO and GO are the same, we can reuse memory when
6887  // converting the column indices from global to local indices.
6888  if (typeid (LO) == typeid (GO)) {
6889  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
6890  }
6891  else {
6892  CSR_colind_LID.resize (mynnz);
6893  }
6894 
6895  // FIXME (mfh 15 May 2014) This should work fine if CrsMatrix
6896  // inherits from DistObject (in which case all arrays that get
6897  // passed in here are Teuchos::Array), but it won't work if
6898  // CrsMatrix inherits from DistObjectKA (in which case all arrays
6899  // that get passed in here are Kokkos::View).
6900  //
6901  // In the latter case, imports_ only has a device view.
6902  // numImportPacketsPerLIDs_ is a device view, and also has a host
6903  // view (host_numImportPacketsPerLID_).
6904  //
6905  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
6906  // unpackAndCombine method on a "CrsArrays" object? This passing
6907  // in a huge list of arrays is icky. Can't we have a bit of an
6908  // abstraction? Implementing a concrete DistObject subclass only
6909  // takes five methods.
6910  Import_Util::unpackAndCombineIntoCrsArrays (*this, RemoteLIDs, destMat->imports_old_ (),
6911  destMat->numImportPacketsPerLID_old_ (),
6912  constantNumPackets, Distor, INSERT, NumSameIDs,
6913  PermuteToLIDs, PermuteFromLIDs, N, mynnz, MyPID,
6914  CSR_rowptr (), CSR_colind_GID (), CSR_vals (),
6915  SourcePids (), TargetPids);
6916 
6917  /**************************************************************/
6918  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
6919  /**************************************************************/
6920 #ifdef HAVE_TPETRA_MMM_TIMINGS
6921  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-2"))));
6922 #endif
6923  // Call an optimized version of makeColMap that avoids the
6924  // Directory lookups (since the Import object knows who owns all
6925  // the GIDs).
6926  Teuchos::Array<int> RemotePids;
6927  Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
6928  CSR_colind_LID (),
6929  CSR_colind_GID (),
6930  BaseDomainMap,
6931  TargetPids, RemotePids,
6932  MyColMap);
6933 
6934  /*******************************************************/
6935  /**** 4) Second communicator restriction phase ****/
6936  /*******************************************************/
6937  if (restrictComm) {
6938  ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
6939  ReducedRowMap :
6940  MyColMap->replaceCommWithSubset (ReducedComm);
6941  MyColMap = ReducedColMap; // Reset the "my" maps
6942  }
6943 
6944  // Replace the col map
6945  destMat->replaceColMap (MyColMap);
6946 
6947  // Short circuit if the processor is no longer in the communicator
6948  //
6949  // NOTE: Epetra replaces modifies all "removed" processes so they
6950  // have a dummy (serial) Map that doesn't touch the original
6951  // communicator. Duplicating that here might be a good idea.
6952  if (ReducedComm.is_null ()) {
6953  return;
6954  }
6955 
6956  /***************************************************/
6957  /**** 5) Sort ****/
6958  /***************************************************/
6959 #ifdef HAVE_TPETRA_MMM_TIMINGS
6960  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-3"))));
6961 #endif
6962  Import_Util::sortCrsEntries (CSR_rowptr (),
6963  CSR_colind_LID (),
6964  CSR_vals ());
6965  if ((! reverseMode && xferAsImport != NULL) ||
6966  (reverseMode && xferAsExport != NULL)) {
6967  Import_Util::sortCrsEntries (CSR_rowptr (),
6968  CSR_colind_LID (),
6969  CSR_vals ());
6970  }
6971  else if ((! reverseMode && xferAsExport != NULL) ||
6972  (reverseMode && xferAsImport != NULL)) {
6973  Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
6974  CSR_colind_LID (),
6975  CSR_vals ());
6976  if (CSR_rowptr[N] != mynnz) {
6977  CSR_colind_LID.resize (CSR_rowptr[N]);
6978  CSR_vals.resize (CSR_rowptr[N]);
6979  }
6980  }
6981  else {
6982  TEUCHOS_TEST_FOR_EXCEPTION(
6983  true, std::logic_error, "Tpetra::CrsMatrix::"
6984  "transferAndFillComplete: Should never get here! "
6985  "Please report this bug to a Tpetra developer.");
6986  }
6987  /***************************************************/
6988  /**** 6) Reset the colmap and the arrays ****/
6989  /***************************************************/
6990 
6991  // Call constructor for the new matrix (restricted as needed)
6992  //
6993  // NOTE (mfh 15 May 2014) This should work fine for the Kokkos
6994  // refactor version of CrsMatrix, though it reserves the right to
6995  // make a deep copy of the arrays.
6996  destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
6997 
6998  /***************************************************/
6999  /**** 7) Build Importer & Call ESFC ****/
7000  /***************************************************/
7001  // Pre-build the importer using the existing PIDs
7002  Teuchos::ParameterList esfc_params;
7003 #ifdef HAVE_TPETRA_MMM_TIMINGS
7004  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC CreateImporter"))));
7005 #endif
7006  RCP<import_type> MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids));
7007 #ifdef HAVE_TPETRA_MMM_TIMINGS
7008  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ESFC"))));
7009 
7010  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7011 #endif
7012 
7013  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(&esfc_params,false));
7014  }
7015 
// Import from this matrix into destMatrix and make the result fill
// complete.
//
// Thin wrapper: forwards all five arguments, unchanged, to
// transferAndFillComplete, with the Import object (importer) acting as
// the transfer object.
//
//   destMatrix  destination matrix, passed through by non-const reference
//   importer    Import object passed as the transfer
//   domainMap   domain Map, forwarded as-is
//   rangeMap    range Map, forwarded as-is
//   params      parameter list, forwarded as-is
7016  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7017  void
7018  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
7019  importAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >& destMatrix,
7020  const import_type& importer,
7021  const Teuchos::RCP<const map_type>& domainMap,
7022  const Teuchos::RCP<const map_type>& rangeMap,
7023  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7024  {
7025  transferAndFillComplete (destMatrix, importer, domainMap, rangeMap, params);
7026  }
7027 
7028 
// Export from this matrix into destMatrix and make the result fill
// complete.
//
// Thin wrapper: forwards all five arguments, unchanged, to
// transferAndFillComplete, with the Export object (exporter) acting as
// the transfer object.
//
//   destMatrix  destination matrix, passed through by non-const reference
//   exporter    Export object passed as the transfer
//   domainMap   domain Map, forwarded as-is
//   rangeMap    range Map, forwarded as-is
//   params      parameter list, forwarded as-is
7029  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7030  void
7031  CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
7032  exportAndFillComplete (Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >& destMatrix,
7033  const export_type& exporter,
7034  const Teuchos::RCP<const map_type>& domainMap,
7035  const Teuchos::RCP<const map_type>& rangeMap,
7036  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7037  {
7038  transferAndFillComplete (destMatrix, exporter, domainMap, rangeMap, params);
7039  }
7040 
7041 } // namespace Tpetra
7042 
7043 //
7044 // Explicit instantiation macro
7045 //
7046 // Must be expanded from within the Tpetra namespace!
7047 //
7048 
// TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE)
//
// Expands to two explicit instantiations:
//   1. the class template CrsMatrix<SCALAR, LO, GO, NODE>, and
//   2. its member template convert<SCALAR>() const, i.e. the conversion
//      whose target Scalar type equals the matrix's own Scalar type.
// The expansion ends with its own semicolon, so the invocation needs none.
7049 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
7050  \
7051  template class CrsMatrix< SCALAR , LO , GO , NODE >; \
7052  template RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
7053  CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
7054 
// TPETRA_CRSMATRIX_CONVERT_INSTANT(SO, SI, LO, GO, NODE)
//
// Expands to an explicit instantiation of the member template
// CrsMatrix<SI, LO, GO, NODE>::convert<SO>() const, which returns
// RCP<CrsMatrix<SO, LO, GO, NODE> >.  SI is the Scalar type of the matrix
// being converted ("in") and SO is the Scalar type of the result ("out").
// Note the argument order: SO comes first, then SI.
7055 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
7056  \
7057  template RCP< CrsMatrix< SO , LO , GO , NODE > > \
7058  CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
7059 
// TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE)
//
// Expands to a `template<>` declaration of the nonmember function
// importAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>.
// The declared function takes (sourceMatrix, importer, domainMap,
// rangeMap, params) and returns RCP<CrsMatrix<SCALAR, LO, GO, NODE> >.
// The Import and Map parameter types are spelled through the matrix's
// own local_ordinal_type, global_ordinal_type and node_type typedefs
// rather than through LO, GO and NODE directly.
//
// NOTE(review): `template<>` is the syntax of an explicit specialization
// declaration; an explicit instantiation would be spelled `template`
// without `<>`.  Confirm which of the two is intended here.
7060 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7061  template<> \
7062  RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7063  importAndFillCompleteCrsMatrix (const RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7064  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7065  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7066  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
7067  const RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7068  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7069  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7070  const RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7071  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7072  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7073  const RCP<Teuchos::ParameterList>& params);
7074 
// TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE)
//
// Expands to a `template<>` declaration of the nonmember function
// exportAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>.
// The declared function takes (sourceMatrix, exporter, domainMap,
// rangeMap, params) and returns RCP<CrsMatrix<SCALAR, LO, GO, NODE> >.
// The Export and Map parameter types are spelled through the matrix's
// own local_ordinal_type, global_ordinal_type and node_type typedefs
// rather than through LO, GO and NODE directly.
//
// NOTE(review): `template<>` is the syntax of an explicit specialization
// declaration; an explicit instantiation would be spelled `template`
// without `<>`.  Confirm which of the two is intended here.
7075 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7076  template<> \
7077  RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7078  exportAndFillCompleteCrsMatrix (const RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7079  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7080  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7081  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
7082  const RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7083  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7084  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7085  const RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7086  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7087  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7088  const RCP<Teuchos::ParameterList>& params);
7089 
// TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO, NODE)
//
// Convenience macro: expands, in order, to
//   TPETRA_CRSMATRIX_MATRIX_INSTANT,
//   TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT and
//   TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT
// for the same (SCALAR, LO, GO, NODE).  Each of those expansions ends
// with its own semicolon, so no separators are needed between them.
// TPETRA_CRSMATRIX_CONVERT_INSTANT is not part of this expansion and has
// to be invoked separately for mixed-Scalar conversions.
7090 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
7091  TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
7092  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7093  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE)
7094 
7095 #endif // TPETRA_CRSMATRIX_DEF_HPP
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
LocalOrdinal sumIntoLocalValues(const LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &cols, const ArrayView< const Scalar > &vals)
Sum into one or more sparse matrix entries, using local indices.
Kokkos::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
std::string description() const
A one-line description of this object.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< node_type > getNode() const
The Kokkos Node instance.
Functor for the ABSMAX CombineMode of Import and Export operations.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Teuchos::ArrayView< const impl_scalar_type > getView(RowInfo rowinfo) const
Constant view of all entries (including extra space) in the given row.
void getLocalRowCopy(LocalOrdinal localRow, const Teuchos::ArrayView< LocalOrdinal > &colInds, const Teuchos::ArrayView< Scalar > &vals, size_t &numEntries) const
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
virtual bool isLocallyIndexed() const =0
Whether matrix indices are locally indexed.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Definition of the Tpetra::CrsGraph class.
void sortEntries()
Sort the entries of each row by their column indices.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const
This matrix's graph, as a RowGraph.
virtual void copyAndPermute(const SrcDistObject &source, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Perform copies and permutations that are local to this process.
void gaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
"Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = AX$.
Teuchos::RCP< const map_type > getRowMap() const
The Map that describes the row distribution in this matrix.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const ArrayView< const GlobalOrdinal > &cols, const ArrayView< const Scalar > &vals)
Sum into one or more sparse matrix entries, using global indices.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &x)
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
void getGlobalRowView(GlobalOrdinal GlobalRow, Teuchos::ArrayView< const GlobalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
void mergeRedundantEntries()
Merge entries in each row with the same column indices.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.
virtual bool supportsRowViews() const
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
bool isUpperTriangular() const
Indicates whether the matrix is upper triangular.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual bool checkSizes(const SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
void setAllValues(const typename local_matrix_type::row_map_type &rowPointers, const typename local_graph_type::entries_type::non_const_type &columnIndices, const typename local_matrix_type::values_type &values)
Sets the 1D pointer arrays of the graph.
size_t getNodeNumRows() const
The number of matrix rows owned by the calling process.
Node node_type
This class' fourth template parameter; the Kokkos device type.
bool fillComplete_
Whether the matrix is fill complete.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
GlobalOrdinal getIndexBase() const
The index base for global indices for this matrix.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node, classic > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
CrsIJV()
Default constructor.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Maps and their communicator.
void insertLocalValues(const LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &cols, const ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using local indices.
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
mag_type frobNorm_
Cached Frobenius norm of the (global) matrix.
Implementation details of Tpetra.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global indices.
void reduce()
Sum values of a locally replicated multivector across all processes.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
size_t global_size_t
Global size_t object.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Kokkos::StaticCrsGraph< LocalOrdinal, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Traits class for "invalid" (flag) values of integer types that Tpetra uses as local ordinals or globa...
Insert new values that don't currently exist.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &diag) const
Get a copy of the diagonal entries of the matrix.
dual_view_type getDualView() const
Get the Kokkos::DualView which implements local storage.
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
bool isLowerTriangular() const
Indicates whether the matrix is lower triangular.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas)
Allocate values (and optionally indices) using the Node.
LocalOrdinal replaceLocalValues(const LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &cols, const ArrayView< const Scalar > &vals)
Replace one or more entries' values, using local indices.
Teuchos::RCP< const map_type > getDomainMap() const
The domain Map of this matrix.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
bool isFillComplete() const
Whether the matrix is fill complete.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
void resumeFill(const RCP< ParameterList > &params=null)
Resume operations that may change the values or structure of the matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
void unpackAndCombine(const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode)
Unpack the imported column indices and values, and combine into matrix.
Sets up and executes a communication plan for a Tpetra DistObject.
void reorderedGaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
Reordered "Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = AX$.
Struct representing a sparse matrix entry as an i,j,v triplet.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
CombineMode
Rule for combining data in an Import or Export.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Sum new values into existing values.
Teuchos::RCP< Node > getNode() const
Get this Map's Node object.
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
size_t getNodeNumDiags() const
Returns the number of local diagonal entries, based on global row/column index comparisons.
Utility functions for packing and unpacking sparse matrix entries.
bool isDistributed() const
Whether this is a globally distributed object.
ProfileType getProfileType() const
Returns true if the matrix was allocated with static data structures.
virtual ~CrsMatrix()
Destructor.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
global_size_t getGlobalNumRows() const
Number of global elements in the row map of this matrix.
global_size_t getGlobalNumDiags() const
Returns the number of global diagonal entries, based on global row/column index comparisons.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
Replace existing values with new values.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficiency warning.
Binary function that returns its second argument.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
bool hasTransposeApply() const
Whether apply() allows applying the transpose or conjugate transpose.
Scalar v
Value of matrix entry.
void computeGlobalConstants()
Compute matrix properties that require collectives.
bool isFillActive() const
Whether the matrix is not fill complete.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
Replace old values with zero.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
global_size_t getGlobalNumCols() const
The number of global columns in the matrix.
bool hasColMap() const
Indicates whether the matrix has a well-defined column map.
mag_type getFrobeniusNorm() const
Compute and return the Frobenius norm of the matrix.
Kokkos::Details::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
LocalOrdinal replaceGlobalValues(GlobalOrdinal globalRow, const ArrayView< const GlobalOrdinal > &cols, const ArrayView< const Scalar > &vals)
Replace one or more entries' values, using global indices.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &x)
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply() and gaussSeidel().
Ordinal i
(Global) row index
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, typename execution_space::execution_space > dual_view_type
Kokkos::DualView specialization used by this class.
Describes a parallel distribution of objects over processes.
size_t getGlobalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on all nodes.
global_size_t getGlobalNumEntries() const
The global number of entries in this matrix.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
A read-only, row-oriented interface to a sparse matrix.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
A distributed dense vector.
Stand-alone utility functions and macros.
virtual void pack(const Teuchos::ArrayView< const LocalOrdinal > &exportLIDs, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor) const
Pack this object's data for an Import or Export.
void expertStaticFillComplete(const RCP< const map_type > &domainMap, const RCP< const map_type > &rangeMap, const RCP< const import_type > &importer=Teuchos::null, const RCP< const export_type > &exporter=Teuchos::null, const RCP< ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
Ordinal j
(Global) column index
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to an FancyOStream object.
size_t getNumVectors() const
Number of columns in the multivector.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, CombineMode CM)
Export data into this object using an Export object ("forward mode").
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
void globalAssemble()
Communicate nonlocal contributions to other processes.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
void gaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of entries on this node in the specified local row.
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=ScalarTraits< Scalar >::one(), Scalar beta=ScalarTraits< Scalar >::zero()) const
Compute a sparse matrix-MultiVector multiply.
size_t getNodeNumCols() const
The number of columns connected to the locally owned rows of this matrix.
Teuchos::ArrayView< impl_scalar_type > getViewNonConst(RowInfo rowinfo)
Nonconst view of all entries (including extra space) in the given row.
void reorderedGaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const
Returns the current number of entries on this node in the specified global row.
size_t getNodeNumEntries() const
The local number of entries in this matrix.
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const
Implementation of RowMatrix::add: return alpha*A + beta*this.
void clearGlobalConstants()
Clear matrix properties that require collectives.
CrsIJV(Ordinal row, Ordinal col, const Scalar &val)
Standard constructor.
bool isGloballyIndexed() const
Whether the matrix is globally indexed on the calling process.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
void fillComplete(const RCP< const map_type > &domainMap, const RCP< const map_type > &rangeMap, const RCP< ParameterList > &params=null)
Signal that data entry is complete, specifying domain and range maps.
local_matrix_type lclMatrix_
The local sparse matrix.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
Teuchos::RCP< const map_type > getRangeMap() const
The range Map of this matrix.
RowInfo getRowInfo(const size_t myRow) const
Get information about the locally owned row with local index myRow.
size_t getNodeMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.