Tpetra parallel linear algebra  Version of the Day
Tpetra_DirectoryImpl_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef __Tpetra_DirectoryImpl_def_hpp
43 #define __Tpetra_DirectoryImpl_def_hpp
44 
45 #include <Tpetra_DirectoryImpl_decl.hpp>
46 #include <Tpetra_Distributor.hpp>
47 #include <Tpetra_Map.hpp>
48 #include <Tpetra_TieBreak.hpp>
49 
50 #include <Tpetra_Details_FixedHashTable.hpp>
51 #include <Tpetra_HashTable.hpp>
52 
53 #include <unordered_map>
54 
55 // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
56 #ifdef HAVE_MPI
57 # include "mpi.h"
58 #endif // HAVE_MPI
59 // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
60 
61 
62 namespace Tpetra {
63  namespace Details {
64  template<class LO, class GO, class NT>
67 
68  template<class LO, class GO, class NT>
71  getEntries (const map_type& map,
72  const Teuchos::ArrayView<const GO> &globalIDs,
73  const Teuchos::ArrayView<int> &nodeIDs,
74  const Teuchos::ArrayView<LO> &localIDs,
75  const bool computeLIDs) const
76  {
77  // Ensure that globalIDs, nodeIDs, and localIDs (if applicable)
78  // all have the same size, before modifying any output arguments.
79  TEUCHOS_TEST_FOR_EXCEPTION(nodeIDs.size() != globalIDs.size(),
80  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
81  "Output arrays do not have the right sizes. nodeIDs.size() = "
82  << nodeIDs.size() << " != globalIDs.size() = " << globalIDs.size()
83  << ".");
84  TEUCHOS_TEST_FOR_EXCEPTION(
85  computeLIDs && localIDs.size() != globalIDs.size(),
86  std::invalid_argument, Teuchos::typeName(*this) << "::getEntries(): "
87  "Output array do not have the right sizes. localIDs.size() = "
88  << localIDs.size() << " != globalIDs.size() = " << globalIDs.size()
89  << ".");
90 
91  // Initially, fill nodeIDs and localIDs (if applicable) with
92  // invalid values. The "invalid" process ID is -1 (this means
93  // the same thing as MPI_ANY_SOURCE to Teuchos, so it's an
94  // "invalid" process ID); the invalid local ID comes from
95  // OrdinalTraits.
96  std::fill (nodeIDs.begin(), nodeIDs.end(), -1);
97  if (computeLIDs) {
98  std::fill (localIDs.begin(), localIDs.end(),
99  Teuchos::OrdinalTraits<LO>::invalid ());
100  }
101  // Actually do the work.
102  return this->getEntriesImpl (map, globalIDs, nodeIDs, localIDs, computeLIDs);
103  }
104 
105 
106  template<class LO, class GO, class NT>
109  numProcs_ (map.getComm ()->getSize ())
110  {}
111 
112 
113  template<class LO, class GO, class NT>
116  numProcs_ (0) // to be set later
117  {}
118 
119 
120  template<class LO, class GO, class NT>
121  bool
123  isOneToOne (const Teuchos::Comm<int>& comm) const
124  {
125  // A locally replicated Map is one-to-one only if there is no
126  // replication, that is, only if the Map's communicator only has
127  // one process.
128  return (numProcs_ == 1);
129  }
130 
131 
132  template<class LO, class GO, class NT>
133  std::string
135  {
136  std::ostringstream os;
137  os << "ReplicatedDirectory"
138  << "<" << Teuchos::TypeNameTraits<LO>::name ()
139  << ", " << Teuchos::TypeNameTraits<GO>::name ()
140  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
141  return os.str ();
142  }
143 
144 
145  template<class LO, class GO, class NT>
148  {
149  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
150  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
151  TEUCHOS_TEST_FOR_EXCEPTION(! map.isUniform (), std::invalid_argument,
152  Teuchos::typeName (*this) << " constructor: Map is not uniform.");
153  }
154 
155 
156  template<class LO, class GO, class NT>
157  std::string
159  {
160  std::ostringstream os;
161  os << "ContiguousUniformDirectory"
162  << "<" << Teuchos::TypeNameTraits<LO>::name ()
163  << ", " << Teuchos::TypeNameTraits<GO>::name ()
164  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
165  return os.str ();
166  }
167 
168 
169  template<class LO, class GO, class NT>
173  const Teuchos::ArrayView<const GO> &globalIDs,
174  const Teuchos::ArrayView<int> &nodeIDs,
175  const Teuchos::ArrayView<LO> &localIDs,
176  const bool computeLIDs) const
177  {
178  using Teuchos::Comm;
179  using Teuchos::RCP;
180  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
181  const LO invalidLid = Teuchos::OrdinalTraits<LO>::invalid ();
183 
184  RCP<const Comm<int> > comm = map.getComm ();
185  const GO g_min = map.getMinAllGlobalIndex ();
186 
187  // Let N_G be the global number of elements in the Map,
188  // and P be the number of processes in its communicator.
189  // Then, N_G = P * N_L + R = R*(N_L + 1) + (P - R)*N_L.
190  //
191  // The first R processes own N_L+1 elements.
192  // The remaining P-R processes own N_L elements.
193  //
194  // Let g be the current GID, g_min be the global minimum GID,
195  // and g_0 = g - g_min. If g is a valid GID in this Map, then
196  // g_0 is in [0, N_G - 1].
197  //
198  // If g is a valid GID in this Map and g_0 < R*(N_L + 1), then
199  // the rank of the process that owns g is floor(g_0 / (N_L +
200  // 1)), and its corresponding local index on that process is g_0
201  // mod (N_L + 1).
202  //
203  // Let g_R = g_0 - R*(N_L + 1). If g is a valid GID in this Map
204  // and g_0 >= R*(N_L + 1), then the rank of the process that
205  // owns g is then R + floor(g_R / N_L), and its corresponding
206  // local index on that process is g_R mod N_L.
207 
208  const size_type N_G =
209  static_cast<size_type> (map.getGlobalNumElements ());
210  const size_type P = static_cast<size_type> (comm->getSize ());
211  const size_type N_L = N_G / P;
212  const size_type R = N_G - N_L * P; // N_G mod P
213  const size_type N_R = R * (N_L + static_cast<size_type> (1));
214 
215 #ifdef HAVE_TPETRA_DEBUG
216  TEUCHOS_TEST_FOR_EXCEPTION(
217  N_G != P*N_L + R, std::logic_error,
218  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
219  "N_G = " << N_G << " != P*N_L + R = " << P << "*" << N_L << " + " << R
220  << " = " << P*N_L + R << ". "
221  "Please report this bug to the Tpetra developers.");
222 #endif // HAVE_TPETRA_DEBUG
223 
224  const size_type numGids = globalIDs.size (); // for const loop bound
225  // Avoid signed/unsigned comparisons below, in case GO is
226  // unsigned. (Integer literals are generally signed.)
227  const GO ONE = static_cast<GO> (1);
228 
229  if (computeLIDs) {
230  for (size_type k = 0; k < numGids; ++k) {
231  const GO g_0 = globalIDs[k] - g_min;
232 
233  // The first test is a little strange just in case GO is
234  // unsigned. Compilers raise a warning on tests like "x <
235  // 0" if x is unsigned, but don't usually raise a warning if
236  // the expression is a bit more complicated than that.
237  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
238  nodeIDs[k] = -1;
239  localIDs[k] = invalidLid;
240  res = IDNotPresent;
241  }
242  else if (g_0 < static_cast<GO> (N_R)) {
243  // The GID comes from the initial sequence of R processes.
244  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
245  localIDs[k] = static_cast<LO> (g_0 % static_cast<GO> (N_L + 1));
246  }
247  else if (g_0 >= static_cast<GO> (N_R)) {
248  // The GID comes from the remaining P-R processes.
249  const GO g_R = g_0 - static_cast<GO> (N_R);
250  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
251  localIDs[k] = static_cast<int> (g_R % N_L);
252  }
253 #ifdef HAVE_TPETRA_DEBUG
254  else {
255  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
256  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
257  "should never get here. "
258  "Please report this bug to the Tpetra developers.");
259  }
260 #endif // HAVE_TPETRA_DEBUG
261  }
262  }
263  else { // don't compute local indices
264  for (size_type k = 0; k < numGids; ++k) {
265  const GO g_0 = globalIDs[k] - g_min;
266  // The first test is a little strange just in case GO is
267  // unsigned. Compilers raise a warning on tests like "x <
268  // 0" if x is unsigned, but don't usually raise a warning if
269  // the expression is a bit more complicated than that.
270  if (g_0 + ONE < ONE || g_0 >= static_cast<GO> (N_G)) {
271  nodeIDs[k] = -1;
272  res = IDNotPresent;
273  }
274  else if (g_0 < static_cast<GO> (N_R)) {
275  // The GID comes from the initial sequence of R processes.
276  nodeIDs[k] = static_cast<int> (g_0 / static_cast<GO> (N_L + 1));
277  }
278  else if (g_0 >= static_cast<GO> (N_R)) {
279  // The GID comes from the remaining P-R processes.
280  const GO g_R = g_0 - static_cast<GO> (N_R);
281  nodeIDs[k] = static_cast<int> (R + g_R / N_L);
282  }
283 #ifdef HAVE_TPETRA_DEBUG
284  else {
285  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
286  "Tpetra::ContiguousUniformDirectory::getEntriesImpl: "
287  "should never get here. "
288  "Please report this bug to the Tpetra developers.");
289  }
290 #endif // HAVE_TPETRA_DEBUG
291  }
292  }
293  return res;
294  }
295 
296  template<class LO, class GO, class NT>
299  {
300  using Teuchos::gatherAll;
301  using Teuchos::RCP;
302 
303  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
304 
305  TEUCHOS_TEST_FOR_EXCEPTION(! map.isDistributed (), std::invalid_argument,
306  Teuchos::typeName (*this) << " constructor: Map is not distributed.");
307  TEUCHOS_TEST_FOR_EXCEPTION(! map.isContiguous (), std::invalid_argument,
308  Teuchos::typeName (*this) << " constructor: Map is not contiguous.");
309 
310  const int numProcs = comm->getSize ();
311 
312  // Make room for the min global ID on each process, plus one
313  // entry at the end for the "max cap."
314  allMinGIDs_ = arcp<GO> (numProcs + 1);
315  // Get my process' min global ID.
316  GO minMyGID = map.getMinGlobalIndex ();
317  // Gather all of the min global IDs into the first numProcs
318  // entries of allMinGIDs_.
319 
320  // FIXME (mfh 16 Apr 2013) GIANT HACK BELOW
321  //
322  // The purpose of this giant hack is that gatherAll appears to
323  // interpret the "receive count" argument differently than
324  // MPI_Allgather does. Matt Bettencourt reports Valgrind issues
325  // (memcpy with overlapping data) with MpiComm<int>::gatherAll,
326  // which could relate either to this, or to OpenMPI.
327 #ifdef HAVE_MPI
328  MPI_Datatype rawMpiType = MPI_INT;
329  bool useRawMpi = true;
330  if (typeid (GO) == typeid (int)) {
331  rawMpiType = MPI_INT;
332  } else if (typeid (GO) == typeid (long)) {
333  rawMpiType = MPI_LONG;
334  } else {
335  useRawMpi = false;
336  }
337  if (useRawMpi) {
338  using Teuchos::rcp_dynamic_cast;
339  using Teuchos::MpiComm;
340  RCP<const MpiComm<int> > mpiComm =
341  rcp_dynamic_cast<const MpiComm<int> > (comm);
342  // It could be a SerialComm instead, even in an MPI build, so
343  // be sure to check.
344  if (! comm.is_null ()) {
345  MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
346  const int err =
347  MPI_Allgather (&minMyGID, 1, rawMpiType,
348  allMinGIDs_.getRawPtr (), 1, rawMpiType,
349  rawMpiComm);
350  TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error,
351  "Tpetra::DistributedContiguousDirectory: MPI_Allgather failed");
352  } else {
353  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
354  }
355  } else {
356  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
357  }
358 #else // NOT HAVE_MPI
359  gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
360 #endif // HAVE_MPI
361  // FIXME (mfh 16 Apr 2013) GIANT HACK ABOVE
362 
363  //gatherAll<int, GO> (*comm, 1, &minMyGID, numProcs, allMinGIDs_.getRawPtr ());
364 
365  // Put the max cap at the end. Adding one lets us write loops
366  // over the global IDs with the usual strict less-than bound.
367  allMinGIDs_[numProcs] = map.getMaxAllGlobalIndex ()
368  + Teuchos::OrdinalTraits<GO>::one ();
369  }
370 
371  template<class LO, class GO, class NT>
372  std::string
374  {
375  std::ostringstream os;
376  os << "DistributedContiguousDirectory"
377  << "<" << Teuchos::TypeNameTraits<LO>::name ()
378  << ", " << Teuchos::TypeNameTraits<GO>::name ()
379  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
380  return os.str ();
381  }
382 
383  template<class LO, class GO, class NT>
387  const Teuchos::ArrayView<const GO> &globalIDs,
388  const Teuchos::ArrayView<int> &nodeIDs,
389  const Teuchos::ArrayView<LO> &localIDs,
390  const bool computeLIDs) const
391  {
392  using Teuchos::Array;
393  using Teuchos::ArrayRCP;
394  using Teuchos::ArrayView;
395  using Teuchos::as;
396  using Teuchos::Comm;
397  using Teuchos::RCP;
398 
400  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
401  const int myRank = comm->getRank ();
402 
403  // Map is on one process or is locally replicated.
404  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
405  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
406  typename ArrayView<const GO>::iterator gidIter;
407  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
408  if (map.isNodeGlobalElement (*gidIter)) {
409  *procIter++ = myRank;
410  if (computeLIDs) {
411  *lidIter++ = map.getLocalElement (*gidIter);
412  }
413  }
414  else {
415  // Advance the pointers, leaving these values set to invalid
416  procIter++;
417  if (computeLIDs) {
418  lidIter++;
419  }
420  res = IDNotPresent;
421  }
422  }
423  return res;
424  }
425 
426  template<class LO, class GO, class NT>
430  const Teuchos::ArrayView<const GO> &globalIDs,
431  const Teuchos::ArrayView<int> &nodeIDs,
432  const Teuchos::ArrayView<LO> &localIDs,
433  const bool computeLIDs) const
434  {
435  using Teuchos::Array;
436  using Teuchos::ArrayRCP;
437  using Teuchos::ArrayView;
438  using Teuchos::as;
439  using Teuchos::Comm;
440  using Teuchos::RCP;
441 
442  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
443  const int numProcs = comm->getSize ();
444  const global_size_t nOverP = map.getGlobalNumElements () / numProcs;
445  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
447 
448  // Map is distributed but contiguous.
449  typename ArrayView<int>::iterator procIter = nodeIDs.begin();
450  typename ArrayView<LO>::iterator lidIter = localIDs.begin();
451  typename ArrayView<const GO>::iterator gidIter;
452  for (gidIter = globalIDs.begin(); gidIter != globalIDs.end(); ++gidIter) {
453  LO LID = LINVALID; // Assume not found until proven otherwise
454  int image = -1;
455  GO GID = *gidIter;
456  // Guess uniform distribution and start a little above it
457  // TODO: replace by a binary search
458  int curRank;
459  { // We go through all this trouble to avoid overflow and
460  // signed / unsigned casting mistakes (that were made in
461  // previous versions of this code).
462  const GO one = as<GO> (1);
463  const GO two = as<GO> (2);
464  const GO nOverP_GID = as<GO> (nOverP);
465  const GO lowerBound = GID / std::max(nOverP_GID, one) + two;
466  curRank = as<int>(std::min(lowerBound, as<GO>(numProcs - 1)));
467  }
468  bool found = false;
469  while (curRank >= 0 && curRank < numProcs) {
470  if (allMinGIDs_[curRank] <= GID) {
471  if (GID < allMinGIDs_[curRank + 1]) {
472  found = true;
473  break;
474  }
475  else {
476  curRank++;
477  }
478  }
479  else {
480  curRank--;
481  }
482  }
483  if (found) {
484  image = curRank;
485  LID = as<LO> (GID - allMinGIDs_[image]);
486  }
487  else {
488  res = IDNotPresent;
489  }
490  *procIter++ = image;
491  if (computeLIDs) {
492  *lidIter++ = LID;
493  }
494  }
495  return res;
496  }
497 
498  template<class LO, class GO, class NT>
501  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
502  locallyOneToOne_ (true), // to be revised below
503  useHashTables_ (false) // to be revised below
504  {
505  initialize (map, Teuchos::null);
506  }
507 
508  template<class LO, class GO, class NT>
511  const tie_break_type& tie_break) :
512  oneToOneResult_ (ONE_TO_ONE_NOT_CALLED_YET), // to be revised below
513  locallyOneToOne_ (true), // to be revised below
514  useHashTables_ (false) // to be revised below
515  {
516  initialize (map, Teuchos::ptrFromRef (tie_break));
517  }
518 
519  template<class LO, class GO, class NT>
520  void
522  initialize (const map_type& map,
523  Teuchos::Ptr<const tie_break_type> tie_break)
524  {
525  using Teuchos::Array;
526  using Teuchos::ArrayView;
527  using Teuchos::as;
528  using Teuchos::RCP;
529  using Teuchos::rcp;
530  using Teuchos::typeName;
531  using Teuchos::TypeNameTraits;
532  using std::cerr;
533  using std::endl;
534  typedef Array<int>::size_type size_type;
535 
536  // This class' implementation of getEntriesImpl() currently
537  // encodes the following assumptions:
538  //
539  // 1. global_size_t >= GO
540  // 2. global_size_t >= int
541  // 3. global_size_t >= LO
542  //
543  // We check these assumptions here.
544  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(GO),
545  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
546  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Global"
547  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(GO)
548  << ".");
549  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(int),
550  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
551  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(int) = "
552  << sizeof(int) << ".");
553  TEUCHOS_TEST_FOR_EXCEPTION(sizeof(global_size_t) < sizeof(LO),
554  std::logic_error, typeName (*this) << ": sizeof(Tpetra::"
555  "global_size_t) = " << sizeof(global_size_t) << " < sizeof(Local"
556  "Ordinal = " << TypeNameTraits<LO>::name () << ") = " << sizeof(LO)
557  << ".");
558 
559  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
560  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
561  const GO minAllGID = map.getMinAllGlobalIndex ();
562  const GO maxAllGID = map.getMaxAllGlobalIndex ();
563 
564  // The "Directory Map" (see below) will have a range of elements
565  // from the minimum to the maximum GID of the user Map, and a
566  // minimum GID of minAllGID from the user Map. It doesn't
567  // actually have to store all those entries, though do beware of
568  // calling getNodeElementList on it (see Bug 5822).
569  const global_size_t numGlobalEntries = maxAllGID - minAllGID + 1;
570 
571  // We can't afford to replicate the whole directory on each
572  // process, so create the "Directory Map", a uniform contiguous
573  // Map that describes how we will distribute the directory over
574  // processes.
575  //
576  // FIXME (mfh 08 May 2012) Here we're setting minAllGID to be
577  // the index base. The index base should be separate from the
578  // minimum GID.
579  directoryMap_ = rcp (new map_type (numGlobalEntries, minAllGID, comm,
580  GloballyDistributed, map.getNode ()));
581  // The number of Directory elements that my process owns.
582  const size_t dir_numMyEntries = directoryMap_->getNodeNumElements ();
583 
584  // Fix for Bug 5822: If the input Map is "sparse," that is if
585  // the difference between the global min and global max GID is
586  // much larger than the global number of elements in the input
587  // Map, then it's possible that the Directory Map might have
588  // many more entries than the input Map on this process. This
589  // can cause memory scalability issues. In that case, we switch
590  // from the array-based implementation of Directory storage to
591  // the hash table - based implementation. We don't use hash
592  // tables all the time, because they are slower in the common
593  // case of a nonsparse Map.
594  //
595  // NOTE: This is a per-process decision. Some processes may use
596  // array-based storage, whereas others may use hash table -
597  // based storage.
598 
599  // A hash table takes a constant factor more space, more or
600  // less, than an array. Thus, it's not worthwhile, even in
601  // terms of memory usage, always to use a hash table.
602  // Furthermore, array lookups are faster than hash table
603  // lookups, so it may be worthwhile to use an array even if it
604  // takes more space. The "sparsity threshold" governs when to
605  // switch to a hash table - based implementation.
606  const size_t inverseSparsityThreshold = 10;
607  useHashTables_ =
608  dir_numMyEntries >= inverseSparsityThreshold * map.getNodeNumElements ();
609 
610  // Get list of process IDs that own the directory entries for the
611  // Map GIDs. These will be the targets of the sends that the
612  // Distributor will do.
613  const int myRank = comm->getRank ();
614  const size_t numMyEntries = map.getNodeNumElements ();
615  Array<int> sendImageIDs (numMyEntries);
616  ArrayView<const GO> myGlobalEntries = map.getNodeElementList ();
617  // An ID not present in this lookup indicates that it lies outside
618  // of the range [minAllGID,maxAllGID] (from map_). this means
619  // something is wrong with map_, our fault.
620  const LookupStatus lookupStatus =
621  directoryMap_->getRemoteIndexList (myGlobalEntries, sendImageIDs);
622  TEUCHOS_TEST_FOR_EXCEPTION(
623  lookupStatus == IDNotPresent, std::logic_error, Teuchos::typeName(*this)
624  << " constructor: the Directory Map could not find out where one or "
625  "more of my Map's indices should go. The input to getRemoteIndexList "
626  "is " << Teuchos::toString (myGlobalEntries) << ", and the output is "
627  << Teuchos::toString (sendImageIDs ()) << ". The input Map itself has "
628  "the following entries on the calling process " <<
629  map.getComm ()->getRank () << ": " <<
630  Teuchos::toString (map.getNodeElementList ()) << ", and has "
631  << map.getGlobalNumElements () << " total global indices in ["
632  << map.getMinAllGlobalIndex () << "," << map.getMaxAllGlobalIndex ()
633  << "]. The Directory Map has "
634  << directoryMap_->getGlobalNumElements () << " total global indices in "
635  "[" << directoryMap_->getMinAllGlobalIndex () << "," <<
636  directoryMap_->getMaxAllGlobalIndex () << "], and the calling process "
637  "has GIDs [" << directoryMap_->getMinGlobalIndex () << "," <<
638  directoryMap_->getMaxGlobalIndex () << "]. "
639  "This probably means there is a bug in Map or Directory. "
640  "Please report this bug to the Tpetra developers.");
641 
642  // Initialize the distributor using the list of process IDs to
643  // which to send. We'll use the distributor to send out triples
644  // of (GID, process ID, LID). We're sending the entries to the
645  // processes that the Directory Map says should own them, which is
646  // why we called directoryMap_->getRemoteIndexList() above.
647  Distributor distor (comm);
648  const size_t numReceives = distor.createFromSends (sendImageIDs);
649 
650  // NOTE (mfh 21 Mar 2012) The following code assumes that
651  // sizeof(GO) >= sizeof(int) and sizeof(GO) >= sizeof(LO).
652  //
653  // Create and fill buffer of (GID, PID, LID) triples to send
654  // out. We pack the (GID, PID, LID) triples into a single Array
655  // of GO, casting the PID from int to GO and the LID from LO to
656  // GO as we do so.
657  //
658  // FIXME (mfh 23 Mar 2014) This assumes that sizeof(LO) <=
659  // sizeof(GO) and sizeof(int) <= sizeof(GO). The former is
660  // required, and the latter is generally the case, but we should
661  // still check for this.
662  const int packetSize = 3; // We're sending triples, so packet size is 3.
663  Array<GO> exportEntries (packetSize * numMyEntries); // data to send out
664  {
665  size_type exportIndex = 0;
666  for (size_type i = 0; i < static_cast<size_type> (numMyEntries); ++i) {
667  exportEntries[exportIndex++] = myGlobalEntries[i];
668  exportEntries[exportIndex++] = as<GO> (myRank);
669  exportEntries[exportIndex++] = as<GO> (i);
670  }
671  }
672  // Buffer of data to receive. The Distributor figured out for
673  // us how many packets we're receiving, when we called its
674  // createFromSends() method to set up the distribution plan.
675  Array<GO> importElements (packetSize * distor.getTotalReceiveLength ());
676 
677  // Distribute the triples of (GID, process ID, LID).
678  distor.doPostsAndWaits (exportEntries ().getConst (), packetSize, importElements ());
679 
680  // Unpack the redistributed data. Both implementations of
681  // Directory storage map from an LID in the Directory Map (which
682  // is the LID of the GID to store) to either a PID or an LID in
683  // the input Map. Each "packet" (contiguous chunk of
684  // importElements) contains a triple: (GID, PID, LID).
685  if (useHashTables_) {
686  // Create the hash tables. We know exactly how many elements
687  // to expect in each hash table. FixedHashTable's constructor
688  // currently requires all the keys and values at once, so we
689  // have to extract them in temporary arrays. It may be
690  // possible to rewrite FixedHashTable to use a "start fill" /
691  // "end fill" approach that avoids the temporary arrays, but
692  // we won't try that for now.
693 
694  // The constructors of Array and ArrayRCP that take a number
695  // of elements all initialize the arrays. Instead, allocate
696  // raw arrays, then hand them off to ArrayRCP, to avoid the
697  // initial unnecessary initialization without losing the
698  // benefit of exception safety (and bounds checking, in a
699  // debug build).
700  LO* tableKeysRaw = NULL;
701  LO* tableLidsRaw = NULL;
702  int* tablePidsRaw = NULL;
703  try {
704  tableKeysRaw = new LO [numReceives];
705  tableLidsRaw = new LO [numReceives];
706  tablePidsRaw = new int [numReceives];
707  } catch (...) {
708  if (tableKeysRaw != NULL) {
709  delete [] tableKeysRaw;
710  }
711  if (tableLidsRaw != NULL) {
712  delete [] tableLidsRaw;
713  }
714  if (tablePidsRaw != NULL) {
715  delete [] tablePidsRaw;
716  }
717  throw;
718  }
719  ArrayRCP<LO> tableKeys (tableKeysRaw, 0, numReceives, true);
720  ArrayRCP<LO> tableLids (tableLidsRaw, 0, numReceives, true);
721  ArrayRCP<int> tablePids (tablePidsRaw, 0, numReceives, true);
722 
723  if (tie_break.is_null ()) {
724  // Fill the temporary arrays of keys and values.
725  size_type importIndex = 0;
726  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
727  const GO curGID = importElements[importIndex++];
728  const LO curLID = directoryMap_->getLocalElement (curGID);
729  TEUCHOS_TEST_FOR_EXCEPTION(
730  curLID == LINVALID, std::logic_error,
731  Teuchos::typeName(*this) << " constructor: Incoming global index "
732  << curGID << " does not have a corresponding local index in the "
733  "Directory Map. Please report this bug to the Tpetra developers.");
734  tableKeys[i] = curLID;
735  tablePids[i] = importElements[importIndex++];
736  tableLids[i] = importElements[importIndex++];
737  }
738  // Set up the hash tables. The hash tables' constructor
739  // detects whether there are duplicates, so that we can set
740  // locallyOneToOne_.
741  typedef Kokkos::Device<typename NT::execution_space,
742  typename NT::memory_space> DT;
743  lidToPidTable_ =
744  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
745  tablePids ()));
746  locallyOneToOne_ = ! (lidToPidTable_->hasDuplicateKeys ());
747  lidToLidTable_ =
748  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
749  tableLids ()));
750  }
751  else { // tie_break is NOT null
752 
753  // For each directory Map LID received, collect all the
754  // corresponding (PID,LID) pairs. If the input Map is not
755  // one-to-one, corresponding directory Map LIDs will have
756  // more than one pair. In that case, we will use the
757  // TieBreak object to pick exactly one pair.
758  typedef std::map<LO, std::vector<std::pair<int, LO> > > pair_table_type;
759  pair_table_type ownedPidLidPairs;
760 
761  // For each directory Map LID received, collect the zero or
762  // more input Map (PID,LID) pairs into ownedPidLidPairs.
763  size_type importIndex = 0;
764  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
765  const GO curGID = importElements[importIndex++];
766  const LO dirMapLid = directoryMap_->getLocalElement (curGID);
767  TEUCHOS_TEST_FOR_EXCEPTION(
768  dirMapLid == LINVALID, std::logic_error,
769  Teuchos::typeName(*this) << " constructor: Incoming global index "
770  << curGID << " does not have a corresponding local index in the "
771  "Directory Map. Please report this bug to the Tpetra developers.");
772  tableKeys[i] = dirMapLid;
773  const int PID = importElements[importIndex++];
774  const int LID = importElements[importIndex++];
775 
776  // These may change below. We fill them in just to ensure
777  // that they won't have invalid values.
778  tablePids[i] = PID;
779  tableLids[i] = LID;
780 
781  // For every directory Map LID, we have to remember all
782  // (PID, LID) pairs. The TieBreak object will arbitrate
783  // between them in the loop below.
784  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
785  }
786 
787  // Use TieBreak to arbitrate between (PID,LID) pairs
788  // corresponding to each directory Map LID.
789  //
790  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
791  // as the directory Map LID?
792  const size_type numPairs =
793  static_cast<size_type> (ownedPidLidPairs.size ());
794  for (size_type i = 0; i < numPairs; ++i) {
795  const LO dirMapLid = static_cast<LO> (i);
796  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
797  const std::vector<std::pair<int, LO> >& pidLidList =
798  ownedPidLidPairs[i];
799  const size_t listLen = pidLidList.size ();
800  if (listLen > 0) {
801  if (listLen > 1) {
802  locallyOneToOne_ = false;
803  }
804  // If there is some (PID,LID) pair for the current input
805  // Map LID, then it makes sense to invoke the TieBreak
806  // object to arbitrate between the options. Even if
807  // there is only one (PID,LID) pair, we still want to
808  // give the TieBreak object a chance to do whatever it
809  // likes to do, in terms of side effects (e.g., track
810  // (PID,LID) pairs).
811  const size_type index =
812  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
813  pidLidList));
814  tablePids[i] = pidLidList[index].first;
815  tableLids[i] = pidLidList[index].second;
816  }
817  }
818 
819  // Set up the hash tables.
820  typedef Kokkos::Device<typename NT::execution_space,
821  typename NT::memory_space> DT;
822  lidToPidTable_ =
823  rcp (new Details::FixedHashTable<LO, int, DT> (tableKeys (),
824  tablePids ()));
825  lidToLidTable_ =
826  rcp (new Details::FixedHashTable<LO, LO, DT> (tableKeys (),
827  tableLids ()));
828  }
829  }
830  else {
831  if (tie_break.is_null ()) {
832  // Use array-based implementation of Directory storage.
833  // Allocate these arrays and fill them with invalid values,
834  // in case the input Map's GID list is sparse (i.e., does
835  // not populate all GIDs from minAllGID to maxAllGID).
836  PIDs_ = arcp<int> (dir_numMyEntries);
837  std::fill (PIDs_.begin (), PIDs_.end (), -1);
838  LIDs_ = arcp<LO> (dir_numMyEntries);
839  std::fill (LIDs_.begin (), LIDs_.end (), LINVALID);
840  // Fill in the arrays with PIDs resp. LIDs.
841  size_type importIndex = 0;
842  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
843  const GO curGID = importElements[importIndex++];
844  const LO curLID = directoryMap_->getLocalElement (curGID);
845  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
846  Teuchos::typeName(*this) << " constructor: Incoming global index "
847  << curGID << " does not have a corresponding local index in the "
848  "Directory Map. Please report this bug to the Tpetra developers.");
849 
850  // If PIDs_[curLID] is not -1, then curGID is a duplicate
851  // on the calling process, so the Directory is not locally
852  // one-to-one.
853  if (PIDs_[curLID] != -1) {
854  locallyOneToOne_ = false;
855  }
856  PIDs_[curLID] = importElements[importIndex++];
857  LIDs_[curLID] = importElements[importIndex++];
858  }
859  }
860  else {
861  PIDs_ = arcp<int> (dir_numMyEntries);
862  LIDs_ = arcp<LO> (dir_numMyEntries);
863  std::fill (PIDs_.begin (), PIDs_.end (), -1);
864 
865  // All received (PID, LID) pairs go into ownedPidLidPairs.
866  // This is a map from the directory Map's LID to the (PID,
867  // LID) pair (where the latter LID comes from the input Map,
868  // not the directory Map). If the input Map is not
869  // one-to-one, corresponding LIDs will have
870  // ownedPidLidPairs[curLID].size() > 1. In that case, we
871  // will use the TieBreak object to pick exactly one pair.
872  Array<std::vector<std::pair<int, LO> > > ownedPidLidPairs (dir_numMyEntries);
873  size_type importIndex = 0;
874  for (size_type i = 0; i < static_cast<size_type> (numReceives); ++i) {
875  const GO GID = importElements[importIndex++];
876  const int PID = importElements[importIndex++];
877  const LO LID = importElements[importIndex++];
878 
879  const LO dirMapLid = directoryMap_->getLocalElement (GID);
880  TEUCHOS_TEST_FOR_EXCEPTION(
881  dirMapLid == LINVALID, std::logic_error,
882  Teuchos::typeName(*this) << " constructor: Incoming global index "
883  << GID << " does not have a corresponding local index in the "
884  "Directory Map. Please report this bug to the Tpetra developers.");
885  ownedPidLidPairs[dirMapLid].push_back (std::make_pair (PID, LID));
886  }
887 
888  // Use TieBreak to arbitrate between (PID,LID) pairs
889  // corresponding to each directory Map LID.
890  //
891  // FIXME (mfh 23 Mar 2014) How do I know that i is the same
892  // as the directory Map LID?
893  const size_type numPairs =
894  static_cast<size_type> (ownedPidLidPairs.size ());
895  for (size_type i = 0; i < numPairs; ++i) {
896  const LO dirMapLid = static_cast<LO> (i);
897  const GO dirMapGid = directoryMap_->getGlobalElement (dirMapLid);
898  const std::vector<std::pair<int, LO> >& pidLidList =
899  ownedPidLidPairs[i];
900  const size_t listLen = pidLidList.size ();
901  if (listLen > 0) {
902  if (listLen > 1) {
903  locallyOneToOne_ = false;
904  }
905  // If there is some (PID,LID) pair for the current input
906  // Map LID, then it makes sense to invoke the TieBreak
907  // object to arbitrate between the options. Even if
908  // there is only one (PID,LID) pair, we still want to
909  // give the TieBreak object a chance to do whatever it
910  // likes to do, in terms of side effects (e.g., track
911  // (PID,LID) pairs).
912  const size_type index =
913  static_cast<size_type> (tie_break->selectedIndex (dirMapGid,
914  pidLidList));
915  PIDs_[i] = pidLidList[index].first;
916  LIDs_[i] = pidLidList[index].second;
917  }
918  // else no GID specified by source map
919  }
920  }
921  }
922  }
923 
924  template<class LO, class GO, class NT>
925  std::string
927  {
928  std::ostringstream os;
929  os << "DistributedNoncontiguousDirectory"
930  << "<" << Teuchos::TypeNameTraits<LO>::name ()
931  << ", " << Teuchos::TypeNameTraits<GO>::name ()
932  << ", " << Teuchos::TypeNameTraits<NT>::name () << ">";
933  return os.str ();
934  }
935 
936  template<class LO, class GO, class NT>
940  const Teuchos::ArrayView<const GO> &globalIDs,
941  const Teuchos::ArrayView<int> &nodeIDs,
942  const Teuchos::ArrayView<LO> &localIDs,
943  const bool computeLIDs) const
944  {
945  using Teuchos::Array;
946  using Teuchos::ArrayRCP;
947  using Teuchos::ArrayView;
948  using Teuchos::as;
949  using Teuchos::RCP;
950  using std::cerr;
951  using std::endl;
952  typedef typename Array<GO>::size_type size_type;
953 
954  RCP<const Teuchos::Comm<int> > comm = map.getComm ();
955  const size_t numEntries = globalIDs.size ();
956  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid();
958 
959  //
960  // Set up directory structure.
961  //
962 
963  // If we're computing LIDs, we also have to include them in each
964  // packet, along with the GID and process ID.
965  const int packetSize = computeLIDs ? 3 : 2;
966 
967  // For data distribution, we use: Surprise! A Distributor!
968  Distributor distor (comm);
969 
970  // Get directory locations for the requested list of entries.
971  Array<int> dirImages (numEntries);
972  res = directoryMap_->getRemoteIndexList (globalIDs, dirImages ());
973  // Check for unfound globalIDs and set corresponding nodeIDs to -1
974  size_t numMissing = 0;
975  if (res == IDNotPresent) {
976  for (size_t i=0; i < numEntries; ++i) {
977  if (dirImages[i] == -1) {
978  nodeIDs[i] = -1;
979  if (computeLIDs) {
980  localIDs[i] = LINVALID;
981  }
982  numMissing++;
983  }
984  }
985  }
986 
987  Array<GO> sendGIDs;
988  Array<int> sendImages;
989  distor.createFromRecvs (globalIDs, dirImages (), sendGIDs, sendImages);
990  const size_type numSends = sendGIDs.size ();
991 
992  //
993  // mfh 13 Nov 2012:
994  //
995  // The code below temporarily stores LO, GO, and int values in
996  // an array of global_size_t. If one of the signed types (LO
997  // and GO should both be signed) happened to be -1 (or some
998  // negative number, but -1 is the one that came up today), then
999  // conversion to global_size_t will result in a huge
1000  // global_size_t value, and thus conversion back may overflow.
1001  // (Teuchos::as doesn't know that we meant it to be an LO or GO
1002  // all along.)
1003  //
1004  // The overflow normally would not be a problem, since it would
1005  // just go back to -1 again. However, Teuchos::as does range
1006  // checking on conversions in a debug build, so it throws an
1007  // exception (std::range_error) in this case. Range checking is
1008  // generally useful in debug mode, so we don't want to disable
1009  // this behavior globally.
1010  //
1011  // We solve this problem by forgoing use of Teuchos::as for the
1012  // conversions below from LO, GO, or int to global_size_t, and
1013  // the later conversions back from global_size_t to LO, GO, or
1014  // int.
1015  //
1016  // I've recorded this discussion as Bug 5760.
1017  //
1018 
1019  // global_size_t >= GO
1020  // global_size_t >= size_t >= int
1021  // global_size_t >= size_t >= LO
1022  // Therefore, we can safely store all of these in a global_size_t
1023  Array<global_size_t> exports (packetSize * numSends);
1024  {
1025  // Packet format:
1026  // - If computing LIDs: (GID, PID, LID)
1027  // - Otherwise: (GID, PID)
1028  //
1029  // "PID" means "process ID" (a.k.a. "node ID," a.k.a. "rank").
1030 
1031  // Current position to which to write in exports array. If
1032  // sending pairs, we pack the (GID, PID) pair for gid =
1033  // sendGIDs[k] in exports[2*k], exports[2*k+1]. If sending
1034  // triples, we pack the (GID, PID, LID) pair for gid =
1035  // sendGIDs[k] in exports[3*k, 3*k+1, 3*k+2].
1036  size_type exportsIndex = 0;
1037 
1038  if (useHashTables_) {
1039  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1040  const GO curGID = sendGIDs[gidIndex];
1041  // Don't use as() here (see above note).
1042  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1043  const LO curLID = directoryMap_->getLocalElement (curGID);
1044  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
1045  Teuchos::typeName (*this) << "::getEntriesImpl(): The Directory "
1046  "Map's global index " << curGID << " does not have a corresponding "
1047  "local index. Please report this bug to the Tpetra developers.");
1048  // Don't use as() here (see above note).
1049  exports[exportsIndex++] = static_cast<global_size_t> (lidToPidTable_->get (curLID));
1050  if (computeLIDs) {
1051  // Don't use as() here (see above note).
1052  exports[exportsIndex++] = static_cast<global_size_t> (lidToLidTable_->get (curLID));
1053  }
1054  }
1055  } else {
1056  for (size_type gidIndex = 0; gidIndex < numSends; ++gidIndex) {
1057  const GO curGID = sendGIDs[gidIndex];
1058  // Don't use as() here (see above note).
1059  exports[exportsIndex++] = static_cast<global_size_t> (curGID);
1060  const LO curLID = directoryMap_->getLocalElement (curGID);
1061  TEUCHOS_TEST_FOR_EXCEPTION(curLID == LINVALID, std::logic_error,
1062  Teuchos::typeName (*this) << "::getEntriesImpl(): The Directory "
1063  "Map's global index " << curGID << " does not have a corresponding "
1064  "local index. Please report this bug to the Tpetra developers.");
1065  // Don't use as() here (see above note).
1066  exports[exportsIndex++] = static_cast<global_size_t> (PIDs_[curLID]);
1067  if (computeLIDs) {
1068  // Don't use as() here (see above note).
1069  exports[exportsIndex++] = static_cast<global_size_t> (LIDs_[curLID]);
1070  }
1071  }
1072  }
1073 
1074  TEUCHOS_TEST_FOR_EXCEPTION(
1075  exportsIndex > exports.size (), std::logic_error,
1076  Teuchos::typeName (*this) << "::getEntriesImpl(): On Process " <<
1077  comm->getRank () << ", exportsIndex = " << exportsIndex <<
1078  " > exports.size() = " << exports.size () <<
1079  ". Please report this bug to the Tpetra developers.");
1080  }
1081 
1082  TEUCHOS_TEST_FOR_EXCEPTION(
1083  numEntries < numMissing, std::logic_error,
1084  Teuchos::typeName (*this) << "::getEntriesImpl(): On Process "
1085  << comm->getRank () << ", numEntries = " << numEntries
1086  << " < numMissing = " << numMissing
1087  << ". Please report this bug to the Tpetra developers.");
1088 
1089  //
1090  // mfh 13 Nov 2012: See note above on conversions between
1091  // global_size_t and LO, GO, or int.
1092  //
1093  const size_t numRecv = numEntries - numMissing;
1094 
1095  {
1096  const size_t importLen = packetSize * distor.getTotalReceiveLength ();
1097  const size_t requiredImportLen = numRecv * packetSize;
1098  const int myRank = comm->getRank ();
1099  TEUCHOS_TEST_FOR_EXCEPTION(
1100  importLen < requiredImportLen, std::logic_error,
1101  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1102  "On Process " << myRank << ": The 'imports' array must have length "
1103  "at least " << requiredImportLen << ", but its actual length is " <<
1104  importLen << ". numRecv: " << numRecv << ", packetSize: " <<
1105  packetSize << ", numEntries (# GIDs): " << numEntries <<
1106  ", numMissing: " << numMissing << ": distor.getTotalReceiveLength(): "
1107  << distor.getTotalReceiveLength () << ". " << std::endl <<
1108  "Distributor description: " << distor.description () << ". "
1109  << std::endl <<
1110  "Please report this bug to the Tpetra developers.");
1111  }
1112 
1113  Array<global_size_t> imports (packetSize * distor.getTotalReceiveLength ());
1114 
1115  // FIXME (mfh 20 Mar 2014, 22 Sep 2015) One could overlap the
1116  // work below with communication, by splitting this call into
1117  // doPosts and doWaits. The code is still correct in this form,
1118  // however.
1119  distor.doPostsAndWaits (exports ().getConst (), packetSize, imports ());
1120 
1121  // mfh 22 Sep 2015 (Thanks to Karen Devine; see Bug 6412
1122  // discussion): Does doPostsAndWaits return items in the same
1123  // order they were requested? If so, we could just copy from
1124  // the imports array into the return argument arrays, without
1125  // the need for other mechanisms. If not, we can build a hash
1126  // table of the imports, and then search it to fill our return
1127  // arguments.
1128 
1129  std::unordered_map<GO, std::pair<int, LO> > received (numRecv);
1130 
1131  // Build a look-up table of received values
1132  size_t importsIndex = 0;
1133  for (size_t i = 0; i < numRecv; i++) {
1134  const GO curGID = static_cast<GO> (imports[importsIndex++]);
1135  const int curPID = static_cast<int> (imports[importsIndex++]);
1136  std::pair<int, LO> curPair;
1137  if (computeLIDs) {
1138  const LO curLID = static_cast<LO> (imports[importsIndex++]);
1139  curPair = std::make_pair (curPID, curLID);
1140  }
1141  else {
1142  curPair = std::make_pair (curPID, LINVALID);
1143  }
1144  received[curGID] = curPair;
1145  }
1146 
1147  // Look up the received value for each entry requested
1148  for (size_t i = 0; i < numEntries; ++i) {
1149  const GO curGID = globalIDs[i];
1150 
1151  auto tableIter = received.find (curGID);
1152  if (tableIter == received.end ()) {
1153  res = IDNotPresent;
1154  nodeIDs[i] = -1;
1155  if (computeLIDs) {
1156  localIDs[i] = LINVALID;
1157  }
1158  }
1159  else {
1160  auto curPair = tableIter->second;
1161  nodeIDs[i] = curPair.first;
1162  if (nodeIDs[i] == -1) {
1163  res = IDNotPresent;
1164  if (computeLIDs) {
1165  localIDs[i] = LINVALID;
1166  }
1167  }
1168  else if (computeLIDs) {
1169  localIDs[i] = curPair.second;
1170  }
1171  }
1172  }
1173 
1174  TEUCHOS_TEST_FOR_EXCEPTION(
1175  static_cast<size_t> (importsIndex) > static_cast<size_t> (imports.size ()),
1176  std::logic_error,
1177  "Tpetra::Details::DistributedNoncontiguousDirectory::getEntriesImpl: "
1178  "On Process " << comm->getRank () << ": importsIndex = " <<
1179  importsIndex << " > imports.size() = " << imports.size () << ". "
1180  "numRecv: " << numRecv << ", packetSize: " << packetSize << ", "
1181  "numEntries (# GIDs): " << numEntries << ", numMissing: " << numMissing
1182  << ": distor.getTotalReceiveLength(): "
1183  << distor.getTotalReceiveLength () << ". Please report this bug to "
1184  "the Tpetra developers.");
1185 
1186  return res;
1187  }
1188 
1189 
1190  template<class LO, class GO, class NT>
1191  bool
1193  isOneToOne (const Teuchos::Comm<int>& comm) const
1194  {
1195  if (oneToOneResult_ == ONE_TO_ONE_NOT_CALLED_YET) {
1196  const int lcl121 = isLocallyOneToOne () ? 1 : 0;
1197  int gbl121 = 0;
1198  Teuchos::reduceAll<int, int> (comm, Teuchos::REDUCE_MIN, lcl121,
1199  Teuchos::outArg (gbl121));
1200  oneToOneResult_ = (gbl121 == 1) ? ONE_TO_ONE_TRUE : ONE_TO_ONE_FALSE;
1201  }
1202  return (oneToOneResult_ == ONE_TO_ONE_TRUE);
1203  }
1204  } // namespace Details
1205 } // namespace Tpetra
1206 
1207 //
1208 // Explicit instantiation macro
1209 //
1210 // Must be expanded from within the Tpetra::Details namespace!
1211 //
1212 #define TPETRA_DIRECTORY_IMPL_INSTANT(LO,GO,NODE) \
1213  template class Directory< LO , GO , NODE >; \
1214  template class ReplicatedDirectory< LO , GO , NODE >; \
1215  template class ContiguousUniformDirectory< LO, GO, NODE >; \
1216  template class DistributedContiguousDirectory< LO , GO , NODE >; \
1217  template class DistributedNoncontiguousDirectory< LO , GO , NODE >; \
1218 
1219 #endif // __Tpetra_DirectoryImpl_def_hpp
Interface for breaking ties in ownership.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
GlobalOrdinal getMinGlobalIndex() const
The minimum global index owned by the calling process.
Interface for breaking ties in ownership.
std::string description() const
A one-line human-readable description of this object.
std::string description() const
A simple one-line description of this object.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
bool isUniform() const
Whether the range of global indices is uniform.
Implementation of Directory for a distributed noncontiguous Map.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Implementation of Directory for a distributed contiguous Map.
GlobalOrdinal getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
virtual bool isOneToOne(const Teuchos::Comm< int > &comm) const
Whether the Directory&#39;s input Map is (globally) one to one.
ReplicatedDirectory()
Constructor (that takes no arguments).
Implementation details of Tpetra.
GlobalOrdinal getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
virtual bool isOneToOne(const Teuchos::Comm< int > &comm) const
Whether the Directory&#39;s input Map is (globally) one to one.
size_t global_size_t
Global size_t object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
void doPostsAndWaits(const ArrayView< const Packet > &exports, size_t numPackets, const ArrayView< Packet > &imports)
Execute the (forward) communication plan.
LookupStatus getEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Sets up and executes a communication plan for a Tpetra DistObject.
size_t getTotalReceiveLength() const
Total number of values this process will receive from other processes.
Teuchos::RCP< Node > getNode() const
Get this Map&#39;s Node object.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
std::string description() const
A one-line human-readable description of this object.
std::string description() const
A one-line human-readable description of this object.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a view of the global indices owned by this process.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
Describes a parallel distribution of objects over processes.
std::string description() const
A one-line human-readable description of this object.
Implementation of Directory for a contiguous, uniformly distributed Map.
LookupStatus getEntriesImpl(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs, const Teuchos::ArrayView< LocalOrdinal > &localIDs, const bool computeLIDs) const
Find process IDs and (optionally) local IDs for the given global IDs.
void createFromRecvs(const ArrayView< const Ordinal > &remoteIDs, const ArrayView< const int > &remoteNodeIDs, Array< Ordinal > &exportIDs, Array< int > &exportNodeIDs)
Set up Distributor using list of process ranks from which to receive.
global_size_t getGlobalNumElements() const
The number of elements in this Map.