Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Experimental_RBILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
44 #define IFPACK2_EXPERIMENTAL_CRSRBILUK_DEF_HPP
45 
46 #include <Tpetra_Experimental_BlockMultiVector.hpp>
47 #include<Ifpack2_OverlappingRowMatrix.hpp>
48 #include<Ifpack2_LocalFilter.hpp>
49 #include <Ifpack2_Experimental_RBILUK.hpp>
50 #include <Ifpack2_Utilities.hpp>
51 #include <Ifpack2_RILUK.hpp>
52 // Need to include this if using the "classic" version of Tpetra,
53 // since it may not get included in that case.
54 #include <Kokkos_ArithTraits.hpp>
55 
56 namespace Ifpack2 {
57 
58 namespace Experimental {
59 
60 template<class MatrixType>
61 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
62  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
63  A_(Matrix_in),
64  A_block_(Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(Matrix_in))
65 {}
66 
67 template<class MatrixType>
68 RBILUK<MatrixType>::RBILUK (const Teuchos::RCP<const block_crs_matrix_type>& Matrix_in)
69  : RILUK<row_matrix_type>(Teuchos::rcp_dynamic_cast<const row_matrix_type>(Matrix_in) ),
70  A_block_(Matrix_in)
71 {}
72 
73 
74 template<class MatrixType>
76 
77 
78 template<class MatrixType>
79 void
80 RBILUK<MatrixType>::setMatrix (const Teuchos::RCP<const block_crs_matrix_type>& A)
81 {
82  // It's legal for A to be null; in that case, you may not call
83  // initialize() until calling setMatrix() with a nonnull input.
84  // Regardless, setting the matrix invalidates any previous
85  // factorization.
86  if (A.getRawPtr () != A_block_.getRawPtr ())
87  {
88  this->isAllocated_ = false;
89  this->isInitialized_ = false;
90  this->isComputed_ = false;
91  this->Graph_ = Teuchos::null;
92  L_block_ = Teuchos::null;
93  U_block_ = Teuchos::null;
94  D_block_ = Teuchos::null;
95  A_block_ = A;
96  }
97 }
98 
99 
100 
101 template<class MatrixType>
102 const typename RBILUK<MatrixType>::block_crs_matrix_type&
104 {
105  TEUCHOS_TEST_FOR_EXCEPTION(
106  L_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
107  "is null. Please call initialize() (and preferably also compute()) "
108  "before calling this method. If the input matrix has not yet been set, "
109  "you must first call setMatrix() with a nonnull input matrix before you "
110  "may call initialize() or compute().");
111  return *L_block_;
112 }
113 
114 
115 template<class MatrixType>
116 const typename RBILUK<MatrixType>::block_crs_matrix_type&
118 {
119  TEUCHOS_TEST_FOR_EXCEPTION(
120  D_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
121  "(of diagonal entries) is null. Please call initialize() (and "
122  "preferably also compute()) before calling this method. If the input "
123  "matrix has not yet been set, you must first call setMatrix() with a "
124  "nonnull input matrix before you may call initialize() or compute().");
125  return *D_block_;
126 }
127 
128 
129 template<class MatrixType>
130 const typename RBILUK<MatrixType>::block_crs_matrix_type&
132 {
133  TEUCHOS_TEST_FOR_EXCEPTION(
134  U_block_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
135  "is null. Please call initialize() (and preferably also compute()) "
136  "before calling this method. If the input matrix has not yet been set, "
137  "you must first call setMatrix() with a nonnull input matrix before you "
138  "may call initialize() or compute().");
139  return *U_block_;
140 }
141 
142 template<class MatrixType>
144 {
145  using Teuchos::null;
146  using Teuchos::rcp;
147 
148  if (! this->isAllocated_) {
149  // Deallocate any existing storage. This avoids storing 2x
150  // memory, since RCP op= does not deallocate until after the
151  // assignment.
152  this->L_ = null;
153  this->U_ = null;
154  this->D_ = null;
155  L_block_ = null;
156  U_block_ = null;
157  D_block_ = null;
158 
159  // Allocate Matrix using ILUK graphs
160  L_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getL_Graph (), blockSize_) );
161  U_block_ = rcp(new block_crs_matrix_type(*this->Graph_->getU_Graph (), blockSize_) );
162  D_block_ = rcp(new block_crs_matrix_type(*(Ifpack2::Details::computeDiagonalGraph(*(this->Graph_->getOverlapGraph()))),
163  blockSize_) );
164  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
165  U_block_->setAllToScalar (STM::zero ());
166  D_block_->setAllToScalar (STM::zero ());
167 
168  }
169  this->isAllocated_ = true;
170 }
171 
172 template<class MatrixType>
173 Teuchos::RCP<const typename RBILUK<MatrixType>::block_crs_matrix_type>
175  return A_block_;
176 }
177 
178 template<class MatrixType>
180 {
181  using Teuchos::RCP;
182  using Teuchos::rcp;
183  using Teuchos::rcp_dynamic_cast;
184 
185  if (A_block_.is_null()) {
186  TEUCHOS_TEST_FOR_EXCEPTION(A_.is_null(), std::runtime_error, "Ifpack2::Experimental::RBILUK::initialize: "
187  "The matrix is null. Please call setMatrix() with a nonnull input before calling this method.");
188  RCP<const LocalFilter<row_matrix_type> > filteredA = rcp_dynamic_cast<const LocalFilter<row_matrix_type> >(A_);
189  TEUCHOS_TEST_FOR_EXCEPTION(filteredA.is_null(), std::runtime_error, "Ifpack2::Experimental::RBILUK::initialize: "
190  "Cannot cast to filtered matrix.");
191  RCP<const OverlappingRowMatrix<row_matrix_type> > overlappedA = rcp_dynamic_cast<const OverlappingRowMatrix<row_matrix_type> >(filteredA->getUnderlyingMatrix());
192  if (overlappedA != Teuchos::null) {
193  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(overlappedA->getUnderlyingMatrix());
194  } else {
195  //If there is no overlap, filteredA could be the block CRS matrix
196  A_block_ = rcp_dynamic_cast<const block_crs_matrix_type>(filteredA->getUnderlyingMatrix());
197  }
198  }
199 
200  TEUCHOS_TEST_FOR_EXCEPTION(
201  A_block_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::initialize: "
202  "The matrix is null. Please call setMatrix() with a nonnull input "
203  "before calling this method.");
204 
205  blockSize_ = A_block_->getBlockSize();
206 
207  Teuchos::Time timer ("RBILUK::initialize");
208  { // Start timing
209  Teuchos::TimeMonitor timeMon (timer);
210 
211  // Calling initialize() means that the user asserts that the graph
212  // of the sparse matrix may have changed. We must not just reuse
213  // the previous graph in that case.
214  //
215  // Regarding setting isAllocated_ to false: Eventually, we may want
216  // some kind of clever memory reuse strategy, but it's always
217  // correct just to blow everything away and start over.
218  this->isInitialized_ = false;
219  this->isAllocated_ = false;
220  this->isComputed_ = false;
221  this->Graph_ = Teuchos::null;
222 
223  typedef Tpetra::CrsGraph<local_ordinal_type,
225  node_type> crs_graph_type;
226 
227  RCP<const crs_graph_type> matrixCrsGraph = Teuchos::rcpFromRef(A_block_->getCrsGraph() );
228  this->Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type> (matrixCrsGraph,
229  this->LevelOfFill_, 0));
230 
231  this->Graph_->initialize ();
232  allocate_L_and_U_blocks ();
233  initAllValues (*A_block_);
234  } // Stop timing
235 
236  this->isInitialized_ = true;
237  this->numInitialize_ += 1;
238  this->initializeTime_ += timer.totalElapsedTime ();
239 }
240 
241 
242 template<class MatrixType>
243 void
245 initAllValues (const block_crs_matrix_type& A)
246 {
247  //using Teuchos::ArrayRCP;
248  using Teuchos::RCP;
249  typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
250 
251  local_ordinal_type NumIn = 0, NumL = 0, NumU = 0;
252  bool DiagFound = false;
253  size_t NumNonzeroDiags = 0;
254  size_t MaxNumEntries = A.getNodeMaxNumRowEntries();
255  local_ordinal_type blockMatSize = blockSize_*blockSize_;
256 
257  // First check that the local row map ordering is the same as the local portion of the column map.
258  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
259  // implicitly assume that this is the case.
260  Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getNodeElementList();
261  Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getNodeElementList();
262  bool gidsAreConsistentlyOrdered=true;
263  global_ordinal_type indexOfInconsistentGID=0;
264  for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
265  if (rowGIDs[i] != colGIDs[i]) {
266  gidsAreConsistentlyOrdered=false;
267  indexOfInconsistentGID=i;
268  break;
269  }
270  }
271  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
272  "The ordering of the local GIDs in the row and column maps is not the same"
273  << std::endl << "at index " << indexOfInconsistentGID
274  << ". Consistency is required, as all calculations are done with"
275  << std::endl << "local indexing.");
276 
277  // Allocate temporary space for extracting the strictly
278  // lower and upper parts of the matrix A.
279  Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
280  Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
281  Teuchos::Array<scalar_type> LV(MaxNumEntries*blockMatSize);
282  Teuchos::Array<scalar_type> UV(MaxNumEntries*blockMatSize);
283 
284  Teuchos::Array<scalar_type> diagValues(blockMatSize);
285 
286  L_block_->setAllToScalar (STM::zero ()); // Zero out L and U matrices
287  U_block_->setAllToScalar (STM::zero ());
288  D_block_->setAllToScalar (STM::zero ()); // Set diagonal values to zero
289 
290  RCP<const map_type> rowMap = L_block_->getRowMap ();
291 
292  // First we copy the user's matrix into L and U, regardless of fill level.
293  // It is important to note that L and U are populated using local indices.
294  // This means that if the row map GIDs are not monotonically increasing
295  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
296  // matrix is not the one that you would get if you based L (U) on GIDs.
297  // This is ok, as the *order* of the GIDs in the rowmap is a better
298  // expression of the user's intent than the GIDs themselves.
299 
300  for (size_t myRow=0; myRow<A.getNodeNumRows(); ++myRow) {
301  local_ordinal_type local_row = myRow;
302 
303  //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
304  // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
305  const local_ordinal_type * InI = 0;
306  scalar_type * InV = 0;
307  A.getLocalRowView(local_row, InI, InV, NumIn);
308 
309  // Split into L and U (we don't assume that indices are ordered).
310 
311  NumL = 0;
312  NumU = 0;
313  DiagFound = false;
314 
315  for (local_ordinal_type j = 0; j < NumIn; ++j) {
316  const local_ordinal_type k = InI[j];
317  const local_ordinal_type blockOffset = blockMatSize*j;
318 
319  if (k == local_row) {
320  DiagFound = true;
321  // Store perturbed diagonal in Tpetra::Vector D_
322  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
323  diagValues[jj] = this->Rthresh_ * InV[blockOffset+jj] + IFPACK2_SGN(InV[blockOffset+jj]) * this->Athresh_;
324  D_block_->replaceLocalValues(local_row, &InI[j], diagValues.getRawPtr(), 1);
325  }
326  else if (k < 0) { // Out of range
327  TEUCHOS_TEST_FOR_EXCEPTION(
328  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
329  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
330  "probably an artifact of the undocumented assumptions of the "
331  "original implementation (likely copied and pasted from Ifpack). "
332  "Nevertheless, the code I found here insisted on this being an error "
333  "state, so I will throw an exception here.");
334  }
335  else if (k < local_row) {
336  LI[NumL] = k;
337  const local_ordinal_type LBlockOffset = NumL*blockMatSize;
338  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
339  LV[LBlockOffset+jj] = InV[blockOffset+jj];
340  NumL++;
341  }
342  else if (Teuchos::as<size_t>(k) <= rowMap->getNodeNumElements()) {
343  UI[NumU] = k;
344  const local_ordinal_type UBlockOffset = NumU*blockMatSize;
345  for (local_ordinal_type jj = 0; jj < blockMatSize; ++jj)
346  UV[UBlockOffset+jj] = InV[blockOffset+jj];
347  NumU++;
348  }
349  }
350 
351  // Check in things for this row of L and U
352 
353  if (DiagFound) {
354  ++NumNonzeroDiags;
355  } else
356  {
357  for (local_ordinal_type jj = 0; jj < blockSize_; ++jj)
358  diagValues[jj*(blockSize_+1)] = this->Athresh_;
359  D_block_->replaceLocalValues(local_row, &local_row, diagValues.getRawPtr(), 1);
360  }
361 
362  if (NumL) {
363  L_block_->replaceLocalValues(local_row, &LI[0], &LV[0], NumL);
364  }
365 
366  if (NumU) {
367  U_block_->replaceLocalValues(local_row, &UI[0], &UV[0], NumU);
368  }
369  }
370 
371  this->isInitialized_ = true;
372 }
373 
374 template<class MatrixType>
376 {
377  // initialize() checks this too, but it's easier for users if the
378  // error shows them the name of the method that they actually
379  // called, rather than the name of some internally called method.
380  TEUCHOS_TEST_FOR_EXCEPTION(
381  A_block_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
382  "The matrix is null. Please call setMatrix() with a nonnull input "
383  "before calling this method.");
384 
385  if (! this->isInitialized ()) {
386  initialize (); // Don't count this in the compute() time
387  }
388 
389  typedef typename Tpetra::Details::GetLapackType<impl_scalar_type>::lapack_scalar_type LST;
390  typedef typename Tpetra::Details::GetLapackType<impl_scalar_type>::lapack_type lapack_type;
391 
392  lapack_type lapack;
393 
394  Teuchos::Time timer ("RBILUK::compute");
395  { // Start timing
396  this->isComputed_ = false;
397 
398  // MinMachNum should be officially defined, for now pick something a little
399  // bigger than IEEE underflow value
400 
401 // const scalar_type MinDiagonalValue = STS::rmin ();
402 // const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
403  initAllValues (*A_block_);
404 
405  size_t NumIn;
406  local_ordinal_type NumL, NumU, NumURead;
407 
408  // Get Maximum Row length
409  const size_t MaxNumEntries =
410  L_block_->getNodeMaxNumRowEntries () + U_block_->getNodeMaxNumRowEntries () + 1;
411 
412  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
413 
414  const local_ordinal_type rowStride = blockSize_;
415  const local_ordinal_type colStride = 1;
416 
417  Teuchos::Array<int> ipiv(blockSize_);
418  Teuchos::Array<LST> work(1);
419 
420  size_t num_cols = U_block_->getColMap()->getNodeNumElements();
421  Teuchos::Array<int> colflag(num_cols);
422 
423  Teuchos::Array<impl_scalar_type> diagMod(blockMatSize,STM::zero());
424  little_block_type diagModBlock(&diagMod[0], blockSize_, rowStride, colStride);
425  Teuchos::Array<impl_scalar_type> matTmpArray(blockMatSize,STM::zero());
426  little_block_type matTmp(&matTmpArray[0], blockSize_, rowStride, colStride);
427  Teuchos::Array<impl_scalar_type> multiplierArray(blockMatSize, STM::zero());
428  little_block_type multiplier(&multiplierArray[0], blockSize_, rowStride, colStride);
429 
430 // Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
431 
432  // Now start the factorization.
433 
434  // Need some integer workspace and pointers
435  local_ordinal_type NumUU;
436  for (size_t j = 0; j < num_cols; ++j) {
437  colflag[j] = -1;
438  }
439  Teuchos::Array<local_ordinal_type> InI(MaxNumEntries, 0);
440  Teuchos::Array<scalar_type> InV(MaxNumEntries*blockMatSize,STM::zero());
441 
442  const local_ordinal_type numLocalRows = L_block_->getNodeNumRows ();
443  for (local_ordinal_type local_row = 0; local_row < numLocalRows; ++local_row) {
444 
445  // Fill InV, InI with current row of L, D and U combined
446 
447  NumIn = MaxNumEntries;
448  const local_ordinal_type * colValsL;
449  scalar_type * valsL;
450 
451  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
452  for (local_ordinal_type j = 0; j < NumL; ++j)
453  {
454  const local_ordinal_type matOffset = blockMatSize*j;
455  little_block_type lmat(&valsL[matOffset],blockSize_,rowStride, colStride);
456  little_block_type lmatV(&InV[matOffset],blockSize_,rowStride, colStride);
457  lmatV.assign(lmat);
458  InI[j] = colValsL[j];
459  }
460 
461  little_block_type dmat = D_block_->getLocalBlock(local_row, local_row);
462  little_block_type dmatV(&InV[NumL*blockMatSize], blockSize_, rowStride, colStride);
463  dmatV.assign(dmat);
464  InI[NumL] = local_row;
465 
466  const local_ordinal_type * colValsU;
467  scalar_type * valsU;
468  U_block_->getLocalRowView(local_row, colValsU, valsU, NumURead);
469  NumU = 0;
470  for (local_ordinal_type j = 0; j < NumURead; ++j)
471  {
472  if (!(colValsU[j] < numLocalRows)) continue;
473  InI[NumL+1+j] = colValsU[j];
474  const local_ordinal_type matOffset = blockMatSize*(NumL+1+j);
475  little_block_type umat(&valsU[blockMatSize*j], blockSize_, rowStride, colStride);
476  little_block_type umatV(&InV[matOffset], blockSize_, rowStride, colStride);
477  umatV.assign(umat);
478  NumU += 1;
479  }
480  NumIn = NumL+NumU+1;
481 
482  // Set column flags
483  for (size_t j = 0; j < NumIn; ++j) {
484  colflag[InI[j]] = j;
485  }
486 
487 
488  scalar_type diagmod = STM::zero (); // Off-diagonal accumulator
489  diagModBlock.fill(diagmod);
490 
491  for (local_ordinal_type jj = 0; jj < NumL; ++jj) {
492  local_ordinal_type j = InI[jj];
493  little_block_type currentVal(&InV[jj*blockMatSize], blockSize_, rowStride, colStride); // current_mults++;
494  multiplier.assign(currentVal);
495 
496  const little_block_type dmatInverse = D_block_->getLocalBlock(j,j);
497  blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(currentVal.getRawPtr()), reinterpret_cast<impl_scalar_type*>(dmatInverse.getRawPtr()), reinterpret_cast<impl_scalar_type*>(matTmp.getRawPtr()), blockSize_);
498  currentVal.assign(matTmp);
499 
500  const local_ordinal_type * UUI;
501  scalar_type * UUV;
502  U_block_->getLocalRowView(j, UUI, UUV, NumUU);
503 
504  if (this->RelaxValue_ == STM::zero ()) {
505  for (local_ordinal_type k = 0; k < NumUU; ++k) {
506  if (!(UUI[k] < numLocalRows)) continue;
507  const int kk = colflag[UUI[k]];
508  if (kk > -1) {
509  little_block_type kkval(&InV[kk*blockMatSize], blockSize_, rowStride, colStride);
510  little_block_type uumat(&UUV[k*blockMatSize], blockSize_, rowStride, colStride);
511  blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.getRawPtr()), reinterpret_cast<impl_scalar_type*>(uumat.getRawPtr()), reinterpret_cast<impl_scalar_type*>(kkval.getRawPtr()), blockSize_, -STM::one(), STM::one());
512  }
513  }
514  }
515  else {
516  for (local_ordinal_type k = 0; k < NumUU; ++k) {
517  if (!(UUI[k] < numLocalRows)) continue;
518  const int kk = colflag[UUI[k]];
519  little_block_type uumat(&UUV[k*blockMatSize], blockSize_, rowStride, colStride);
520  if (kk > -1) {
521  little_block_type kkval(&InV[kk*blockMatSize], blockSize_, rowStride, colStride);
522  blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.getRawPtr()), reinterpret_cast<impl_scalar_type*>(uumat.getRawPtr()), reinterpret_cast<impl_scalar_type*>(kkval.getRawPtr()), blockSize_, -STM::one(), STM::one());
523  }
524  else {
525  blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(multiplier.getRawPtr()), reinterpret_cast<impl_scalar_type*>(uumat.getRawPtr()), reinterpret_cast<impl_scalar_type*>(diagModBlock.getRawPtr()), blockSize_, -STM::one(), STM::one());
526  }
527  }
528  }
529  }
530  if (NumL) {
531  // Replace current row of L
532  L_block_->replaceLocalValues(local_row, InI.getRawPtr(), InV.getRawPtr(), NumL);
533  }
534 
535  dmat.assign(dmatV);
536 
537  if (this->RelaxValue_ != STM::zero ()) {
538  dmat.update(this->RelaxValue_, diagModBlock);
539  }
540 
541 // if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
542 // if (STS::real (DV[i]) < STM::zero ()) {
543 // DV[i] = -MinDiagonalValue;
544 // }
545 // else {
546 // DV[i] = MinDiagonalValue;
547 // }
548 // }
549 // else
550  {
551 
552  LST* const d_raw = reinterpret_cast<LST*> (dmat.getRawPtr());
553  int lapackInfo;
554  for (int k = 0; k < blockSize_; ++k) {
555  ipiv[k] = 0;
556  }
557 
558  lapack.GETRF(blockSize_, blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), &lapackInfo);
559  TEUCHOS_TEST_FOR_EXCEPTION(
560  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
561  "lapackInfo = " << lapackInfo << " which indicates an error in the factorization GETRF.");
562 
563  int lwork = -1;
564  lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
565  TEUCHOS_TEST_FOR_EXCEPTION(
566  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
567  "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
568 
569  typedef typename Kokkos::Details::ArithTraits<impl_scalar_type>::mag_type ImplMagnitudeType;
570  ImplMagnitudeType worksize = Kokkos::Details::ArithTraits<impl_scalar_type>::magnitude(work[0]);
571  lwork = static_cast<int>(worksize);
572  work.resize(lwork);
573  lapack.GETRI(blockSize_, d_raw, blockSize_, ipiv.getRawPtr(), work.getRawPtr(), lwork, &lapackInfo);
574  TEUCHOS_TEST_FOR_EXCEPTION(
575  lapackInfo != 0, std::runtime_error, "Ifpack2::Experimental::RBILUK::compute: "
576  "lapackInfo = " << lapackInfo << " which indicates an error in the matrix inverse GETRI.");
577  }
578 
579  for (local_ordinal_type j = 0; j < NumU; ++j) {
580  little_block_type currentVal(&InV[(NumL+1+j)*blockMatSize], blockSize_, rowStride, colStride); // current_mults++;
581  // scale U by the diagonal inverse
582  blockMatOpts.square_matrix_matrix_multiply(reinterpret_cast<impl_scalar_type*>(dmat.getRawPtr()), reinterpret_cast<impl_scalar_type*>(currentVal.getRawPtr()), reinterpret_cast<impl_scalar_type*>(matTmp.getRawPtr()), blockSize_);
583  currentVal.assign(matTmp);
584  }
585 
586  if (NumU) {
587  // Replace current row of L and U
588  U_block_->replaceLocalValues (local_row, &InI[NumL+1], &InV[blockMatSize*(NumL+1)], NumU);
589  }
590 
591  // Reset column flags
592  for (size_t j = 0; j < num_cols; ++j) {
593  colflag[j] = -1;
594  }
595  }
596 
597  } // Stop timing
598 
599  this->isComputed_ = true;
600  this->numCompute_ += 1;
601  this->computeTime_ += timer.totalElapsedTime ();
602 }
603 
604 
605 template<class MatrixType>
606 void
608 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
609  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
610  Teuchos::ETransp mode,
611  scalar_type alpha,
612  scalar_type beta) const
613 {
614  using Teuchos::RCP;
615  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
617  typedef Tpetra::MultiVector<scalar_type,
619 
620  TEUCHOS_TEST_FOR_EXCEPTION(
621  A_block_.is_null (), std::runtime_error, "Ifpack2::Experimental::RBILUK::apply: The matrix is "
622  "null. Please call setMatrix() with a nonnull input, then initialize() "
623  "and compute(), before calling this method.");
624  TEUCHOS_TEST_FOR_EXCEPTION(
625  ! this->isComputed (), std::runtime_error,
626  "Ifpack2::Experimental::RBILUK::apply: If you have not yet called compute(), "
627  "you must call compute() before calling this method.");
628  TEUCHOS_TEST_FOR_EXCEPTION(
629  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
630  "Ifpack2::Experimental::RBILUK::apply: X and Y do not have the same number of columns. "
631  "X.getNumVectors() = " << X.getNumVectors ()
632  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
633  TEUCHOS_TEST_FOR_EXCEPTION(
634  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
635  "Ifpack2::Experimental::RBILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
636  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
637  "fixed. There is a FIXME in this file about this very issue.");
638 
639  const local_ordinal_type blockMatSize = blockSize_*blockSize_;
640 
641  const local_ordinal_type rowStride = blockSize_;
642  const local_ordinal_type colStride = 1;
643 
644  BMV yBlock (Y, * (A_block_->getGraph ()->getDomainMap ()), blockSize_);
645  const BMV xBlock (X, * (A_block_->getColMap ()), blockSize_);
646 
647  Teuchos::Array<scalar_type> lclarray(blockSize_);
648  little_vec_type lclvec(&lclarray[0], blockSize_, colStride);
649  const scalar_type one = STM::one ();
650  const scalar_type zero = STM::zero ();
651 
652  Teuchos::Time timer ("RBILUK::apply");
653  { // Start timing
654  Teuchos::TimeMonitor timeMon (timer);
655  if (alpha == one && beta == zero) {
656  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
657  // Start by solving L C = X for C. C must have the same Map
658  // as D. We have to use a temp multivector, since
659  // localSolve() does not allow its input and output to alias
660  // one another.
661  //
662  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
663  const local_ordinal_type numVectors = xBlock.getNumVectors();
664  BMV cBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
665  BMV rBlock (* (A_block_->getGraph ()->getDomainMap ()), blockSize_, numVectors);
666  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
667  {
668  for (size_t i = 0; i < D_block_->getNodeNumRows(); ++i)
669  {
670  local_ordinal_type local_row = i;
671  little_vec_type xval = xBlock.getLocalBlock(local_row,imv);
672  little_vec_type cval = cBlock.getLocalBlock(local_row,imv);
673  cval.assign(xval);
674 
675  local_ordinal_type NumL;
676  const local_ordinal_type * colValsL;
677  scalar_type * valsL;
678 
679  L_block_->getLocalRowView(local_row, colValsL, valsL, NumL);
680 
681  for (local_ordinal_type j = 0; j < NumL; ++j)
682  {
683  local_ordinal_type col = colValsL[j];
684  little_vec_type prevVal = cBlock.getLocalBlock(col, imv);
685 
686  const local_ordinal_type matOffset = blockMatSize*j;
687  little_block_type lij(&valsL[matOffset],blockSize_,rowStride, colStride);
688 
689  cval.matvecUpdate(-one, lij, prevVal);
690  }
691  }
692  }
693 
694  // Solve D R = C. Note that D has been replaced by D^{-1} at this point.
695  D_block_->applyBlock(cBlock, rBlock);
696 
697  // Solve U Y = R.
698  for (local_ordinal_type imv = 0; imv < numVectors; ++imv)
699  {
700  const local_ordinal_type numRows = D_block_->getNodeNumRows();
701  for (local_ordinal_type i = 0; i < numRows; ++i)
702  {
703  local_ordinal_type local_row = (numRows-1)-i;
704  little_vec_type rval = rBlock.getLocalBlock(local_row,imv);
705  little_vec_type yval = yBlock.getLocalBlock(local_row,imv);
706  yval.assign(rval);
707 
708  local_ordinal_type NumU;
709  const local_ordinal_type * colValsU;
710  scalar_type * valsU;
711 
712  U_block_->getLocalRowView(local_row, colValsU, valsU, NumU);
713 
714  for (local_ordinal_type j = 0; j < NumU; ++j)
715  {
716  local_ordinal_type col = colValsU[NumU-1-j];
717  little_vec_type prevVal = yBlock.getLocalBlock(col, imv);
718 
719  const local_ordinal_type matOffset = blockMatSize*(NumU-1-j);
720  little_block_type uij(&valsU[matOffset], blockSize_, rowStride, colStride);
721 
722  yval.matvecUpdate(-one, uij, prevVal);
723  }
724  }
725  }
726  }
727  else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
728  TEUCHOS_TEST_FOR_EXCEPTION(
729  true, std::runtime_error,
730  "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm. ");
731  }
732  }
733  else { // alpha != 1 or beta != 0
734  if (alpha == zero) {
735  if (beta == zero) {
736  Y.putScalar (zero);
737  } else {
738  Y.scale (beta);
739  }
740  } else { // alpha != zero
741  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
742  apply (X, Y_tmp, mode);
743  Y.update (alpha, Y_tmp, beta);
744  }
745  }
746  } // Stop timing
747 
748  this->numApply_ += 1;
749  this->applyTime_ = timer.totalElapsedTime ();
750 }
751 
752 
753 template<class MatrixType>
755 {
756  std::ostringstream os;
757 
758  // Output is a valid YAML dictionary in flow style. If you don't
759  // like everything on a single line, you should call describe()
760  // instead.
761  os << "\"Ifpack2::Experimental::RBILUK\": {";
762  os << "Initialized: " << (this->isInitialized () ? "true" : "false") << ", "
763  << "Computed: " << (this->isComputed () ? "true" : "false") << ", ";
764 
765  os << "Level-of-fill: " << this->getLevelOfFill() << ", ";
766 
767  if (A_block_.is_null ()) {
768  os << "Matrix: null";
769  }
770  else {
771  os << "Global matrix dimensions: ["
772  << A_block_->getGlobalNumRows () << ", " << A_block_->getGlobalNumCols () << "]"
773  << ", Global nnz: " << A_block_->getGlobalNumEntries();
774  }
775 
776  os << "}";
777  return os.str ();
778 }
779 
780 } // namespace Experimental
781 
782 } // namespace Ifpack2
783 
784 // FIXME (mfh 26 Aug 2015) We only need to do instantiation for
785 // MatrixType = Tpetra::RowMatrix. Conversions to BlockCrsMatrix are
786 // handled internally via dynamic cast.
787 
788 #define IFPACK2_EXPERIMENTAL_RBILUK_INSTANT(S,LO,GO,N) \
789  template class Ifpack2::Experimental::RBILUK< Tpetra::Experimental::BlockCrsMatrix<S, LO, GO, N> >; \
790  template class Ifpack2::Experimental::RBILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
791 
792 #endif
Ifpack2 features that are experimental. Use at your own risk.
void initialize()
Initialize by computing the symbolic incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:179
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:179
virtual ~RBILUK()
Destructor (declared virtual for memory safety).
Definition: Ifpack2_Experimental_RBILUK_def.hpp:75
Teuchos::RCP< const block_crs_matrix_type > getBlockMatrix() const
Get the input matrix.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:174
const block_crs_matrix_type & getUBlock() const
Return the U factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:131
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:170
Teuchos::RCP< crs_matrix_type > L_
The L (lower triangular) factor of ILU(k).
Definition: Ifpack2_RILUK_decl.hpp:571
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the (inverse of the) incomplete factorization to X, resulting in Y.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:608
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:243
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:182
bool isComputed() const
Whether compute() has been called on this object.
Definition: Ifpack2_RILUK_decl.hpp:344
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:176
const block_crs_matrix_type & getDBlock() const
Return the diagonal entries of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:117
void compute()
Compute the (numeric) incomplete factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:375
Teuchos::RCP< Ifpack2::IlukGraph< Tpetra::CrsGraph< local_ordinal_type, global_ordinal_type, node_type > > > Graph_
The ILU(k) graph.
Definition: Ifpack2_RILUK_decl.hpp:559
File for utility functions.
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:97
bool isInitialized() const
Whether initialize() has been called on this object.
Definition: Ifpack2_RILUK_decl.hpp:340
void setMatrix(const Teuchos::RCP< const block_crs_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:80
Definition: Ifpack2_Details_Amesos2Wrapper_decl.hpp:63
Teuchos::RCP< vec_type > D_
The diagonal entries of the ILU(k) factorization.
Definition: Ifpack2_RILUK_decl.hpp:575
const block_crs_matrix_type & getLBlock() const
Return the L factor of the ILU factorization.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:103
Sparse matrix (Tpetra::RowMatrix subclass) with ghost rows.
Definition: Ifpack2_OverlappingRowMatrix_decl.hpp:59
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:191
std::string description() const
A one-line description of this object.
Definition: Ifpack2_Experimental_RBILUK_def.hpp:754
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
ILU(k) factorization of a given Tpetra::Experimental::BlockCrsMatrix.
Definition: Ifpack2_Experimental_RBILUK_decl.hpp:162
Teuchos::RCP< crs_matrix_type > U_
The U (upper triangular) factor of ILU(k).
Definition: Ifpack2_RILUK_decl.hpp:573