45#include "CombBLAS/CombBLAS.h"
// ExchangeData: all-to-all redistribution of std::tuple<IT,IT,NT> records over `World`.
//   tempTuples - one bucket per destination rank (bucket i is packed at displacement i and
//                therefore delivered to rank i); each bucket's storage is released once its
//                contents have been copied into the send buffer.
//   World      - communicator used for both the count exchange and the tuple exchange.
//   datasize   - reference parameter. NOTE(review): no write to it is visible here;
//                presumably it receives totrecv - confirm against the full source.
// The receive buffer is allocated with new[]. NOTE(review): the return statement is not
// visible here; presumably that buffer is returned and the caller owns it - confirm.
50 template <
class IT,
class NT>
51 std::tuple<IT,IT,NT>*
ExchangeData(std::vector<std::vector<std::tuple<IT,IT,NT>>> & tempTuples, MPI_Comm World,
IT& datasize)
// Describe one tuple to MPI as sizeof(tuple) contiguous chars, so tuples travel as raw bytes.
54 MPI_Datatype MPI_tuple;
55 MPI_Type_contiguous(
sizeof(std::tuple<IT,IT,NT>), MPI_CHAR, &MPI_tuple);
56 MPI_Type_commit(&MPI_tuple);
// NOTE(review): the declaration of nprocs is not visible here.
59 MPI_Comm_size(World, &
nprocs);
61 int * sendcnt =
new int[
nprocs];
62 int * recvcnt =
new int[
nprocs];
// The trailing () value-initializes both displacement arrays to zero.
63 int * sdispls =
new int[
nprocs]();
64 int * rdispls =
new int[
nprocs]();
// Per-destination send count and the running total of tuples to send.
// NOTE(review): the enclosing loop header and the declaration of totsend are not visible here.
70 sendcnt[i] = tempTuples[i].size();
71 totsend += tempTuples[i].size();
// Exchange counts so that each rank knows how many tuples every peer will send it.
74 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World);
// Exclusive prefix sums: entry 0 stays zero, entry k becomes the sum of the first k counts.
76 std::partial_sum(sendcnt, sendcnt+
nprocs-1, sdispls+1);
77 std::partial_sum(recvcnt, recvcnt+
nprocs-1, rdispls+1);
78 IT totrecv = std::accumulate(recvcnt,recvcnt+
nprocs,
static_cast<IT>(0));
80 std::vector< std::tuple<IT,IT,NT> > sendTuples(totsend);
81 for(
int i=0; i<
nprocs; ++i)
// Pack bucket i at its displacement, then swap it with an empty vector to free its memory.
83 copy(tempTuples[i].begin(), tempTuples[i].end(), sendTuples.data()+sdispls[i]);
84 std::vector< std::tuple<IT,IT,NT> >().swap(tempTuples[i]);
87 std::tuple<IT,IT,NT>* recvTuples =
new std::tuple<IT,IT,NT>[totrecv];
89 MPI_Alltoallv(sendTuples.data(), sendcnt, sdispls, MPI_tuple, recvTuples, recvcnt, rdispls, MPI_tuple, World);
// NOTE(review): DeleteAll is defined elsewhere; presumably it delete[]s each argument - confirm.
90 DeleteAll(sendcnt, recvcnt, sdispls, rdispls);
91 MPI_Type_free(&MPI_tuple);
// SpecialExchangeData: sends one sparse chunk to each peer and rebuilds the chunks that arrive.
//   sendChunks - chunk i is packed at displacement i, so it is delivered to rank i of World.
//                NOTE(review): every count/displacement array has sendChunks.size() entries,
//                so this presumably requires sendChunks.size() == size of World - confirm.
//   World      - communicator for the profile exchange and the tuple exchange.
//   datasize   - not referenced in the visible lines.
//   dummy      - not referenced in the visible lines. NOTE(review): presumably present only so
//                that NT can be deduced at the call site - confirm.
//   recvChunks - output; one DER is appended per received chunk, in source-rank order.
96 template <
class IT,
class NT,
class DER>
97 void SpecialExchangeData( std::vector<DER> & sendChunks, MPI_Comm World,
IT& datasize,
NT dummy, vector<DER> & recvChunks){
// The rank is queried on MPI_COMM_WORLD, not on World.
// NOTE(review): the declaration of myrank is not visible here.
99 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
// vm_usage and resident_set are not used in the visible lines.
100 double vm_usage, resident_set;
101 typedef typename DER::LocalIT LIT;
102 int numChunks = sendChunks.size();
// Describe one tuple to MPI as sizeof(tuple) contiguous chars, so tuples travel as raw bytes.
104 MPI_Datatype MPI_tuple;
105 MPI_Type_contiguous(
sizeof(std::tuple<LIT,LIT,NT>), MPI_CHAR, &MPI_tuple);
106 MPI_Type_commit(&MPI_tuple);
// sendprfl / recvprfl hold three ints per chunk: nnz, nrow, ncol (filled in below).
// The trailing () on the displacement arrays value-initializes them to zero.
108 int * sendcnt =
new int[numChunks];
109 int * sendprfl =
new int[numChunks*3];
110 int * sdispls =
new int[numChunks]();
111 int * recvcnt =
new int[numChunks];
112 int * recvprfl =
new int[numChunks*3];
113 int * rdispls =
new int[numChunks]();
// Record each chunk's profile; the tuple count sent to peer i equals that chunk's nnz.
// NOTE(review): the declaration of totsend is not visible here.
116 for(
IT i=0; i<numChunks; ++i){
117 sendprfl[i*3] = sendChunks[i].getnnz();
118 sendprfl[i*3+1] = sendChunks[i].getnrow();
119 sendprfl[i*3+2] = sendChunks[i].getncol();
120 sendcnt[i] = sendprfl[i*3];
121 totsend += sendcnt[i];
// Exchange the three-int profiles; the first int of each received profile is the receive count.
124 MPI_Alltoall(sendprfl, 3, MPI_INT, recvprfl, 3, MPI_INT, World);
125 for(
int i = 0; i < numChunks; i++) recvcnt[i] = recvprfl[i*3];
// Exclusive prefix sums: entry 0 stays zero, entry k becomes the sum of the first k counts.
127 std::partial_sum(sendcnt, sendcnt+numChunks-1, sdispls+1);
128 std::partial_sum(recvcnt, recvcnt+numChunks-1, rdispls+1);
129 IT totrecv = std::accumulate(recvcnt,recvcnt+numChunks,
static_cast<IT>(0));
131 std::tuple<LIT,LIT,NT>* sendTuples =
new std::tuple<LIT,LIT,NT>[totsend];
132 std::tuple<LIT,LIT,NT>* recvTuples =
new std::tuple<LIT,LIT,NT>[totrecv];
// Flatten every chunk's nonzeros into sendTuples in chunk order as (rowid, colid, value).
// NOTE(review): the declaration of kk is not visible here.
135 for(
int i = 0; i < numChunks; i++){
136 for(
typename DER::SpColIter colit = sendChunks[i].begcol(); colit != sendChunks[i].endcol(); ++colit){
137 for(
typename DER::SpColIter::NzIter nzit = sendChunks[i].begnz(colit); nzit != sendChunks[i].endnz(colit); ++nzit){
// val is not used in the visible lines; the tuple below reads nzit.value() again.
138 NT val = nzit.value();
139 sendTuples[kk++] = std::make_tuple(nzit.rowid(), colit.colid(), nzit.value());
144 MPI_Alltoallv(sendTuples, sendcnt, sdispls, MPI_tuple, recvTuples, recvcnt, rdispls, MPI_tuple, World);
// NOTE(review): DeleteAll is defined elsewhere; presumably it delete[]s each argument - confirm.
145 DeleteAll(sendcnt, sendprfl, sdispls, sendTuples);
// Copy each received segment into its own new[] buffer, which is handed to SpTuples below.
// NOTE(review): no visible line releases these buffers or the tempTuples pointer array;
// confirm who owns them (SpTuples / DER, or a line not shown here).
148 tuple<LIT, LIT, NT> ** tempTuples =
new tuple<LIT, LIT, NT>*[numChunks];
149 for (
int i = 0; i < numChunks; i++){
150 tempTuples[i] =
new tuple<LIT, LIT, NT>[recvcnt[i]];
151 memcpy(tempTuples[i], recvTuples+rdispls[i], recvcnt[i]*
sizeof(tuple<LIT, LIT, NT>));
// Rebuild chunk i from its tuples; recvprfl[i*3+1] and recvprfl[i*3+2] are the sender's
// nrow and ncol for that chunk.
154 for (
int i = 0; i < numChunks; i++){
155 recvChunks.push_back(DER(SpTuples<LIT, NT>(recvcnt[i], recvprfl[i*3+1], recvprfl[i*3+2], tempTuples[i]),
false));
159 DeleteAll(recvcnt, recvprfl, rdispls, recvTuples);
160 MPI_Type_free(&MPI_tuple);
// NOTE(review): the declaration introduced by this template header is not visible here.
165 template <
class IT,
class NT,
class DER>
// Body of an SpParMat3D constructor. NOTE(review): its signature line is not visible here;
// the body reads `nlayers` and `special`, presumably constructor parameters or members - confirm.
172 template <
class IT,
class NT,
class DER>
// The global index type must be at least as wide as the local index type of DER.
174 assert( (
sizeof(
IT) >=
sizeof(
typename DER::LocalIT)) );
// The 3D grid is built directly on MPI_COMM_WORLD.
175 commGrid3D.reset(
new CommGrid3D(MPI_COMM_WORLD, nlayers, 0, 0, special));
// The per-layer matrix is created on this process's layer communicator with no local matrix supplied.
176 layermat.reset(
new SpParMat<IT, NT, DER>(commGrid3D->GetLayerWorld()));
// Constructs a 3D matrix around an already-built local matrix and an existing 3D grid.
//   localMatrix - local piece handed to the per-layer SpParMat. NOTE(review): presumably
//                 SpParMat takes ownership of this pointer - confirm.
//   grid3d      - shared 3D communication grid, stored as-is.
//   colsplit    - stored in the member of the same name.
//   special     - stored in the member of the same name.
179 template <
class IT,
class NT,
class DER>
180 SpParMat3D< IT,NT,DER >::SpParMat3D (DER * localMatrix, std::shared_ptr<CommGrid3D> grid3d,
bool colsplit,
bool special): commGrid3D(grid3d), colsplit(colsplit), special(special){
// The global index type must be at least as wide as the local index type of DER.
181 assert( (
sizeof(
IT) >=
sizeof(
typename DER::LocalIT)) );
// nlayers is taken from the size of the fiber communicator rather than passed in.
182 MPI_Comm_size(commGrid3D->GetFiberWorld(), &nlayers);
183 layermat.reset(
new SpParMat<IT, NT, DER>(localMatrix, commGrid3D->GetLayerWorld()));
// Builds a 3D-distributed matrix from the 2D-distributed matrix A2D.
//   A2D      - source matrix; its communicator becomes the world of the new 3D grid.
//   nlayers  - layer count forwarded to CommGrid3D and stored in the member.
//   colsplit - stored in the member; selects the orientation handling in the first path below.
//   special  - stored in the member and forwarded to CommGrid3D.
// NOTE(review): two separate construction paths are visible below (spSeq is declared twice),
// but the branch choosing between them is not; presumably it tests `special` - confirm.
186 template <
class IT,
class NT,
class DER>
187 SpParMat3D< IT,NT,DER >::SpParMat3D (
const SpParMat< IT,NT,DER > & A2D,
int nlayers,
bool colsplit,
bool special): nlayers(nlayers), colsplit(colsplit), special(special){
188 typedef typename DER::LocalIT LIT;
189 auto commGrid2D = A2D.getcommgrid();
190 int nprocs = commGrid2D->GetSize();
191 commGrid3D.reset(
new CommGrid3D(commGrid2D->GetWorld(), nlayers, 0, 0, special));
// ---- First path: split the local matrix into chunks and exchange whole chunks. ----
193 DER* spSeq = A2D.seqptr();
194 std::vector<DER> localChunks;
// The chunk count is the truncated square root of the number of layers.
195 int numChunks = (int)std::sqrt((
float)commGrid3D->GetGridLayers());
// ColSplit works on columns, so a row-wise split transposes first.
// NOTE(review): this transposes the matrix A2D.seqptr() points to, and no restoring
// call is visible here - confirm A2D is left unchanged.
196 if(!colsplit) spSeq->Transpose();
197 spSeq->ColSplit(numChunks, localChunks);
199 for(
int i = 0; i < localChunks.size(); i++) localChunks[i].Transpose();
203 int sqrtLayer = (int)std::sqrt((
float)commGrid3D->GetGridLayers());
// One send slot per layer, each starting as an empty DER(0, 0, 0, 0).
204 std::vector<DER> sendChunks(commGrid3D->GetGridLayers());
205 for(
int i = 0; i < sendChunks.size(); i++){
206 sendChunks[i] = DER(0, 0, 0, 0);
// Local chunk i goes to a fiber rank inside a group of sqrtLayer consecutive ranks; the group
// is picked by rankInFiber / sqrtLayer (colsplit) or rankInFiber % sqrtLayer (otherwise).
208 for(
int i = 0; i < localChunks.size(); i++){
209 int rcvRankInFiber = (colsplit) ? ( ( ( commGrid3D->GetRankInFiber() / sqrtLayer ) * sqrtLayer ) + i ) : ( ( ( commGrid3D->GetRankInFiber() % sqrtLayer ) * sqrtLayer ) + i );
210 sendChunks[rcvRankInFiber] = localChunks[i];
212 MPI_Barrier(commGrid3D->GetWorld());
// NOTE(review): the call that fills recvChunks is not visible here; presumably
// SpecialExchangeData is called with sendChunks, datasize and x - confirm.
214 IT datasize;
NT x = 0.0;
215 std::vector<DER> recvChunks;
// Dimensions of the concatenation: the maximum row count and the sum of column counts.
218 typename DER::LocalIT concat_row = 0, concat_col = 0;
219 for(
int i = 0; i < recvChunks.size(); i++){
220 if(colsplit) recvChunks[i].Transpose();
221 concat_row = std::max(concat_row, recvChunks[i].getnrow());
222 concat_col = concat_col + recvChunks[i].getncol();
224 DER * localMatrix =
new DER(0, concat_row, concat_col, 0);
225 localMatrix->ColConcatenate(recvChunks);
226 if(colsplit) localMatrix->Transpose();
228 layermat.reset(
new SpParMat<IT, NT, DER>(localMatrix, commGrid3D->GetLayerWorld()));
// ---- Second path: route individual nonzeros to their owners in the 3D grid. ----
231 IT nrows = A2D.getnrow();
232 IT ncols = A2D.getncol();
233 int pr2d = commGrid2D->GetGridRows();
234 int pc2d = commGrid2D->GetGridCols();
235 int rowrank2d = commGrid2D->GetRankInProcRow();
236 int colrank2d = commGrid2D->GetRankInProcCol();
237 IT m_perproc2d = nrows / pr2d;
238 IT n_perproc2d = ncols / pc2d;
239 DER* spSeq = A2D.seqptr();
// Global offset of this process's local block in the 2D distribution.
240 IT localRowStart2d = colrank2d * m_perproc2d;
241 IT localColStart2d = rowrank2d * n_perproc2d;
// First pass over the nonzeros. NOTE(review): no update of tsendcnt is visible here and the
// declarations of lrow3d / lcol3d are not shown; presumably tsendcnt[owner] is incremented - confirm.
244 std::vector<IT> tsendcnt(
nprocs,0);
245 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
247 IT gcol = colit.colid() + localColStart2d;
248 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
250 IT grow = nzit.rowid() + localRowStart2d;
251 int owner = Owner(nrows, ncols, grow, gcol, lrow3d, lcol3d);
// Second pass: bucket each nonzero under its owner rank, using the owner-local indices
// that Owner writes into lrow3d / lcol3d.
256 std::vector< std::vector< std::tuple<LIT,LIT, NT> > > sendTuples (
nprocs);
257 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
259 IT gcol = colit.colid() + localColStart2d;
260 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
262 IT grow = nzit.rowid() + localRowStart2d;
263 NT val = nzit.value();
264 int owner = Owner(nrows, ncols, grow, gcol, lrow3d, lcol3d);
265 sendTuples[owner].push_back(std::make_tuple(lrow3d, lcol3d, val));
// NOTE(review): the declaration of datasize for this path is not visible here.
270 std::tuple<LIT,LIT,NT>* recvTuples =
ExchangeData(sendTuples, commGrid2D->GetWorld(), datasize);
// NOTE(review): the declarations of mdim / ndim are not visible here.
273 LocalDim(nrows, ncols, mdim, ndim);
274 SpTuples<LIT, NT>spTuples3d(datasize, mdim, ndim, recvTuples);
275 DER * localm3d =
new DER(spTuples3d,
false);
// This path passes the layer's CommGrid object, whereas the first path passes GetLayerWorld().
277 layermat.reset(
new SpParMat<IT, NT, DER>(localm3d, commGrid3D->GetCommGridLayer()));
// Builds a 3D matrix from another 3D matrix A, with the split orientation given by colsplit.
//   A        - source 3D matrix; its grid world, layer count and `special` flag are reused.
//   colsplit - requested orientation, stored in the member. When it differs from
//              A.isColSplit(), the local data is re-split and exchanged.
282 template <
class IT,
class NT,
class DER>
283 SpParMat3D< IT,NT,DER >::SpParMat3D (
const SpParMat3D< IT,NT,DER > &
A,
bool colsplit): colsplit(colsplit){
// NOTE(review): the declaration of myrank is not visible here.
285 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
286 typedef typename DER::LocalIT LIT;
287 auto AcommGrid3D =
A.getcommgrid3D();
// nprocs is not used in the visible lines.
288 int nprocs = AcommGrid3D->GetSize();
// A new grid object is created from A's world and layer count; A's grid is not shared.
289 commGrid3D.reset(
new CommGrid3D(AcommGrid3D->GetWorld(), AcommGrid3D->GetGridLayers(), 0, 0,
A.isSpecial()));
292 special =
A.isSpecial();
293 nlayers = AcommGrid3D->GetGridLayers();
// Work on a copy of A's local matrix.
295 DER * spSeq =
A.seqptr();
296 DER * localMatrix =
new DER(*spSeq);
// The redistribution below runs only when the requested orientation differs from A's.
297 if((
A.isColSplit() && !colsplit) || (!
A.isColSplit() && colsplit)){
300 std::vector<DER> sendChunks;
301 int numChunks = commGrid3D->GetGridLayers();
// ColSplit works on columns, so a row-wise split transposes first.
302 if(!colsplit) localMatrix->Transpose();
303 localMatrix->ColSplit(numChunks, sendChunks);
305 for(
int i = 0; i < sendChunks.size(); i++) sendChunks[i].Transpose();
// NOTE(review): the call that fills recvChunks is not visible here; presumably
// SpecialExchangeData is called with sendChunks, datasize and x - confirm.
308 IT datasize;
NT x = 71.0;
309 std::vector<DER> recvChunks;
// Dimensions of the concatenation: the maximum row count and the sum of column counts.
313 typename DER::LocalIT concat_row = 0, concat_col = 0;
314 for(
int i = 0; i < recvChunks.size(); i++){
315 if(colsplit) recvChunks[i].Transpose();
316 concat_row = std::max(concat_row, recvChunks[i].getnrow());
317 concat_col = concat_col + recvChunks[i].getncol();
// NOTE(review): localMatrix is reassigned here and no delete of the copy made above is
// visible - confirm the earlier allocation is released in a line not shown.
319 localMatrix =
new DER(0, concat_row, concat_col, 0);
320 localMatrix->ColConcatenate(recvChunks);
321 if(colsplit) localMatrix->Transpose();
329 layermat.reset(
new SpParMat<IT, NT, DER>(localMatrix, commGrid3D->GetLayerWorld()));
// Owner: maps a global entry (grow, gcol) of a total_m x total_n matrix to the rank that
// owns it in the 3D grid, and writes indices local to that owner into lrow / lcol.
//   total_m, total_n - global matrix dimensions.
//   grow, gcol       - global row / column index of the entry.
//   lrow, lcol       - outputs. NOTE(review): each is assigned in only one of the two
//                      sections visible below; the other assignment is presumably on a
//                      line not shown - confirm.
// Returns commGrid3D->GetRank(layer, procrow, proccol).
335 template <
class IT,
class NT,
class DER>
336 template <
typename LIT>
337 int SpParMat3D<IT,NT,DER>::Owner(
IT total_m,
IT total_n,
IT grow,
IT gcol, LIT & lrow, LIT & lcol)
const {
339 std::shared_ptr<CommGrid> commGridLayer = commGrid3D->GetCommGridLayer();
340 int procrows = commGridLayer->GetGridRows();
341 int proccols = commGridLayer->GetGridCols();
// This local shadows any member of the same name.
342 int nlayers = commGrid3D->GetGridLayers();
// Rows / columns per process within a layer grid (integer division).
344 IT m_perproc_L0 = total_m / procrows;
345 IT n_perproc_L0 = total_n / proccols;
// Process row holding grow, clamped to the last process row. When the per-process size is
// zero the last process row is used. NOTE(review): the else line and the declarations of
// procrow_L0 / proccol_L0 are not visible here.
348 if(m_perproc_L0 != 0){
349 procrow_L0 = std::min(
static_cast<int>(grow / m_perproc_L0), procrows-1);
353 procrow_L0 = procrows -1;
356 if(n_perproc_L0 != 0){
357 proccol_L0 = std::min(
static_cast<int>(gcol / n_perproc_L0), proccols-1);
360 proccol_L0 = proccols-1;
// Indices relative to the start of that process's block within the layer grid.
363 IT lrow_L0 = grow - (procrow_L0 * m_perproc_L0);
364 IT lcol_L0 = gcol - (proccol_L0 * n_perproc_L0);
// Column-wise section: the block's columns are divided among the layers; the last process
// column uses its actual (remainder-including) width. NOTE(review): the condition selecting
// this section over the row-wise one below is not visible; presumably colsplit - confirm.
370 if(proccol_L0 < commGrid3D->GetGridCols()-1)
371 n_perproc = n_perproc_L0 / nlayers;
373 n_perproc = (total_n - (n_perproc_L0 * proccol_L0)) / nlayers;
376 layer = std::min(
static_cast<int>(lcol_L0 / n_perproc), nlayers-1);
381 lcol = lcol_L0 - (layer * n_perproc);
// Row-wise section: same computation applied to rows.
386 if(procrow_L0 < commGrid3D->GetGridRows()-1)
387 m_perproc = m_perproc_L0 / nlayers;
389 m_perproc = (total_m - (m_perproc_L0 * procrow_L0)) / nlayers;
392 layer = std::min(
static_cast<int>(lrow_L0 / m_perproc), nlayers-1);
397 lrow = lrow_L0 - (layer * m_perproc);
// The position inside the layer grid is the one computed above.
399 int proccol_layer = proccol_L0;
400 int procrow_layer = procrow_L0;
401 return commGrid3D->GetRank(layer, procrow_layer, proccol_layer);
// LocalDim: computes the dimensions of the calling process's local block for a
// total_m x total_n matrix distributed over the 3D grid.
//   total_m, total_n - global matrix dimensions.
//   localm, localn   - outputs.
// NOTE(review): the two sections at the end (one dividing columns by layer, one dividing rows)
// are presumably alternatives selected by colsplit; the branch lines and the assignment of
// the non-divided dimension are not visible here - confirm.
404 template <
class IT,
class NT,
class DER>
405 void SpParMat3D<IT,NT,DER>::LocalDim(
IT total_m,
IT total_n,
IT &localm,
IT& localn)
const
408 std::shared_ptr<CommGrid> commGridLayer = commGrid3D->GetCommGridLayer();
409 int procrows = commGridLayer->GetGridRows();
410 int proccols = commGridLayer->GetGridCols();
// This local shadows any member of the same name.
411 int nlayers = commGrid3D->GetGridLayers();
// Block size within a layer grid (integer division).
413 IT localm_L0 = total_m / procrows;
414 IT localn_L0 = total_n / proccols;
// The last process column / row absorbs the remainder of the division.
416 if(commGridLayer->GetRankInProcRow() == commGrid3D->GetGridCols()-1)
418 localn_L0 = (total_n - localn_L0*(commGrid3D->GetGridCols()-1));
420 if(commGridLayer->GetRankInProcCol() == commGrid3D->GetGridRows()-1)
422 localm_L0 = (total_m - localm_L0 * (commGrid3D->GetGridRows()-1));
// Column-wise section: divide the block's columns among layers; the last rank in the fiber
// takes what is left after the other layers' shares.
426 localn = localn_L0/nlayers;
427 if(commGrid3D->GetRankInFiber() == (commGrid3D->GetGridLayers()-1))
428 localn = localn_L0 - localn * (commGrid3D->GetGridLayers()-1);
// Row-wise section: same computation applied to rows.
433 localm = localm_L0/nlayers;
434 if(commGrid3D->GetRankInFiber() == (commGrid3D->GetGridLayers()-1))
435 localm = localm_L0 - localm * (commGrid3D->GetGridLayers()-1);
// Convert2D: redistributes this 3D matrix into a 2D-distributed SpParMat on the same world.
// NOTE(review): two separate conversion paths are visible below (spSeq and grid2d are each
// declared twice); the branch choosing between them and both return statements are not
// visible. Presumably the branch tests `special` - confirm.
440 template <
class IT,
class NT,
class DER>
441 SpParMat<IT, NT, DER> SpParMat3D<IT, NT, DER>::Convert2D(){
442 typedef typename DER::LocalIT LIT;
// ---- First path: split the local matrix into chunks and exchange whole chunks. ----
444 DER * spSeq = layermat->seqptr();
445 std::vector<DER> localChunks;
446 int sqrtLayers = (int)std::sqrt((
float)commGrid3D->GetGridLayers());
// The 2D grid dimensions are the 3D grid dimensions scaled by sqrtLayers.
447 LIT grid3dCols = commGrid3D->GetGridCols(); LIT grid3dRows = commGrid3D->GetGridRows();
448 LIT grid2dCols = grid3dCols * sqrtLayers; LIT grid2dRows = grid3dRows * sqrtLayers;
// x is the layer matrix extent along the dimension being split; y is the regular share of
// that extent per 2D grid row / column.
449 IT x = (colsplit) ? layermat->getnrow() : layermat->getncol();
450 LIT y = (colsplit) ? (x / grid2dRows) : (x / grid2dCols);
451 vector<LIT> divisions2d;
// Regular share repeated, with the last entry taking the remainder. NOTE(review): the
// row-based and column-based fills below are presumably alternatives selected by colsplit;
// the branch lines are not visible here.
453 for(LIT i = 0; i < grid2dRows-1; i++) divisions2d.push_back(y);
454 divisions2d.push_back(layermat->getnrow()-(grid2dRows-1)*y);
457 for(LIT i = 0; i < grid2dCols-1; i++) divisions2d.push_back(y);
458 divisions2d.push_back(layermat->getncol()-(grid2dCols-1)*y);
// Keep the sqrtLayers consecutive divisions that fall inside this process's block.
460 vector<LIT> divisions2dChunk;
461 LIT start = (colsplit) ? ((commGrid3D->GetRankInLayer() / grid3dRows) * sqrtLayers) : ((commGrid3D->GetRankInLayer() % grid3dCols) * sqrtLayers);
462 LIT end = start + sqrtLayers;
463 for(LIT i = start; i < end; i++){
464 divisions2dChunk.push_back(divisions2d[i]);
// ColSplit works on columns, so the colsplit case transposes first.
// NOTE(review): this transposes the matrix layermat->seqptr() points to, and no restoring
// call is visible here - confirm layermat is left unchanged.
466 if(colsplit) spSeq->Transpose();
467 spSeq->ColSplit(divisions2dChunk, localChunks);
469 for(
int i = 0; i < localChunks.size(); i++) localChunks[i].Transpose();
// One send slot per layer, each starting as an empty DER(0, 0, 0, 0).
471 std::vector<DER> sendChunks(commGrid3D->GetGridLayers());
472 for(
int i = 0; i < sendChunks.size(); i++){
473 sendChunks[i] = DER(0, 0, 0, 0);
// Local chunk i goes to a fiber rank inside a group of sqrtLayers consecutive ranks.
475 for(
int i = 0; i < localChunks.size(); i++){
476 int rcvRankInFiber = (colsplit) ? ( ( ( commGrid3D->GetRankInFiber() / sqrtLayers ) * sqrtLayers ) + i ) : ( ( ( commGrid3D->GetRankInFiber() % sqrtLayers ) * sqrtLayers ) + i );
477 sendChunks[rcvRankInFiber] = localChunks[i];
// NOTE(review): the call that fills recvChunks is not visible here; presumably
// SpecialExchangeData is called with sendChunks, datasize and z - confirm.
479 IT datasize;
NT z=1.0;
480 std::vector<DER> recvChunks;
// Dimensions of the concatenation: the maximum row count and the sum of column counts.
483 LIT concat_row = 0, concat_col = 0;
484 for(
int i = 0; i < recvChunks.size(); i++){
485 if(!colsplit) recvChunks[i].Transpose();
486 concat_row = std::max(concat_row, recvChunks[i].getnrow());
487 concat_col = concat_col + recvChunks[i].getncol();
489 DER * localMatrix =
new DER(0, concat_row, concat_col, 0);
490 localMatrix->ColConcatenate(recvChunks);
491 if(!colsplit) localMatrix->Transpose();
// The 2D grid is created on the 3D grid's world communicator.
492 std::shared_ptr<CommGrid> grid2d;
493 grid2d.reset(
new CommGrid(commGrid3D->GetWorld(), 0, 0));
494 SpParMat<IT, NT, DER> mat2D(localMatrix, grid2d);
// ---- Second path: route individual nonzeros to their owners in the 2D grid. ----
498 int nProcs = commGrid3D->GetSize();
499 int nGridLayers = commGrid3D->GetGridLayers();
500 int nGridCols = commGrid3D->GetGridCols();
501 int nGridRows = commGrid3D->GetGridRows();
502 int rankInProcCol_L0 = commGrid3D->GetCommGridLayer()->GetRankInProcCol();
503 int rankInProcRow_L0 = commGrid3D->GetCommGridLayer()->GetRankInProcRow();
// NOTE(review): the declarations of m and n are not visible here; presumably the global row
// and column counts - confirm.
// a / b: column width of a regular / the last process column.
// c / d: row height of a regular / the last process row.
// w,x  y,z  p,q  r,s: regular and last-layer shares of a, b, c and d respectively.
506 IT a = n / nGridCols;
507 IT b = n - (a * (nGridCols - 1));
508 IT c = m / nGridRows;
509 IT d = m - (c * (nGridRows - 1));
510 IT w = a / nGridLayers;
511 IT x = a - (w * (nGridLayers - 1));
512 IT y = b / nGridLayers;
513 IT z = b - (y * (nGridLayers - 1));
514 IT p = c / nGridLayers;
515 IT q = c - (p * (nGridLayers - 1));
516 IT r = d / nGridLayers;
517 IT s = d - (r * (nGridLayers - 1));
519 std::shared_ptr<CommGrid> grid2d;
520 grid2d.reset(
new CommGrid(commGrid3D->GetWorld(), 0, 0));
521 SpParMat<IT, NT, DER> A2D (grid2d);
// One bucket of tuples per destination rank.
523 std::vector< std::vector < std::tuple<LIT,LIT,NT> > > data(nProcs);
524 DER* spSeq = layermat->seqptr();
526 for(
typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit){
527 LIT lcol = colit.colid();
528 for(
typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit){
529 LIT lrow = nzit.rowid();
530 NT val = nzit.value();
531 LIT lrow_L0, lcol_L0;
// Column index within the layer-grid block: offset by this layer's share, using the regular
// share (w) except in the last process column, which uses y. NOTE(review): the else lines
// and any enclosing colsplit test are not visible here.
535 if(commGrid3D->GetCommGridLayer()->GetRankInProcRow() < (nGridCols-1)){
537 lcol_L0 = w * commGrid3D->GetRankInFiber() + lcol;
541 lcol_L0 = y * commGrid3D->GetRankInFiber() + lcol;
// Row index within the layer-grid block: regular share (p), or r in the last process row.
547 if(commGrid3D->GetCommGridLayer()->GetRankInProcCol() < (nGridRows-1)){
549 lrow_L0 = p * commGrid3D->GetRankInFiber() + lrow;
553 lrow_L0 = r * commGrid3D->GetRankInFiber() + lrow;
// Global indices: the block offset in the layer grid plus the index inside the block.
556 IT grow = commGrid3D->GetCommGridLayer()->GetRankInProcCol() * c + lrow_L0;
557 IT gcol = commGrid3D->GetCommGridLayer()->GetRankInProcRow() * a + lcol_L0;
// NOTE(review): the declarations of lrow2d / lcol2d are not visible here.
560 int owner = A2D.Owner(m, n, grow, gcol, lrow2d, lcol2d);
561 data[owner].push_back(std::make_tuple(lrow2d,lcol2d,val));
// NOTE(review): the declaration of locsize is not visible here; presumably maximum<NT>() is
// how SparseCommon combines duplicate entries - confirm.
565 A2D.SparseCommon(data, locsize, m, n, maximum<NT>());
// CalculateColSplitDistributionOfLayer: appends to divisions3d the column counts into which
// this process's local columns are divided.
//   divisions3d - output; entries are appended, existing content is not cleared in the
//                 visible lines.
// NOTE(review): two alternative computations are visible below (x is declared twice); the
// branch choosing between them is not visible; presumably it tests `special` - confirm.
575 template <
class IT,
class NT,
class DER>
576 void SpParMat3D<IT,NT,DER>::CalculateColSplitDistributionOfLayer(vector<typename DER::LocalIT> & divisions3d){
// ---- First computation: go through the equivalent 2D column distribution. ----
578 vector<IT> divisions2d;
579 int sqrtLayers = (int)std::sqrt((
float)commGrid3D->GetGridLayers());
580 int grid3dCols = commGrid3D->GetGridCols();
581 int grid2dCols = grid3dCols * sqrtLayers;
// x is the layer matrix's column count; y is the regular share per 2D grid column, with the
// last 2D column taking the remainder.
582 IT x = (layermat)->getncol();
583 IT y = x / grid2dCols;
584 for(
int i = 0; i < grid2dCols-1; i++) divisions2d.push_back(y);
585 divisions2d.push_back(x-(grid2dCols-1)*y);
// Keep the sqrtLayers consecutive 2D divisions that fall inside this process's columns.
586 vector<IT> divisions2dChunk;
587 IT start = (commGrid3D->GetRankInLayer() % grid3dCols) * sqrtLayers;
588 IT end = start + sqrtLayers;
589 for(
int i = start; i < end; i++){
590 divisions2dChunk.push_back(divisions2d[i]);
// Each kept division is cut into sqrtLayers pieces; the last piece takes the remainder.
592 for(
int i = 0; i < divisions2dChunk.size(); i++){
593 IT z = divisions2dChunk[i]/sqrtLayers;
594 for(
int j = 0; j < sqrtLayers-1; j++) divisions3d.push_back(z);
595 divisions3d.push_back(divisions2dChunk[i]-(sqrtLayers-1)*z);
// ---- Second computation: divide the local column count directly among the layers. ----
// NOTE(review): the declaration of y for this computation is not visible here; presumably
// x / nlayers - confirm.
600 IT x = layermat->seqptr()->getncol();
601 int nlayers = commGrid3D->GetGridLayers();
603 for(
int i = 0; i < nlayers-1; i++) divisions3d.push_back(y);
604 divisions3d.push_back(x-(nlayers-1)*y);
// CheckSpParMatCompatibility: checks that this process's local dimensions in the layer matrix
// equal the sizes implied by dividing the layer matrix evenly over the layer grid, with the
// last grid row / column absorbing the remainder.
// NOTE(review): the declarations of x, y, a, b and flag, the else lines and the return
// statement are not visible here; presumably flag starts true and is returned - confirm.
611 template <
class IT,
class NT,
class DER>
612 bool SpParMat3D<IT,NT,DER>::CheckSpParMatCompatibility(){
613 IT nLayerCols = layermat->getncol();
614 IT nLayerRows = layermat->getnrow();
615 IT localCols = layermat->getlocalcols();
616 IT localRows = layermat->getlocalrows();
617 int nGridCols = layermat->getcommgrid()->GetGridCols();
618 int nGridRows = layermat->getcommgrid()->GetGridRows();
// The row index in the grid is the rank within the process column, and vice versa.
619 int idxGridRow = layermat->getcommgrid()->GetRankInProcCol();
620 int idxGridCol = layermat->getcommgrid()->GetRankInProcRow();
// x / a: expected rows / columns for a regular grid row / column.
// y / b: expected rows / columns for the last grid row / column.
622 x = nLayerRows / nGridRows;
623 y = (nLayerRows % nGridRows == 0) ? x : (nLayerRows - x * (nGridRows - 1));
624 a = nLayerCols / nGridCols;
625 b = (nLayerCols % nGridCols == 0) ? a : (nLayerCols - a * (nGridCols - 1));
// Row check: the last grid row is compared against y, the others against x.
627 if(idxGridRow == nGridRows-1){
628 if(localRows != y) flag =
false;
631 if(localRows != x) flag =
false;
// Column check: the last grid column is compared against b, the others against a.
633 if(idxGridCol == nGridCols-1){
634 if(localCols != b) flag =
false;
637 if(localCols != a) flag =
false;
// getnrow: row count of the 3D matrix. When the matrix is not column-split, the per-layer row
// counts are summed over the fiber communicator (a collective call); otherwise the layer
// matrix's row count is used directly.
// NOTE(review): the declaration of totalrows and the return statement are not visible here.
642 template <
class IT,
class NT,
class DER>
643 IT SpParMat3D< IT,NT,DER >::getnrow()
const {
644 IT totalrows_layer = layermat->getnrow();
646 if(!colsplit) MPI_Allreduce( &totalrows_layer, &totalrows, 1, MPIType<IT>(), MPI_SUM, commGrid3D->GetFiberWorld());
647 else totalrows = totalrows_layer;
// getncol: column count of the 3D matrix. When the matrix is column-split, the per-layer
// column counts are summed over the fiber communicator (a collective call); otherwise the
// layer matrix's column count is used directly.
// NOTE(review): the declaration of totalcols and the return statement are not visible here.
652 template <
class IT,
class NT,
class DER>
653 IT SpParMat3D< IT,NT,DER >::getncol()
const {
654 IT totalcols_layer = layermat->getncol();
656 if(colsplit) MPI_Allreduce( &totalcols_layer, &totalcols, 1, MPIType<IT>(), MPI_SUM, commGrid3D->GetFiberWorld());
657 else totalcols = totalcols_layer;
// getnnz: nonzero count of the 3D matrix, obtained by summing the layer matrices' nonzero
// counts over the fiber communicator (a collective call, independent of colsplit).
// NOTE(review): the declaration of totalnz and the return statement are not visible here.
662 template <
class IT,
class NT,
class DER>
663 IT SpParMat3D< IT,NT,DER >::getnnz()
const {
664 IT totalnz_layer = layermat->getnnz();
666 MPI_Allreduce( &totalnz_layer, &totalnz, 1, MPIType<IT>(), MPI_SUM, commGrid3D->GetFiberWorld());
Mac OS X ATTR com apple quarantine q
std::vector< std::tuple< IT, IT, NT > > ExchangeData(std::vector< std::vector< std::tuple< IT, IT, NT > > > &tempTuples, MPI_Comm World)
void SpecialExchangeData(std::vector< DER > &sendChunks, MPI_Comm World, IT &datasize, NT dummy, vector< DER > &recvChunks)