COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
SpParMat3D.cpp
Go to the documentation of this file.
1/****************************************************************/
2/* Parallel Combinatorial BLAS Library (for Graph Computations) */
3/* date: 6/15/2017 ---------------------------------------------*/
4/* authors: Ariful Azad, Aydin Buluc --------------------------*/
5/****************************************************************/
6/*
7 Copyright (c) 2010-2017, The Regents of the University of California
8
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 THE SOFTWARE.
25 */
26
27
28
29#include "SpParMat3D.h"
30#include "ParFriends.h"
31#include "Operations.h"
32#include "FileHeader.h"
33extern "C" {
34#include "mmio.h"
35}
36#include <sys/types.h>
37#include <sys/stat.h>
38
39#include <mpi.h>
40#include <fstream>
41#include <algorithm>
42#include <set>
43#include <stdexcept>
44#include <string>
45#include "CombBLAS/CombBLAS.h"
46#include <unistd.h>
47
48namespace combblas
49{
50 template <class IT, class NT>
51 std::tuple<IT,IT,NT>* ExchangeData(std::vector<std::vector<std::tuple<IT,IT,NT>>> & tempTuples, MPI_Comm World, IT& datasize)
52 {
53 /* Create/allocate variables for vector assignment */
55 MPI_Type_contiguous(sizeof(std::tuple<IT,IT,NT>), MPI_CHAR, &MPI_tuple);
57
58 int nprocs;
60
61 int * sendcnt = new int[nprocs];
62 int * recvcnt = new int[nprocs];
63 int * sdispls = new int[nprocs]();
64 int * rdispls = new int[nprocs]();
65
66 // Set the newly found vector entries
67 IT totsend = 0;
68 for(IT i=0; i<nprocs; ++i)
69 {
70 sendcnt[i] = tempTuples[i].size();
71 totsend += tempTuples[i].size();
72 }
73
75
76 std::partial_sum(sendcnt, sendcnt+nprocs-1, sdispls+1);
77 std::partial_sum(recvcnt, recvcnt+nprocs-1, rdispls+1);
78 IT totrecv = std::accumulate(recvcnt,recvcnt+nprocs, static_cast<IT>(0));
79
80 std::vector< std::tuple<IT,IT,NT> > sendTuples(totsend);
81 for(int i=0; i<nprocs; ++i)
82 {
83 copy(tempTuples[i].begin(), tempTuples[i].end(), sendTuples.data()+sdispls[i]);
84 std::vector< std::tuple<IT,IT,NT> >().swap(tempTuples[i]); // clear memory
85 }
86
87 std::tuple<IT,IT,NT>* recvTuples = new std::tuple<IT,IT,NT>[totrecv];
88 //std::vector< std::tuple<IT,IT,NT> > recvTuples(totrecv);
90 DeleteAll(sendcnt, recvcnt, sdispls, rdispls); // free all memory
93 return recvTuples;
94 }
95
96 template <class IT, class NT, class DER>
98 int myrank;
100 double vm_usage, resident_set;
101 typedef typename DER::LocalIT LIT;
102 int numChunks = sendChunks.size();
103
105 MPI_Type_contiguous(sizeof(std::tuple<LIT,LIT,NT>), MPI_CHAR, &MPI_tuple);
107
108 int * sendcnt = new int[numChunks];
109 int * sendprfl = new int[numChunks*3];
110 int * sdispls = new int[numChunks]();
111 int * recvcnt = new int[numChunks];
112 int * recvprfl = new int[numChunks*3];
113 int * rdispls = new int[numChunks]();
114
115 IT totsend = 0;
116 for(IT i=0; i<numChunks; ++i){
117 sendprfl[i*3] = sendChunks[i].getnnz();
118 sendprfl[i*3+1] = sendChunks[i].getnrow();
119 sendprfl[i*3+2] = sendChunks[i].getncol();
120 sendcnt[i] = sendprfl[i*3];
121 totsend += sendcnt[i];
122 }
123
125 for(int i = 0; i < numChunks; i++) recvcnt[i] = recvprfl[i*3];
126
127 std::partial_sum(sendcnt, sendcnt+numChunks-1, sdispls+1);
128 std::partial_sum(recvcnt, recvcnt+numChunks-1, rdispls+1);
129 IT totrecv = std::accumulate(recvcnt,recvcnt+numChunks, static_cast<IT>(0));
130
131 std::tuple<LIT,LIT,NT>* sendTuples = new std::tuple<LIT,LIT,NT>[totsend];
132 std::tuple<LIT,LIT,NT>* recvTuples = new std::tuple<LIT,LIT,NT>[totrecv];
133
134 int kk=0;
135 for(int i = 0; i < numChunks; i++){
136 for(typename DER::SpColIter colit = sendChunks[i].begcol(); colit != sendChunks[i].endcol(); ++colit){
137 for(typename DER::SpColIter::NzIter nzit = sendChunks[i].begnz(colit); nzit != sendChunks[i].endnz(colit); ++nzit){
138 NT val = nzit.value();
139 sendTuples[kk++] = std::make_tuple(nzit.rowid(), colit.colid(), nzit.value());
140 }
141 }
142 }
143
146
147 //tuple<LIT, LIT, NT> ** tempTuples = new tuple<LIT, LIT, NT>*[numChunks];
149 for (int i = 0; i < numChunks; i++){
151 memcpy(tempTuples[i], recvTuples+rdispls[i], recvcnt[i]*sizeof(tuple<LIT, LIT, NT>));
152 }
153
154 for (int i = 0; i < numChunks; i++){
155 recvChunks.push_back(DER(SpTuples<LIT, NT>(recvcnt[i], recvprfl[i*3+1], recvprfl[i*3+2], tempTuples[i]), false));
156 }
157
158 // Free all memory except tempTuples; Because that memory is holding data of newly created local matrices after receiving.
161
162 return;
163 }
164
165 template <class IT, class NT, class DER>
167 // No need to delete layermat because it is a smart pointer
168 //delete layermat;
169 }
170
171 // Empty contructor. Nothing is specified. Use with caution!
172 template <class IT, class NT, class DER>
173 SpParMat3D< IT,NT,DER >::SpParMat3D (int nlayers): nlayers(nlayers), colsplit(true), special(false){
174 assert( (sizeof(IT) >= sizeof(typename DER::LocalIT)) );
175 commGrid3D.reset(new CommGrid3D(MPI_COMM_WORLD, nlayers, 0, 0, special));
176 layermat.reset(new SpParMat<IT, NT, DER>(commGrid3D->GetLayerWorld()));
177 }
178
179 template <class IT, class NT, class DER>
180 SpParMat3D< IT,NT,DER >::SpParMat3D (DER * localMatrix, std::shared_ptr<CommGrid3D> grid3d, bool colsplit, bool special): commGrid3D(grid3d), colsplit(colsplit), special(special){
181 assert( (sizeof(IT) >= sizeof(typename DER::LocalIT)) );
182 MPI_Comm_size(commGrid3D->GetFiberWorld(), &nlayers);
183 layermat.reset(new SpParMat<IT, NT, DER>(localMatrix, commGrid3D->GetLayerWorld()));
184 }
185
186 template <class IT, class NT, class DER>
187 SpParMat3D< IT,NT,DER >::SpParMat3D (const SpParMat< IT,NT,DER > & A2D, int nlayers, bool colsplit, bool special): nlayers(nlayers), colsplit(colsplit), special(special){
188 typedef typename DER::LocalIT LIT;
189 auto commGrid2D = A2D.getcommgrid();
190 int nprocs = commGrid2D->GetSize();
191 commGrid3D.reset(new CommGrid3D(commGrid2D->GetWorld(), nlayers, 0, 0, special));
192 if(special){
193 DER* spSeq = A2D.seqptr(); // local submatrix
194 std::vector<DER> localChunks;
195 int numChunks = (int)std::sqrt((float)commGrid3D->GetGridLayers());
196 if(!colsplit) spSeq->Transpose();
197 spSeq->ColSplit(numChunks, localChunks);
198 if(!colsplit){
199 for(int i = 0; i < localChunks.size(); i++) localChunks[i].Transpose();
200 }
201
202 // Some necessary processing before exchanging data
203 int sqrtLayer = (int)std::sqrt((float)commGrid3D->GetGridLayers());
204 std::vector<DER> sendChunks(commGrid3D->GetGridLayers());
205 for(int i = 0; i < sendChunks.size(); i++){
206 sendChunks[i] = DER(0, 0, 0, 0);
207 }
208 for(int i = 0; i < localChunks.size(); i++){
209 int rcvRankInFiber = (colsplit) ? ( ( ( commGrid3D->GetRankInFiber() / sqrtLayer ) * sqrtLayer ) + i ) : ( ( ( commGrid3D->GetRankInFiber() % sqrtLayer ) * sqrtLayer ) + i );
211 }
212 MPI_Barrier(commGrid3D->GetWorld());
213
214 IT datasize; NT x = 0.0;
215 std::vector<DER> recvChunks;
216
217 SpecialExchangeData(sendChunks, commGrid3D->GetFiberWorld(), datasize, x, recvChunks);
218 typename DER::LocalIT concat_row = 0, concat_col = 0;
219 for(int i = 0; i < recvChunks.size(); i++){
220 if(colsplit) recvChunks[i].Transpose();
221 concat_row = std::max(concat_row, recvChunks[i].getnrow());
222 concat_col = concat_col + recvChunks[i].getncol();
223 }
224 DER * localMatrix = new DER(0, concat_row, concat_col, 0);
225 localMatrix->ColConcatenate(recvChunks);
226 if(colsplit) localMatrix->Transpose();
227 //layermat = new SpParMat<IT, NT, DER>(localMatrix, commGrid3D->GetLayerWorld());
228 layermat.reset(new SpParMat<IT, NT, DER>(localMatrix, commGrid3D->GetLayerWorld()));
229 }
230 else {
231 IT nrows = A2D.getnrow();
232 IT ncols = A2D.getncol();
233 int pr2d = commGrid2D->GetGridRows();
234 int pc2d = commGrid2D->GetGridCols();
235 int rowrank2d = commGrid2D->GetRankInProcRow();
236 int colrank2d = commGrid2D->GetRankInProcCol();
239 DER* spSeq = A2D.seqptr(); // local submatrix
240 IT localRowStart2d = colrank2d * m_perproc2d; // first row in this process
241 IT localColStart2d = rowrank2d * n_perproc2d; // first col in this process
242
244 std::vector<IT> tsendcnt(nprocs,0);
245 for(typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
246 {
247 IT gcol = colit.colid() + localColStart2d;
248 for(typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
249 {
250 IT grow = nzit.rowid() + localRowStart2d;
251 int owner = Owner(nrows, ncols, grow, gcol, lrow3d, lcol3d); //3D owner
252 tsendcnt[owner]++;
253 }
254 }
255
256 std::vector< std::vector< std::tuple<LIT,LIT, NT> > > sendTuples (nprocs);
257 for(typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit)
258 {
259 IT gcol = colit.colid() + localColStart2d;
260 for(typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit)
261 {
262 IT grow = nzit.rowid() + localRowStart2d;
263 NT val = nzit.value();
264 int owner = Owner(nrows, ncols, grow, gcol, lrow3d, lcol3d); //3D owner
265 sendTuples[owner].push_back(std::make_tuple(lrow3d, lcol3d, val));
266 }
267 }
268
270 std::tuple<LIT,LIT,NT>* recvTuples = ExchangeData(sendTuples, commGrid2D->GetWorld(), datasize);
271
272 IT mdim, ndim;
275 DER * localm3d = new DER(spTuples3d, false);
276 //layermat = new SpParMat<IT, NT, DER>(localm3d, commGrid3D->GetCommGridLayer());
277 layermat.reset(new SpParMat<IT, NT, DER>(localm3d, commGrid3D->GetCommGridLayer()));
278 }
279 }
280
281 // Create a new copy of a 3D matrix in row split or column split manner
282 template <class IT, class NT, class DER>
283 SpParMat3D< IT,NT,DER >::SpParMat3D (const SpParMat3D< IT,NT,DER > & A, bool colsplit): colsplit(colsplit){
284 int myrank;
286 typedef typename DER::LocalIT LIT;
287 auto AcommGrid3D = A.getcommgrid3D();
288 int nprocs = AcommGrid3D->GetSize();
289 commGrid3D.reset(new CommGrid3D(AcommGrid3D->GetWorld(), AcommGrid3D->GetGridLayers(), 0, 0, A.isSpecial()));
290
291 // Intialize these two variables for new SpParMat3D
292 special = A.isSpecial();
293 nlayers = AcommGrid3D->GetGridLayers();
294
295 DER * spSeq = A.seqptr(); // local submatrix
296 DER * localMatrix = new DER(*spSeq);
297 if((A.isColSplit() && !colsplit) || (!A.isColSplit() && colsplit)){
298 // If given matrix is column split and desired matrix is row split
299 // Or if given matrix is row split and desired matrix is column split
300 std::vector<DER> sendChunks;
301 int numChunks = commGrid3D->GetGridLayers();
302 if(!colsplit) localMatrix->Transpose();
303 localMatrix->ColSplit(numChunks, sendChunks);
304 if(!colsplit){
305 for(int i = 0; i < sendChunks.size(); i++) sendChunks[i].Transpose();
306 }
307
308 IT datasize; NT x = 71.0;
309 std::vector<DER> recvChunks;
310
311 SpecialExchangeData(sendChunks, commGrid3D->GetFiberWorld(), datasize, x, recvChunks);
312
313 typename DER::LocalIT concat_row = 0, concat_col = 0;
314 for(int i = 0; i < recvChunks.size(); i++){
315 if(colsplit) recvChunks[i].Transpose();
316 concat_row = std::max(concat_row, recvChunks[i].getnrow());
317 concat_col = concat_col + recvChunks[i].getncol();
318 }
319 localMatrix = new DER(0, concat_row, concat_col, 0);
320 localMatrix->ColConcatenate(recvChunks);
321 if(colsplit) localMatrix->Transpose();
322 }
323 else{
324 // If given and desired matrix both are row split
325 // Or if given and desired matrix both are column split
326 // Do nothing
327 }
328 //layermat = new SpParMat<IT, NT, DER>(localMatrix, commGrid3D->GetLayerWorld());
329 layermat.reset(new SpParMat<IT, NT, DER>(localMatrix, commGrid3D->GetLayerWorld()));
330 }
331
332 /*
333 * Only calculates owner in terms of non-special distribution
334 * */
335 template <class IT, class NT,class DER>
336 template <typename LIT>
338 // first map to Layer 0
339 std::shared_ptr<CommGrid> commGridLayer = commGrid3D->GetCommGridLayer(); // CommGrid for my layer
340 int procrows = commGridLayer->GetGridRows();
341 int proccols = commGridLayer->GetGridCols();
342 int nlayers = commGrid3D->GetGridLayers();
343
344 IT m_perproc_L0 = total_m / procrows;
345 IT n_perproc_L0 = total_n / proccols;
346
347 int procrow_L0; // within a layer
348 if(m_perproc_L0 != 0){
349 procrow_L0 = std::min(static_cast<int>(grow / m_perproc_L0), procrows-1);
350 }
351 else{
352 // all owned by the last processor row
353 procrow_L0 = procrows -1;
354 }
355 int proccol_L0;
356 if(n_perproc_L0 != 0){
357 proccol_L0 = std::min(static_cast<int>(gcol / n_perproc_L0), proccols-1);
358 }
359 else{
360 proccol_L0 = proccols-1;
361 }
362
365 int layer;
366 // next, split and scatter
367 if(colsplit){
368 IT n_perproc;
369
370 if(proccol_L0 < commGrid3D->GetGridCols()-1)
371 n_perproc = n_perproc_L0 / nlayers;
372 else
373 n_perproc = (total_n - (n_perproc_L0 * proccol_L0)) / nlayers;
374
375 if(n_perproc != 0)
376 layer = std::min(static_cast<int>(lcol_L0 / n_perproc), nlayers-1);
377 else
378 layer = nlayers-1;
379
380 lrow = lrow_L0;
381 lcol = lcol_L0 - (layer * n_perproc);
382 }
383 else{
384 IT m_perproc;
385
386 if(procrow_L0 < commGrid3D->GetGridRows()-1)
387 m_perproc = m_perproc_L0 / nlayers;
388 else
389 m_perproc = (total_m - (m_perproc_L0 * procrow_L0)) / nlayers;
390
391 if(m_perproc != 0)
392 layer = std::min(static_cast<int>(lrow_L0 / m_perproc), nlayers-1);
393 else
394 layer = nlayers-1;
395
396 lcol = lcol_L0;
397 lrow = lrow_L0 - (layer * m_perproc);
398 }
401 return commGrid3D->GetRank(layer, procrow_layer, proccol_layer);
402 }
403
404 template <class IT, class NT,class DER>
406 {
407 // first map to Layer 0 and then split
408 std::shared_ptr<CommGrid> commGridLayer = commGrid3D->GetCommGridLayer(); // CommGrid for my layer
409 int procrows = commGridLayer->GetGridRows();
410 int proccols = commGridLayer->GetGridCols();
411 int nlayers = commGrid3D->GetGridLayers();
412
413 IT localm_L0 = total_m / procrows;
414 IT localn_L0 = total_n / proccols;
415
416 if(commGridLayer->GetRankInProcRow() == commGrid3D->GetGridCols()-1)
417 {
418 localn_L0 = (total_n - localn_L0*(commGrid3D->GetGridCols()-1));
419 }
420 if(commGridLayer->GetRankInProcCol() == commGrid3D->GetGridRows()-1)
421 {
422 localm_L0 = (total_m - localm_L0 * (commGrid3D->GetGridRows()-1));
423 }
424 if(colsplit)
425 {
426 localn = localn_L0/nlayers;
427 if(commGrid3D->GetRankInFiber() == (commGrid3D->GetGridLayers()-1))
428 localn = localn_L0 - localn * (commGrid3D->GetGridLayers()-1);
429 localm = localm_L0;
430 }
431 else
432 {
433 localm = localm_L0/nlayers;
434 if(commGrid3D->GetRankInFiber() == (commGrid3D->GetGridLayers()-1))
435 localm = localm_L0 - localm * (commGrid3D->GetGridLayers()-1);
437 }
438 }
439
440 template <class IT, class NT, class DER>
442 typedef typename DER::LocalIT LIT;
443 if(special){
444 DER * spSeq = layermat->seqptr();
445 std::vector<DER> localChunks;
446 int sqrtLayers = (int)std::sqrt((float)commGrid3D->GetGridLayers());
447 LIT grid3dCols = commGrid3D->GetGridCols(); LIT grid3dRows = commGrid3D->GetGridRows();
449 IT x = (colsplit) ? layermat->getnrow() : layermat->getncol();
450 LIT y = (colsplit) ? (x / grid2dRows) : (x / grid2dCols);
452 if(colsplit){
453 for(LIT i = 0; i < grid2dRows-1; i++) divisions2d.push_back(y);
454 divisions2d.push_back(layermat->getnrow()-(grid2dRows-1)*y);
455 }
456 else{
457 for(LIT i = 0; i < grid2dCols-1; i++) divisions2d.push_back(y);
458 divisions2d.push_back(layermat->getncol()-(grid2dCols-1)*y);
459 }
461 LIT start = (colsplit) ? ((commGrid3D->GetRankInLayer() / grid3dRows) * sqrtLayers) : ((commGrid3D->GetRankInLayer() % grid3dCols) * sqrtLayers);
462 LIT end = start + sqrtLayers;
463 for(LIT i = start; i < end; i++){
464 divisions2dChunk.push_back(divisions2d[i]);
465 }
466 if(colsplit) spSeq->Transpose();
467 spSeq->ColSplit(divisions2dChunk, localChunks);
468 if(colsplit){
469 for(int i = 0; i < localChunks.size(); i++) localChunks[i].Transpose();
470 }
471 std::vector<DER> sendChunks(commGrid3D->GetGridLayers());
472 for(int i = 0; i < sendChunks.size(); i++){
473 sendChunks[i] = DER(0, 0, 0, 0);
474 }
475 for(int i = 0; i < localChunks.size(); i++){
476 int rcvRankInFiber = (colsplit) ? ( ( ( commGrid3D->GetRankInFiber() / sqrtLayers ) * sqrtLayers ) + i ) : ( ( ( commGrid3D->GetRankInFiber() % sqrtLayers ) * sqrtLayers ) + i );
478 }
479 IT datasize; NT z=1.0;
480 std::vector<DER> recvChunks;
481 SpecialExchangeData(sendChunks, commGrid3D->GetFiberWorld(), datasize, z, recvChunks);
482
483 LIT concat_row = 0, concat_col = 0;
484 for(int i = 0; i < recvChunks.size(); i++){
485 if(!colsplit) recvChunks[i].Transpose();
486 concat_row = std::max(concat_row, recvChunks[i].getnrow());
487 concat_col = concat_col + recvChunks[i].getncol();
488 }
489 DER * localMatrix = new DER(0, concat_row, concat_col, 0);
490 localMatrix->ColConcatenate(recvChunks);
491 if(!colsplit) localMatrix->Transpose();
492 std::shared_ptr<CommGrid> grid2d;
493 grid2d.reset(new CommGrid(commGrid3D->GetWorld(), 0, 0));
495 return mat2D;
496 }
497 else{
498 int nProcs = commGrid3D->GetSize(); // Total number of processes in the process grid
499 int nGridLayers = commGrid3D->GetGridLayers(); // Number of layers in the process grid
500 int nGridCols = commGrid3D->GetGridCols(); // Number of process columns in a layer of the grid, which can be thought of L0
501 int nGridRows = commGrid3D->GetGridRows(); // Number of process rows in a layer of the grid, which can be thought of L0
502 int rankInProcCol_L0 = commGrid3D->GetCommGridLayer()->GetRankInProcCol();
503 int rankInProcRow_L0 = commGrid3D->GetCommGridLayer()->GetRankInProcRow();
504 IT m = getnrow(); // Total number of rows of the matrix
505 IT n = getncol(); // Total number of columns of the matrix
506 IT a = n / nGridCols;
507 IT b = n - (a * (nGridCols - 1));
508 IT c = m / nGridRows;
509 IT d = m - (c * (nGridRows - 1));
510 IT w = a / nGridLayers;
511 IT x = a - (w * (nGridLayers - 1));
512 IT y = b / nGridLayers;
513 IT z = b - (y * (nGridLayers - 1));
514 IT p = c / nGridLayers;
515 IT q = c - (p * (nGridLayers - 1));
516 IT r = d / nGridLayers;
517 IT s = d - (r * (nGridLayers - 1));
518
519 std::shared_ptr<CommGrid> grid2d;
520 grid2d.reset(new CommGrid(commGrid3D->GetWorld(), 0, 0));
522
523 std::vector< std::vector < std::tuple<LIT,LIT,NT> > > data(nProcs);
524 DER* spSeq = layermat->seqptr(); // local submatrix
525 LIT locsize = 0;
526 for(typename DER::SpColIter colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit){
527 LIT lcol = colit.colid();
528 for(typename DER::SpColIter::NzIter nzit = spSeq->begnz(colit); nzit != spSeq->endnz(colit); ++nzit){
529 LIT lrow = nzit.rowid();
530 NT val = nzit.value();
532 if(colsplit){
533 // If 3D distribution is column split
534 lrow_L0 = lrow;
535 if(commGrid3D->GetCommGridLayer()->GetRankInProcRow() < (nGridCols-1)){
536 // If this process is not last in the process column
537 lcol_L0 = w * commGrid3D->GetRankInFiber() + lcol;
538 }
539 else{
540 // If this process is last in the process column
541 lcol_L0 = y * commGrid3D->GetRankInFiber() + lcol;
542 }
543 }
544 else{
545 // If 3D distribution is rowsplit
546 lcol_L0 = lcol;
547 if(commGrid3D->GetCommGridLayer()->GetRankInProcCol() < (nGridRows-1)){
548 // If this process is not last in the process column
549 lrow_L0 = p * commGrid3D->GetRankInFiber() + lrow;
550 }
551 else{
552 // If this process is last in the process column
553 lrow_L0 = r * commGrid3D->GetRankInFiber() + lrow;
554 }
555 }
556 IT grow = commGrid3D->GetCommGridLayer()->GetRankInProcCol() * c + lrow_L0;
557 IT gcol = commGrid3D->GetCommGridLayer()->GetRankInProcRow() * a + lcol_L0;
558
560 int owner = A2D.Owner(m, n, grow, gcol, lrow2d, lcol2d);
561 data[owner].push_back(std::make_tuple(lrow2d,lcol2d,val));
562 locsize++;
563 }
564 }
565 A2D.SparseCommon(data, locsize, m, n, maximum<NT>());
566
567 return A2D;
568 }
569 }
570
571 /*
572 * Calculate, which process accross fiber should get how many columns
573 * if layer matrix of this 3D matrix is distributed in column split way
574 * */
575 template <class IT, class NT, class DER>
577 if(special){
579 int sqrtLayers = (int)std::sqrt((float)commGrid3D->GetGridLayers());
580 int grid3dCols = commGrid3D->GetGridCols();
582 IT x = (layermat)->getncol();
583 IT y = x / grid2dCols;
584 for(int i = 0; i < grid2dCols-1; i++) divisions2d.push_back(y);
585 divisions2d.push_back(x-(grid2dCols-1)*y);
587 IT start = (commGrid3D->GetRankInLayer() % grid3dCols) * sqrtLayers;
588 IT end = start + sqrtLayers;
589 for(int i = start; i < end; i++){
590 divisions2dChunk.push_back(divisions2d[i]);
591 }
592 for(int i = 0; i < divisions2dChunk.size(); i++){
594 for(int j = 0; j < sqrtLayers-1; j++) divisions3d.push_back(z);
595 divisions3d.push_back(divisions2dChunk[i]-(sqrtLayers-1)*z);
596 }
597 }
598 else{
599 // For non-special distribution, partitioning for 3D can be achieved by dividing local columns in #layers equal partitions
600 IT x = layermat->seqptr()->getncol();
601 int nlayers = commGrid3D->GetGridLayers();
602 IT y = x / nlayers;
603 for(int i = 0; i < nlayers-1; i++) divisions3d.push_back(y);
604 divisions3d.push_back(x-(nlayers-1)*y);
605 }
606 }
607
608 /*
609 * Checks if the layer matrix is 2D SpParMat compatible
610 * */
611 template <class IT, class NT, class DER>
613 IT nLayerCols = layermat->getncol();
614 IT nLayerRows = layermat->getnrow();
615 IT localCols = layermat->getlocalcols();
616 IT localRows = layermat->getlocalrows();
617 int nGridCols = layermat->getcommgrid()->GetGridCols();
618 int nGridRows = layermat->getcommgrid()->GetGridRows();
619 int idxGridRow = layermat->getcommgrid()->GetRankInProcCol();
620 int idxGridCol = layermat->getcommgrid()->GetRankInProcRow();
621 IT x, y, a, b;
623 y = (nLayerRows % nGridRows == 0) ? x : (nLayerRows - x * (nGridRows - 1));
624 a = nLayerCols / nGridCols;
625 b = (nLayerCols % nGridCols == 0) ? a : (nLayerCols - a * (nGridCols - 1));
626 bool flag = true;
627 if(idxGridRow == nGridRows-1){
628 if(localRows != y) flag = false;
629 }
630 else{
631 if(localRows != x) flag = false;
632 }
633 if(idxGridCol == nGridCols-1){
634 if(localCols != b) flag = false;
635 }
636 else{
637 if(localCols != a) flag = false;
638 }
639 return flag;
640 }
641
642 template <class IT, class NT, class DER>
644 IT totalrows_layer = layermat->getnrow();
645 IT totalrows = 0;
646 if(!colsplit) MPI_Allreduce( &totalrows_layer, &totalrows, 1, MPIType<IT>(), MPI_SUM, commGrid3D->GetFiberWorld());
648 return totalrows;
649 }
650
651
652 template <class IT, class NT, class DER>
654 IT totalcols_layer = layermat->getncol();
655 IT totalcols = 0;
656 if(colsplit) MPI_Allreduce( &totalcols_layer, &totalcols, 1, MPIType<IT>(), MPI_SUM, commGrid3D->GetFiberWorld());
658 return totalcols;
659 }
660
661
662 template <class IT, class NT, class DER>
664 IT totalnz_layer = layermat->getnnz();
665 IT totalnz = 0;
666 MPI_Allreduce( &totalnz_layer, &totalnz, 1, MPIType<IT>(), MPI_SUM, commGrid3D->GetFiberWorld());
667 return totalnz;
668 }
669
670}
int64_t IT
double NT
Mac OS X ATTR com apple quarantine q
Definition ._remapper.cpp:1
bool CheckSpParMatCompatibility()
SpParMat3D(int nlayers)
void LocalDim(IT total_m, IT total_n, IT &localm, IT &localn) const
int Owner(IT total_m, IT total_n, IT grow, IT gcol, LIT &lrow, LIT &lcol) const
SpParMat< IT, NT, DER > Convert2D()
void CalculateColSplitDistributionOfLayer(vector< typename DER::LocalIT > &divisions3d)
int nprocs
Definition comms.cpp:55
std::vector< std::tuple< IT, IT, NT > > ExchangeData(std::vector< std::vector< std::tuple< IT, IT, NT > > > &tempTuples, MPI_Comm World)
void DeleteAll(A arr1)
Definition Deleter.h:48
void SpecialExchangeData(std::vector< DER > &sendChunks, MPI_Comm World, IT &datasize, NT dummy, vector< DER > &recvChunks)
double A