Combinatorial BLAS 1.6
ParFriendsExt.h
/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.6 -------------------------------------------------*/
/* date: 6/15/2017 ---------------------------------------------*/
/* authors: Ariful Azad, Aydin Buluc --------------------------*/
/****************************************************************/
/*
 Copyright (c) 2010-2017, The Regents of the University of California

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */

#ifndef _PAR_FRIENDS_EXT_H_
#define _PAR_FRIENDS_EXT_H_

#include "mpi.h"
#include <iostream>
#include "SpParMat.h"
#include "SpParHelper.h"
#include "MPIType.h"
#include "Friends.h"

namespace combblas {

template <class IT, class NT, class DER>
class SpParMat;

/*************************************************************************************************/
/**************************** FRIEND FUNCTIONS FOR PARALLEL CLASSES ******************************/
/************************** EXTENDED SET (NOT COMMONLY USED FUNCTIONS) ***************************/
/*************************************************************************************************/

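/**
 * Parallel C = A*B on distributed sparse matrices, using one-sided MPI with
 * general active-target synchronization (MPI_Win_post / MPI_Win_start /
 * MPI_Win_complete / MPI_Win_wait). Each operand is split in half so that the
 * next half can be prefetched while the current half is being multiplied.
 *
 * A minimal usage sketch (illustrative only; the PSpMat typedef and the
 * semiring choice are assumptions, not part of this file):
 * \code
 *   typedef SpParMat< int64_t, double, SpDCCols<int64_t,double> > PSpMat;
 *   PSpMat C = Mult_AnXBn_ActiveTarget< PlusTimesSRing<double,double> >(A, B);
 * \endcode
 */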
template <typename SR, typename IU, typename NU1, typename NU2, typename UDERA, typename UDERB>
SpParMat<IU, typename promote_trait<NU1,NU2>::T_promote, typename promote_trait<UDERA,UDERB>::T_promote>
Mult_AnXBn_ActiveTarget(const SpParMat<IU,NU1,UDERA> & A, const SpParMat<IU,NU2,UDERB> & B)
{
    typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
    typedef typename promote_trait<UDERA,UDERB>::T_promote DER_promote;

    if(A.getncol() != B.getnrow())
    {
        std::cout << "Cannot multiply, dimensions do not match" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, DIMMISMATCH);
    }
    int stages, Aoffset, Boffset;   // stages = inner dimension of matrix blocks
    std::shared_ptr<CommGrid> GridC = ProductGrid((A.commGrid).get(), (B.commGrid).get(), stages, Aoffset, Boffset);

    IU C_m = A.spSeq->getnrow();
    IU C_n = B.spSeq->getncol();

    UDERA A1seq, A2seq;
    (A.spSeq)->Split( A1seq, A2seq);

    // ABAB: It should be able to perform split/merge with the transpose option [single function call]
    const_cast< UDERB* >(B.spSeq)->Transpose();

    UDERB B1seq, B2seq;
    (B.spSeq)->Split( B1seq, B2seq);

    // Create row and column windows (collective operation, i.e. everybody exposes its window to others)
    std::vector<MPI_Win> rowwins1, rowwins2, colwins1, colwins2;
    SpParHelper::SetWindows((A.commGrid)->GetRowWorld(), A1seq, rowwins1);
    SpParHelper::SetWindows((A.commGrid)->GetRowWorld(), A2seq, rowwins2);
    SpParHelper::SetWindows((B.commGrid)->GetColWorld(), B1seq, colwins1);
    SpParHelper::SetWindows((B.commGrid)->GetColWorld(), B2seq, colwins2);
    // ABAB: We can optimize the call to create windows in the absence of passive synchronization
    //  MPI_Info info;
    //  MPI_Info_create ( &info );
    //  MPI_Info_set( info, "no_locks", "true" );
    //  MPI_Win_create( . . ., info, . . . );
    //  MPI_Info_free( &info );
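    // Note on the "no_locks" hint above: it promises the MPI library that the
    // window will never be the target of MPI_Win_lock, so the implementation may
    // skip the bookkeeping needed for passive-target access. With the hint, a
    // window would be created along these lines (generic MPI call shape, not a
    // call taken from this library):
    //   MPI_Win_create(baseptr, size_in_bytes, disp_unit, info, comm, &win);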

    IU ** ARecvSizes1 = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
    IU ** ARecvSizes2 = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
    IU ** BRecvSizes1 = SpHelper::allocate2D<IU>(UDERB::esscount, stages);
    IU ** BRecvSizes2 = SpHelper::allocate2D<IU>(UDERB::esscount, stages);

    SpParHelper::GetSetSizes( A1seq, ARecvSizes1, (A.commGrid)->GetRowWorld());
    SpParHelper::GetSetSizes( A2seq, ARecvSizes2, (A.commGrid)->GetRowWorld());
    SpParHelper::GetSetSizes( B1seq, BRecvSizes1, (B.commGrid)->GetColWorld());
    SpParHelper::GetSetSizes( B2seq, BRecvSizes2, (B.commGrid)->GetColWorld());

    // Remotely fetched matrices are stored as pointers
    UDERA * ARecv1, * ARecv2;
    UDERB * BRecv1, * BRecv2;
    std::vector< SpTuples<IU,N_promote> *> tomerge;

    MPI_Group row_group, col_group;
    MPI_Comm_group((A.commGrid)->GetRowWorld(), &row_group);
    MPI_Comm_group((B.commGrid)->GetColWorld(), &col_group);

    int Aself = (A.commGrid)->GetRankInProcRow();
    int Bself = (B.commGrid)->GetRankInProcCol();

#ifdef SPGEMMDEBUG
    MPI_Barrier(GridC->GetWorld());
    SpParHelper::Print("Writing to file\n");
    std::ofstream oput;
    GridC->OpenDebugFile("deb", oput);
    oput << "A1seq: " << A1seq.getnrow() << " " << A1seq.getncol() << " " << A1seq.getnnz() << std::endl;
    oput << "A2seq: " << A2seq.getnrow() << " " << A2seq.getncol() << " " << A2seq.getnnz() << std::endl;
    oput << "B1seq: " << B1seq.getnrow() << " " << B1seq.getncol() << " " << B1seq.getnnz() << std::endl;
    oput << "B2seq: " << B2seq.getnrow() << " " << B2seq.getncol() << " " << B2seq.getnnz() << std::endl;
    SpParHelper::Print("Wrote to file\n");
    MPI_Barrier(GridC->GetWorld());
#endif

    SpParHelper::PostExposureEpoch(Aself, rowwins1, row_group);
    SpParHelper::PostExposureEpoch(Aself, rowwins2, row_group);
    SpParHelper::PostExposureEpoch(Bself, colwins1, col_group);
    SpParHelper::PostExposureEpoch(Bself, colwins2, col_group);

    MPI_Barrier(GridC->GetWorld());
    SpParHelper::Print("Exposure epochs posted\n");
    MPI_Barrier(GridC->GetWorld());

    int Aowner = (0+Aoffset) % stages;
    int Bowner = (0+Boffset) % stages;
    SpParHelper::AccessNFetch(ARecv1, Aowner, rowwins1, row_group, ARecvSizes1);
    SpParHelper::AccessNFetch(ARecv2, Aowner, rowwins2, row_group, ARecvSizes2);  // Start prefetching next half

    for(int j=0; j< rowwins1.size(); ++j)  // wait for the first half to complete
        MPI_Win_complete(rowwins1[j]);

    SpParHelper::AccessNFetch(BRecv1, Bowner, colwins1, col_group, BRecvSizes1);
    SpParHelper::AccessNFetch(BRecv2, Bowner, colwins2, col_group, BRecvSizes2);  // Start prefetching next half

    for(int j=0; j< colwins1.size(); ++j)
        MPI_Win_complete(colwins1[j]);

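    // Descriptive note (based on the helper names): the window owners post their
    // exposure epochs with MPI_Win_post above; AccessNFetch is expected to open the
    // matching access epoch (MPI_Win_start) and issue the gets, MPI_Win_complete
    // closes each access epoch, and the owners' MPI_Win_wait is deferred to the
    // WaitNFree calls at the end of this routine.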
    for(int i = 1; i < stages; ++i)
    {

#ifdef SPGEMMDEBUG
        SpParHelper::Print("Stage starting\n");
#endif

        SpTuples<IU,N_promote> * C_cont = MultiplyReturnTuples<SR>(*ARecv1, *BRecv1, false, true);

#ifdef SPGEMMDEBUG
        SpParHelper::Print("Multiplied\n");
#endif

        if(!C_cont->isZero())
            tomerge.push_back(C_cont);

#ifdef SPGEMMDEBUG
        SpParHelper::Print("Pushed back\n");
        MPI_Barrier(GridC->GetWorld());
#endif

        bool remoteA = false;
        bool remoteB = false;

        delete ARecv1;  // free the memory of the previous first half
        for(int j=0; j< rowwins2.size(); ++j)  // wait for the previous second half to complete
            MPI_Win_complete(rowwins2[j]);
#ifdef SPGEMMDEBUG
        SpParHelper::Print("Completed A\n");
#endif

        delete BRecv1;
        for(int j=0; j< colwins2.size(); ++j)  // wait for the previous second half to complete
            MPI_Win_complete(colwins2[j]);

#ifdef SPGEMMDEBUG
        SpParHelper::Print("Completed B\n");
        MPI_Barrier(GridC->GetWorld());
#endif

        Aowner = (i+Aoffset) % stages;
        Bowner = (i+Boffset) % stages;

        // start fetching the current first half
        SpParHelper::AccessNFetch(ARecv1, Aowner, rowwins1, row_group, ARecvSizes1);
        SpParHelper::AccessNFetch(BRecv1, Bowner, colwins1, col_group, BRecvSizes1);

#ifdef SPGEMMDEBUG
        SpParHelper::Print("Fetched next\n");
        MPI_Barrier(GridC->GetWorld());
#endif

        // while multiplying the already completed previous second halves
        C_cont = MultiplyReturnTuples<SR>(*ARecv2, *BRecv2, false, true);
        if(!C_cont->isZero())
            tomerge.push_back(C_cont);

#ifdef SPGEMMDEBUG
        SpParHelper::Print("Multiplied and pushed\n");
        MPI_Barrier(GridC->GetWorld());
#endif

        delete ARecv2;  // free memory of the previous second half
        delete BRecv2;

        for(int j=0; j< rowwins1.size(); ++j)  // wait for the current first half to complete
            MPI_Win_complete(rowwins1[j]);
        for(int j=0; j< colwins1.size(); ++j)
            MPI_Win_complete(colwins1[j]);

#ifdef SPGEMMDEBUG
        SpParHelper::Print("Completed next\n");
        MPI_Barrier(GridC->GetWorld());
#endif

        // start prefetching the current second half
        SpParHelper::AccessNFetch(ARecv2, Aowner, rowwins2, row_group, ARecvSizes2);
        SpParHelper::AccessNFetch(BRecv2, Bowner, colwins2, col_group, BRecvSizes2);
    }

    SpTuples<IU,N_promote> * C_cont = MultiplyReturnTuples<SR>(*ARecv1, *BRecv1, false, true);
    if(!C_cont->isZero())
        tomerge.push_back(C_cont);

    delete ARecv1;  // free the memory of the last first half
    for(int j=0; j< rowwins2.size(); ++j)  // wait for the last second half to complete
        MPI_Win_complete(rowwins2[j]);
    delete BRecv1;
    for(int j=0; j< colwins2.size(); ++j)  // wait for the last second half to complete
        MPI_Win_complete(colwins2[j]);

    C_cont = MultiplyReturnTuples<SR>(*ARecv2, *BRecv2, false, true);
    if(!C_cont->isZero())
        tomerge.push_back(C_cont);

    delete ARecv2;
    delete BRecv2;

    SpHelper::deallocate2D(ARecvSizes1, UDERA::esscount);
    SpHelper::deallocate2D(ARecvSizes2, UDERA::esscount);
    SpHelper::deallocate2D(BRecvSizes1, UDERB::esscount);
    SpHelper::deallocate2D(BRecvSizes2, UDERB::esscount);

    DER_promote * C = new DER_promote(MergeAll<SR>(tomerge, C_m, C_n), false, NULL);  // First get the result in SpTuples, then convert to UDER
    for(int i=0; i<tomerge.size(); ++i)
    {
        delete tomerge[i];
    }

    // MPI_Win_wait() works like a barrier as it waits for all origins to finish their remote memory operations on "this" window
    SpParHelper::WaitNFree(rowwins1);
    SpParHelper::WaitNFree(rowwins2);
    SpParHelper::WaitNFree(colwins1);
    SpParHelper::WaitNFree(colwins2);

    (A.spSeq)->Merge(A1seq, A2seq);
    (B.spSeq)->Merge(B1seq, B2seq);

    const_cast< UDERB* >(B.spSeq)->Transpose();  // transpose back to original
    return SpParMat<IU,N_promote,DER_promote> (C, GridC);  // return the result object
}

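/**
 * Parallel C = A*B on distributed sparse matrices, using one-sided MPI with
 * passive-target synchronization: remote halves are fetched with
 * SpParHelper::LockNFetch and released with SpParHelper::UnlockWindows
 * (presumably MPI_Win_lock / MPI_Win_unlock underneath), so no exposure epochs
 * or fences are needed. The operands are again split in half to overlap the
 * fetches with computation.
 *
 * A minimal usage sketch (illustrative only; the PSpMat typedef and the
 * semiring choice are assumptions, not part of this file):
 * \code
 *   typedef SpParMat< int64_t, double, SpDCCols<int64_t,double> > PSpMat;
 *   PSpMat C = Mult_AnXBn_PassiveTarget< PlusTimesSRing<double,double> >(A, B);
 * \endcode
 */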
template <typename SR, typename IU, typename NU1, typename NU2, typename UDERA, typename UDERB>
SpParMat<IU, typename promote_trait<NU1,NU2>::T_promote, typename promote_trait<UDERA,UDERB>::T_promote>
Mult_AnXBn_PassiveTarget(const SpParMat<IU,NU1,UDERA> & A, const SpParMat<IU,NU2,UDERB> & B)
{
    typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
    typedef typename promote_trait<UDERA,UDERB>::T_promote DER_promote;

    if(A.getncol() != B.getnrow())
    {
        std::cout << "Cannot multiply, dimensions do not match" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, DIMMISMATCH);
    }
    int stages, Aoffset, Boffset;   // stages = inner dimension of matrix blocks
    std::shared_ptr<CommGrid> GridC = ProductGrid((A.commGrid).get(), (B.commGrid).get(), stages, Aoffset, Boffset);

    IU C_m = A.spSeq->getnrow();
    IU C_n = B.spSeq->getncol();

    UDERA A1seq, A2seq;
    (A.spSeq)->Split( A1seq, A2seq);

    // ABAB: It should be able to perform split/merge with the transpose option [single function call]
    const_cast< UDERB* >(B.spSeq)->Transpose();

    UDERB B1seq, B2seq;
    (B.spSeq)->Split( B1seq, B2seq);

    // Create row and column windows (collective operation, i.e. everybody exposes its window to others)
    std::vector<MPI_Win> rowwins1, rowwins2, colwins1, colwins2;
    SpParHelper::SetWindows((A.commGrid)->GetRowWorld(), A1seq, rowwins1);
    SpParHelper::SetWindows((A.commGrid)->GetRowWorld(), A2seq, rowwins2);
    SpParHelper::SetWindows((B.commGrid)->GetColWorld(), B1seq, colwins1);
    SpParHelper::SetWindows((B.commGrid)->GetColWorld(), B2seq, colwins2);

    IU ** ARecvSizes1 = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
    IU ** ARecvSizes2 = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
    IU ** BRecvSizes1 = SpHelper::allocate2D<IU>(UDERB::esscount, stages);
    IU ** BRecvSizes2 = SpHelper::allocate2D<IU>(UDERB::esscount, stages);

    SpParHelper::GetSetSizes( A1seq, ARecvSizes1, (A.commGrid)->GetRowWorld());
    SpParHelper::GetSetSizes( A2seq, ARecvSizes2, (A.commGrid)->GetRowWorld());
    SpParHelper::GetSetSizes( B1seq, BRecvSizes1, (B.commGrid)->GetColWorld());
    SpParHelper::GetSetSizes( B2seq, BRecvSizes2, (B.commGrid)->GetColWorld());

    // Remotely fetched matrices are stored as pointers
    UDERA * ARecv1, * ARecv2;
    UDERB * BRecv1, * BRecv2;
    std::vector< SpTuples<IU,N_promote> *> tomerge;  // sorted triples to be merged

    MPI_Group row_group, col_group;
    MPI_Comm_group((A.commGrid)->GetRowWorld(), &row_group);
    MPI_Comm_group((B.commGrid)->GetColWorld(), &col_group);

    int Aself = (A.commGrid)->GetRankInProcRow();
    int Bself = (B.commGrid)->GetRankInProcCol();

    int Aowner = (0+Aoffset) % stages;
    int Bowner = (0+Boffset) % stages;

    SpParHelper::LockNFetch(ARecv1, Aowner, rowwins1, row_group, ARecvSizes1);
    SpParHelper::LockNFetch(ARecv2, Aowner, rowwins2, row_group, ARecvSizes2);  // Start prefetching next half
    SpParHelper::LockNFetch(BRecv1, Bowner, colwins1, col_group, BRecvSizes1);
    SpParHelper::LockNFetch(BRecv2, Bowner, colwins2, col_group, BRecvSizes2);  // Start prefetching next half

    // Finish the first halves
    SpParHelper::UnlockWindows(Aowner, rowwins1);
    SpParHelper::UnlockWindows(Bowner, colwins1);

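    // Passive-target RMA note (based on the helper names): LockNFetch is expected
    // to lock the owner's window (a shared lock suffices for reads), issue the gets,
    // and UnlockWindows to call MPI_Win_unlock, which also forces completion of the
    // outstanding gets. The target processes never participate explicitly.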
    for(int i = 1; i < stages; ++i)
    {
        SpTuples<IU,N_promote> * C_cont = MultiplyReturnTuples<SR>(*ARecv1, *BRecv1, false, true);

        if(!C_cont->isZero())
            tomerge.push_back(C_cont);

        bool remoteA = false;
        bool remoteB = false;

        delete ARecv1;  // free the memory of the previous first half
        delete BRecv1;

        SpParHelper::UnlockWindows(Aowner, rowwins2);  // Finish the second half
        SpParHelper::UnlockWindows(Bowner, colwins2);

        Aowner = (i+Aoffset) % stages;
        Bowner = (i+Boffset) % stages;

        // start fetching the current first half
        SpParHelper::LockNFetch(ARecv1, Aowner, rowwins1, row_group, ARecvSizes1);
        SpParHelper::LockNFetch(BRecv1, Bowner, colwins1, col_group, BRecvSizes1);

        // while multiplying the already completed previous second halves
        C_cont = MultiplyReturnTuples<SR>(*ARecv2, *BRecv2, false, true);
        if(!C_cont->isZero())
            tomerge.push_back(C_cont);

        delete ARecv2;  // free memory of the previous second half
        delete BRecv2;

        // wait for the current first half to complete
        SpParHelper::UnlockWindows(Aowner, rowwins1);
        SpParHelper::UnlockWindows(Bowner, colwins1);

        // start prefetching the current second half
        SpParHelper::LockNFetch(ARecv2, Aowner, rowwins2, row_group, ARecvSizes2);
        SpParHelper::LockNFetch(BRecv2, Bowner, colwins2, col_group, BRecvSizes2);
    }

    SpTuples<IU,N_promote> * C_cont = MultiplyReturnTuples<SR>(*ARecv1, *BRecv1, false, true);
    if(!C_cont->isZero())
        tomerge.push_back(C_cont);

    delete ARecv1;  // free the memory of the last first half
    delete BRecv1;

    SpParHelper::UnlockWindows(Aowner, rowwins2);  // Finish the second half
    SpParHelper::UnlockWindows(Bowner, colwins2);

    C_cont = MultiplyReturnTuples<SR>(*ARecv2, *BRecv2, false, true);
    if(!C_cont->isZero())
        tomerge.push_back(C_cont);

    delete ARecv2;
    delete BRecv2;

    SpHelper::deallocate2D(ARecvSizes1, UDERA::esscount);
    SpHelper::deallocate2D(ARecvSizes2, UDERA::esscount);
    SpHelper::deallocate2D(BRecvSizes1, UDERB::esscount);
    SpHelper::deallocate2D(BRecvSizes2, UDERB::esscount);

    DER_promote * C = new DER_promote(MergeAll<SR>(tomerge, C_m, C_n), false, NULL);  // First get the result in SpTuples, then convert to UDER
    for(int i=0; i<tomerge.size(); ++i)
    {
        delete tomerge[i];
    }

    SpParHelper::FreeWindows(rowwins1);
    SpParHelper::FreeWindows(rowwins2);
    SpParHelper::FreeWindows(colwins1);
    SpParHelper::FreeWindows(colwins2);

    (A.spSeq)->Merge(A1seq, A2seq);
    (B.spSeq)->Merge(B1seq, B2seq);

    const_cast< UDERB* >(B.spSeq)->Transpose();  // transpose back to original
    return SpParMat<IU,N_promote,DER_promote> (C, GridC);  // return the result object
}

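/**
 * Parallel C = A*B on distributed sparse matrices, using one-sided MPI with
 * fence (collective) synchronization: every transfer is bracketed by
 * MPI_Win_fence calls, and two sets of windows are alternated so that the
 * fetch for the next stage can proceed while the current stage is multiplied.
 * Note that this variant unconditionally writes traces to a per-grid debug file.
 *
 * A minimal usage sketch (illustrative only; the PSpMat typedef and the
 * semiring choice are assumptions, not part of this file):
 * \code
 *   typedef SpParMat< int64_t, double, SpDCCols<int64_t,double> > PSpMat;
 *   PSpMat C = Mult_AnXBn_Fence< PlusTimesSRing<double,double> >(A, B);
 * \endcode
 */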
template <typename SR, typename IU, typename NU1, typename NU2, typename UDERA, typename UDERB>
SpParMat<IU, typename promote_trait<NU1,NU2>::T_promote, typename promote_trait<UDERA,UDERB>::T_promote>
Mult_AnXBn_Fence(const SpParMat<IU,NU1,UDERA> & A, const SpParMat<IU,NU2,UDERB> & B)
{
    typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
    typedef typename promote_trait<UDERA,UDERB>::T_promote DER_promote;

    if(A.getncol() != B.getnrow())
    {
        std::cout << "Cannot multiply, dimensions do not match" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, DIMMISMATCH);
    }

    int stages, Aoffset, Boffset;   // stages = inner dimension of matrix blocks
    std::shared_ptr<CommGrid> GridC = ProductGrid((A.commGrid).get(), (B.commGrid).get(), stages, Aoffset, Boffset);

    std::ofstream oput;
    GridC->OpenDebugFile("deb", oput);
    const_cast< UDERB* >(B.spSeq)->Transpose();

    // set row & col window handles
    std::vector<MPI_Win> rowwindows, colwindows;
    std::vector<MPI_Win> rowwinnext, colwinnext;
    SpParHelper::SetWindows((A.commGrid)->GetRowWorld(), *(A.spSeq), rowwindows);
    SpParHelper::SetWindows((B.commGrid)->GetColWorld(), *(B.spSeq), colwindows);
    SpParHelper::SetWindows((A.commGrid)->GetRowWorld(), *(A.spSeq), rowwinnext);
    SpParHelper::SetWindows((B.commGrid)->GetColWorld(), *(B.spSeq), colwinnext);

    IU ** ARecvSizes = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
    IU ** BRecvSizes = SpHelper::allocate2D<IU>(UDERB::esscount, stages);

    SpParHelper::GetSetSizes( *(A.spSeq), ARecvSizes, (A.commGrid)->GetRowWorld());
    SpParHelper::GetSetSizes( *(B.spSeq), BRecvSizes, (B.commGrid)->GetColWorld());

    UDERA * ARecv, * ARecvNext;
    UDERB * BRecv, * BRecvNext;
    std::vector< SpTuples<IU,N_promote> *> tomerge;

    // Prefetch first
    for(int j=0; j< rowwindows.size(); ++j)
        MPI_Win_fence(MPI_MODE_NOPRECEDE, rowwindows[j]);
    for(int j=0; j< colwindows.size(); ++j)
        MPI_Win_fence(MPI_MODE_NOPRECEDE, colwindows[j]);

    for(int j=0; j< rowwinnext.size(); ++j)
        MPI_Win_fence(MPI_MODE_NOPRECEDE, rowwinnext[j]);
    for(int j=0; j< colwinnext.size(); ++j)
        MPI_Win_fence(MPI_MODE_NOPRECEDE, colwinnext[j]);

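    // Fence-synchronization note: each MPI_Win_fence collectively separates RMA
    // access/exposure epochs on a window. The assert flags (MPI_MODE_NOPRECEDE,
    // MPI_MODE_NOSTORE, MPI_MODE_NOSUCCEED) are only hints about which accesses
    // surround the fence, letting the MPI library skip unnecessary synchronization.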

    int Aownind = (0+Aoffset) % stages;
    int Bownind = (0+Boffset) % stages;
    if(Aownind == (A.commGrid)->GetRankInProcRow())
    {
        ARecv = A.spSeq;  // shallow-copy
    }
    else
    {
        std::vector<IU> ess1(UDERA::esscount);  // pack essentials to a vector
        for(int j=0; j< UDERA::esscount; ++j)
        {
            ess1[j] = ARecvSizes[j][Aownind];
        }
        ARecv = new UDERA();  // create the object first

        oput << "For A (out), Fetching " << (void*)rowwindows[0] << std::endl;
        SpParHelper::FetchMatrix(*ARecv, ess1, rowwindows, Aownind);  // fetch its elements later
    }
    if(Bownind == (B.commGrid)->GetRankInProcCol())
    {
        BRecv = B.spSeq;  // shallow-copy
    }
    else
    {
        std::vector<IU> ess2(UDERB::esscount);  // pack essentials to a vector
        for(int j=0; j< UDERB::esscount; ++j)
        {
            ess2[j] = BRecvSizes[j][Bownind];
        }
        BRecv = new UDERB();

        oput << "For B (out), Fetching " << (void*)colwindows[0] << std::endl;
        SpParHelper::FetchMatrix(*BRecv, ess2, colwindows, Bownind);  // No lock version, only get!
    }

    int Aownprev = Aownind;
    int Bownprev = Bownind;

    for(int i = 1; i < stages; ++i)
    {
        Aownind = (i+Aoffset) % stages;
        Bownind = (i+Boffset) % stages;

        if(i % 2 == 1)  // Fetch RecvNext via winnext, fence on Recv via windows
        {
            if(Aownind == (A.commGrid)->GetRankInProcRow())
            {
                ARecvNext = A.spSeq;  // shallow-copy
            }
            else
            {
                std::vector<IU> ess1(UDERA::esscount);  // pack essentials to a vector
                for(int j=0; j< UDERA::esscount; ++j)
                {
                    ess1[j] = ARecvSizes[j][Aownind];
                }
                ARecvNext = new UDERA();  // create the object first

                oput << "For A, Fetching " << (void*) rowwinnext[0] << std::endl;
                SpParHelper::FetchMatrix(*ARecvNext, ess1, rowwinnext, Aownind);  // No lock version, only get!
            }

            if(Bownind == (B.commGrid)->GetRankInProcCol())
            {
                BRecvNext = B.spSeq;  // shallow-copy
            }
            else
            {
                std::vector<IU> ess2(UDERB::esscount);  // pack essentials to a vector
                for(int j=0; j< UDERB::esscount; ++j)
                {
                    ess2[j] = BRecvSizes[j][Bownind];
                }
                BRecvNext = new UDERB();

                oput << "For B, Fetching " << (void*)colwinnext[0] << std::endl;
                SpParHelper::FetchMatrix(*BRecvNext, ess2, colwinnext, Bownind);  // No lock version, only get!
            }

            oput << "Fencing " << (void*) rowwindows[0] << std::endl;
            oput << "Fencing " << (void*) colwindows[0] << std::endl;

            for(int j=0; j< rowwindows.size(); ++j)
                MPI_Win_fence(MPI_MODE_NOSTORE, rowwindows[j]);  // Synch using "other" windows
            for(int j=0; j< colwindows.size(); ++j)
                MPI_Win_fence(MPI_MODE_NOSTORE, colwindows[j]);

            SpTuples<IU,N_promote> * C_cont = MultiplyReturnTuples<SR>(*ARecv, *BRecv, false, true);
            if(!C_cont->isZero())
                tomerge.push_back(C_cont);

            if(Aownprev != (A.commGrid)->GetRankInProcRow()) delete ARecv;
            if(Bownprev != (B.commGrid)->GetRankInProcCol()) delete BRecv;

            Aownprev = Aownind;
            Bownprev = Bownind;
        }
        else  // fetch to Recv via windows, fence on RecvNext via winnext
        {
            if(Aownind == (A.commGrid)->GetRankInProcRow())
            {
                ARecv = A.spSeq;  // shallow-copy
            }
            else
            {
                std::vector<IU> ess1(UDERA::esscount);  // pack essentials to a vector
                for(int j=0; j< UDERA::esscount; ++j)
                {
                    ess1[j] = ARecvSizes[j][Aownind];
                }
                ARecv = new UDERA();  // create the object first

                oput << "For A, Fetching " << (void*) rowwindows[0] << std::endl;
                SpParHelper::FetchMatrix(*ARecv, ess1, rowwindows, Aownind);  // No lock version, only get!
            }

            if(Bownind == (B.commGrid)->GetRankInProcCol())
            {
                BRecv = B.spSeq;  // shallow-copy
            }
            else
            {
                std::vector<IU> ess2(UDERB::esscount);  // pack essentials to a vector
                for(int j=0; j< UDERB::esscount; ++j)
                {
                    ess2[j] = BRecvSizes[j][Bownind];
                }
                BRecv = new UDERB();

                oput << "For B, Fetching " << (void*)colwindows[0] << std::endl;
                SpParHelper::FetchMatrix(*BRecv, ess2, colwindows, Bownind);  // No lock version, only get!
            }

            oput << "Fencing " << (void*) rowwinnext[0] << std::endl;
            oput << "Fencing " << (void*) colwinnext[0] << std::endl;

            for(int j=0; j< rowwinnext.size(); ++j)
                MPI_Win_fence(MPI_MODE_NOSTORE, rowwinnext[j]);  // Synch using "other" windows
            for(int j=0; j< colwinnext.size(); ++j)
                MPI_Win_fence(MPI_MODE_NOSTORE, colwinnext[j]);

            SpTuples<IU,N_promote> * C_cont = MultiplyReturnTuples<SR>(*ARecvNext, *BRecvNext, false, true);
            if(!C_cont->isZero())
                tomerge.push_back(C_cont);

            if(Aownprev != (A.commGrid)->GetRankInProcRow()) delete ARecvNext;
            if(Bownprev != (B.commGrid)->GetRankInProcCol()) delete BRecvNext;

            Aownprev = Aownind;
            Bownprev = Bownind;
        }

    }

    if(stages % 2 == 1)  // fence on Recv via windows
    {
        oput << "Fencing " << (void*) rowwindows[0] << std::endl;
        oput << "Fencing " << (void*) colwindows[0] << std::endl;

        for(int j=0; j< rowwindows.size(); ++j)
            MPI_Win_fence(MPI_MODE_NOSUCCEED, rowwindows[j]);  // Synch using "prev" windows
        for(int j=0; j< colwindows.size(); ++j)
            MPI_Win_fence(MPI_MODE_NOSUCCEED, colwindows[j]);

        SpTuples<IU,N_promote> * C_cont = MultiplyReturnTuples<SR>(*ARecv, *BRecv, false, true);
        if(!C_cont->isZero())
            tomerge.push_back(C_cont);

        if(Aownprev != (A.commGrid)->GetRankInProcRow()) delete ARecv;
        if(Bownprev != (B.commGrid)->GetRankInProcCol()) delete BRecv;
    }
    else  // fence on RecvNext via winnext
    {
        oput << "Fencing " << (void*) rowwinnext[0] << std::endl;
        oput << "Fencing " << (void*) colwinnext[0] << std::endl;

        for(int j=0; j< rowwinnext.size(); ++j)
            MPI_Win_fence(MPI_MODE_NOSUCCEED, rowwinnext[j]);  // Synch using "prev" windows
        for(int j=0; j< colwinnext.size(); ++j)
            MPI_Win_fence(MPI_MODE_NOSUCCEED, colwinnext[j]);

        SpTuples<IU,N_promote> * C_cont = MultiplyReturnTuples<SR>(*ARecvNext, *BRecvNext, false, true);
        if(!C_cont->isZero())
            tomerge.push_back(C_cont);

        if(Aownprev != (A.commGrid)->GetRankInProcRow()) delete ARecvNext;
        if(Bownprev != (B.commGrid)->GetRankInProcCol()) delete BRecvNext;
    }
    for(int i=0; i< rowwindows.size(); ++i)
    {
        MPI_Win_free(&rowwindows[i]);
        MPI_Win_free(&rowwinnext[i]);
    }
    for(int i=0; i< colwindows.size(); ++i)
    {
        MPI_Win_free(&colwindows[i]);
        MPI_Win_free(&colwinnext[i]);
    }
    MPI_Barrier(GridC->GetWorld());

    IU C_m = A.spSeq->getnrow();
    IU C_n = B.spSeq->getncol();
    DER_promote * C = new DER_promote(MergeAll<SR>(tomerge, C_m, C_n), false, NULL);  // First get the result in SpTuples, then convert to UDER
    for(int i=0; i<tomerge.size(); ++i)
    {
        delete tomerge[i];
    }
    SpHelper::deallocate2D(ARecvSizes, UDERA::esscount);
    SpHelper::deallocate2D(BRecvSizes, UDERB::esscount);

    const_cast< UDERB* >(B.spSeq)->Transpose();  // transpose back to original
    return SpParMat<IU,N_promote,DER_promote> (C, GridC);  // return the result object
}

}

#endif