COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
RCM.cpp
Go to the documentation of this file.
1#ifdef THREADED
2#ifndef _OPENMP
3#define _OPENMP // should be defined before any COMBBLAS header is included
4#endif
5#include <omp.h>
6#endif
7
8#include "CombBLAS/CombBLAS.h"
9#include <mpi.h>
10#include <sys/time.h>
11#include <iostream>
12#include <functional>
13#include <algorithm>
14#include <vector>
15#include <string>
16#include <sstream>
17
18
19#define EDGEFACTOR 16
20
21#ifdef DETERMINISTIC
23#else
24MTRand GlobalMT; // generate random numbers with Mersenne Twister
25#endif
26
32
33
34
35using namespace std;
36using namespace combblas;
37
38
39
42
43
44
45
46template <typename PARMAT>
48{
49 // boolean addition is practically a "logical or"
50 // therefore this doesn't destruct any links
51 PARMAT AT = A;
52 AT.Transpose();
53 AT.RemoveLoops(); // not needed for boolean matrices, but no harm in keeping it
54 A += AT;
55}
56
57
58struct VertexType
59{
60public:
62
63 friend bool operator<(const VertexType & vtx1, const VertexType & vtx2 )
64 {
65 if(vtx1.order==vtx2.order) return vtx1.degree < vtx2.degree;
66 else return vtx1.order<vtx2.order;
67 };
68 friend bool operator<=(const VertexType & vtx1, const VertexType & vtx2 )
69 {
70 if(vtx1.order==vtx2.order) return vtx1.degree <= vtx2.degree;
71 else return vtx1.order<vtx2.order;
72 };
73 friend bool operator>(const VertexType & vtx1, const VertexType & vtx2 )
74 {
75 if(vtx1.order==vtx2.order) return vtx1.degree > vtx2.degree;
76 else return vtx1.order>vtx2.order;
77 };
78 friend bool operator>=(const VertexType & vtx1, const VertexType & vtx2 )
79 {
80 if(vtx1.order==vtx2.order) return vtx1.degree >= vtx2.degree;
81 else return vtx1.order>vtx2.order;
82
83 //if(vtx1.order==vtx2.order) return vtx1.degree <= vtx2.degree;
84 //else return vtx1.order<vtx2.order;
85 };
86 friend bool operator==(const VertexType & vtx1, const VertexType & vtx2 ){return vtx1.order==vtx2.order & vtx1.degree==vtx2.degree;};
87 friend ostream& operator<<(ostream& os, const VertexType & vertex ){os << "(" << vertex.order << "," << vertex.degree << ")"; return os;};
88 //private:
91};
92
93
94
95struct SelectMinSR
96{
98 static T_promote id(){ return -1; };
99 static bool returnedSAID() { return false; }
100 //static MPI_Op mpi_op() { return MPI_MIN; };
101
102 static T_promote add(const T_promote & arg1, const T_promote & arg2)
103 {
104 return std::min(arg1, arg2);
105 }
106
107 static T_promote multiply(const bool & arg1, const T_promote & arg2)
108 {
109 return arg2;
110 }
111
112 static void axpy(bool a, const T_promote & x, T_promote & y)
113 {
114 y = std::min(y, x);
115 }
116};
117
118
123
124
125
126
128{
129
130 int myrank, nprocs;
133
134
135 vector<int64_t> lind = fringeRow.GetLocalInd ();
136 vector<VertexType> lnum = fringeRow.GetLocalNum ();
137 int64_t ploclen = lind.size();
138
139
142
143 int * rdispls = new int[nprocs+1];
144 int * recvcnt = new int[nprocs];
145 int * sendcnt = new int[nprocs](); // initialize to 0
146 int * sdispls = new int[nprocs+1];
147
149
150#ifdef _OPENMP
151#pragma omp parallel for
152#endif
153 for(int64_t k=0; k < ploclen; ++k)
154 {
155 int64_t temp = lnum[k].order-startLabel;
156 int owner;
157 if(perproc==0 || temp/perproc > nprocs-1)
158 owner = nprocs-1;
159 else
161
162#ifdef _OPENMP
164#else
165 sendcnt[owner]++;
166#endif
167 }
168
170
171
172 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, MPI_COMM_WORLD); // share the request counts
173
174 sdispls[0] = 0;
175 rdispls[0] = 0;
176 for(int i=0; i<nprocs; ++i)
177 {
178 sdispls[i+1] = sdispls[i] + sendcnt[i];
179 rdispls[i+1] = rdispls[i] + recvcnt[i];
180 }
181
182
185 int64_t * indbuf = new int64_t[ploclen];
186 int *count = new int[nprocs](); //current position
187#ifdef _OPENMP
188#pragma omp parallel for
189#endif
190 for(int64_t i=0; i < ploclen; ++i)
191 {
192
193 int64_t temp = lnum[i].order-startLabel;
194 int owner;
195 if(perproc==0 || temp/perproc > nprocs-1)
196 owner = nprocs-1;
197 else
199
200
201 int id;
202#ifdef _OPENMP
203 id = sdispls[owner] + __sync_fetch_and_add(&count[owner], 1);
204#else
205 id = sdispls[owner] + count[owner];
206 count[owner]++;
207#endif
208
209 datbuf1[id] = temp;
210 datbuf2[id] = lnum[i].degree;
211 indbuf[id] = lind[i] + fringeRow.LengthUntil();
212 }
213 delete [] count;
214
215 int64_t totrecv = rdispls[nprocs];
219 delete [] datbuf1;
221 delete [] datbuf2;
222
225 delete [] indbuf;
226
228
229#ifdef _OPENMP
230#pragma omp parallel for
231#endif
232 for(int i=0; i<totrecv; ++i)
233 {
235 }
236
237
238#if defined(GNU_PARALLEL) && defined(_OPENMP)
239 __gnu_parallel::sort(tosort, tosort+totrecv);
240#else
241 std::sort(tosort, tosort+totrecv);
242#endif
243
244 // send order back
245 int * sendcnt1 = new int[nprocs]();
246
247#ifdef _OPENMP
248#pragma omp parallel for
249#endif
250 for(int64_t k=0; k < totrecv; ++k)
251 {
253 int owner = fringeRow.Owner(get<2>(tosort[k]), locind);
254#ifdef _OPENMP
256#else
257 sendcnt1[owner]++;
258#endif
259 }
260
261 MPI_Alltoall(sendcnt1, 1, MPI_INT, recvcnt, 1, MPI_INT, MPI_COMM_WORLD); // share the request counts
262
263 sdispls[0] = 0;
264 rdispls[0] = 0;
265 for(int i=0; i<nprocs; ++i)
266 {
267 sdispls[i+1] = sdispls[i] + sendcnt1[i];
268 rdispls[i+1] = rdispls[i] + recvcnt[i];
269 }
270
271
273 sortperproc[myrank] = totrecv;
275
276 vector<int64_t> disp(nprocs+1);
277 disp[0] = 0;
278 for(int i=0; i<nprocs; ++i)
279 {
280 disp[i+1] = disp[i] + sortperproc[i];
281 }
282
283
284
286
287 int64_t * datbuf = new int64_t[ploclen];
288 indbuf = new int64_t[ploclen];
289 count = new int[nprocs](); //current position
290#ifdef _OPENMP
291#pragma omp parallel for
292#endif
293 for(int64_t i=0; i < ploclen; ++i)
294 {
296 int owner = fringeRow.Owner(get<2>(tosort[i]), locind);
297 int id;
298#ifdef _OPENMP
299 id = sdispls[owner] + __sync_fetch_and_add(&count[owner], 1);
300#else
301 id = sdispls[owner] + count[owner];
302 count[owner]++;
303#endif
304 datbuf[id] = i + disp[myrank] + endLabel + 1;
305 //cout << datbuf[id] << endl;
306 indbuf[id] = locind;
307 }
308 delete [] count;
309
310
311 totrecv = rdispls[nprocs];
314 delete [] datbuf;
315
318 delete [] indbuf;
319
320
321 FullyDistSpVec<int64_t, int64_t> order(fringeRow.getcommgrid(), fringeRow.TotalLength(), recvindbuf3, recvdatbuf3);
323 DeleteAll(sdispls, rdispls, sendcnt, sendcnt1, recvcnt);
324 ::operator delete(tosort);
325
326 return order;
327}
328
330// perform ordering from a pseudo peripheral vertex
331template <typename PARMAT>
333{
334 int myrank;
336 double tSpMV=0, tOrder, tOther, tSpMV1, tsort=0, tsort1;
337 tOrder = MPI_Wtime();
338
339 int64_t nv = A.getnrow();
341 order.SetElement(source, startOrder);
342 fringe.SetElement(source, startOrder);
344
345 if(myrank == 0) cout << " Computing the RCM ordering:" << endl;
346
347
350
351
352 while(startLabel <= endLabel) // continue until the frontier is empty
353 {
354
356 [](int64_t parent_order, int64_t ord){return ord;},
357 [](int64_t parent_order, int64_t ord){return true;},
358 false, (int64_t) -1);
359
360 tSpMV1 = MPI_Wtime();
362 tSpMV += MPI_Wtime() - tSpMV1;
363 fringe = EWiseMult(fringe, order, true, (int64_t) -1);
364
366 [](int64_t parent_order, int64_t degree){return VertexType(parent_order, degree);},
367 [](int64_t parent_order, int64_t degree){return true;},
368 false, (int64_t) -1);
369
370 tsort1 = MPI_Wtime();
373 order.Set(levelOrder);
374 startLabel = endLabel + 1;
375 endLabel += fringe.getnnz();
376 }
377
378 tOrder = MPI_Wtime() - tOrder;
379 tOther = tOrder - tSpMV - tsort;
380 if(myrank == 0)
381 {
382 cout << " Total time: " << tOrder << " seconds [SpMV: " << tSpMV << ", sorting: " << tsort << ", other: " << tOther << "]" << endl << endl;
383 }
384
386}
387
388
389template <typename PARMAT>
391{
392
393
394 int myrank;
396 double tpvSpMV=0, tpvOther=0;
397 double tstart = MPI_Wtime();
398 int64_t prevLevel=-1, curLevel=0; // initialized just to make the first iteration going
399
400
401 // Select a minimum-degree unvisited vertex as the initial source
404
405 //level structure in the current BFS tree
406 //we are not using this information. Currently it is serving as visited flag
407 FullyDistVec<int64_t, int64_t> level ( A.getcommgrid(), A.getnrow(), (int64_t) -1);
408
409 int iterations = 0;
410 double tSpMV=0, tOther=0, tSpMV1;
411 if(myrank == 0) cout << " Computing a pseudo-peripheral vertex:" << endl;
412 while(curLevel > prevLevel)
413 {
414 double tItr = MPI_Wtime();
416 FullyDistSpVec<int64_t, int64_t> fringe(A.getcommgrid(), A.getnrow() );
417 level = (int64_t)-1; // reset level structure in every iteration
418 level.SetElement(source, 1); // place source at level 1
419 fringe.SetElement(source, 1); // include source to the initial fringe
420 curLevel = 2;
421 while(fringe.getnnz() > 0) // continue until the frontier is empty
422 {
423 tSpMV1 = MPI_Wtime();
425 tSpMV += MPI_Wtime() - tSpMV1;
426 fringe = EWiseMult(fringe, level, true, (int64_t) -1);
427 // set value to the current level
429 curLevel++;
430 level.Set(fringe);
431 }
432 curLevel = curLevel-2;
433
434
435 // last non-empty level (we can avoid this by keeping the last nonempty fringe)
436 fringe = level.Find(curLevel);
437 fringe.setNumToInd();
438
439 // find a minimum degree vertex in the last level
442 [](int64_t vtx, int64_t deg){return make_pair(deg, vtx);},
443 [](int64_t vtx, int64_t deg){return true;},
444 false, (int64_t) -1);
446 if (curLevel > prevLevel)
447 source = mindegree_vertex.second;
448 iterations++;
449
450
451 if(myrank == 0)
452 {
453 cout <<" iteration: "<< iterations << " BFS levels: " << curLevel << " Time: " << MPI_Wtime() - tItr << " seconds." << endl;
454 }
455
456 }
457
458 // remove vertices in the current connected component
459 //unvisitedVertices = EWiseMult(unvisitedVertices, level, true, (int64_t) -1);
463 false, make_pair((int64_t)-1, (int64_t)0));
464
466 tpvSpMV += tSpMV;
467 tpvOther += tOther;
468 if(myrank == 0)
469 {
470 cout << " vertex " << source << " is a pseudo peripheral vertex" << endl;
471 cout << " pseudo diameter: " << curLevel << ", #iterations: "<< iterations << endl;
472 cout << " Total time: " << MPI_Wtime() - tstart << " seconds [SpMV: " << tSpMV << ", other: " << tOther << "]" << endl << endl;
473
474 }
475 return source;
476
477}
478
479template <typename PARMAT>
481{
482
483#ifdef TIMING
489#endif
490 int myrank, nprocs;
493
494 FullyDistSpVec<int64_t, int64_t> unvisited ( A.getcommgrid(), A.getnrow());
495 unvisited.iota(A.getnrow(), (int64_t) 0); // index and values become the same
496 // The list of unvisited vertices. The value is (degree, vertex index) pair
499 [](int64_t vtx, int64_t deg){return make_pair(deg, vtx);},
500 [](int64_t vtx, int64_t deg){return true;},
501 false, (int64_t) -1);
502
503
504 // The RCM order will be stored here
505 FullyDistVec<int64_t, int64_t> rcmorder ( A.getcommgrid(), A.getnrow(), (int64_t) -1);
506
507 int cc = 1; // current connected component
509 while(numUnvisited>0) // for each connected component
510 {
511
512 if(myrank==0) cout << "Connected component: " << cc++ << endl;
513 // Get a pseudo-peripheral vertex to start the RCM algorithm
515
516 // Get the RCM ordering in this connected component
517 int64_t curOrder = A.getnrow() - numUnvisited;
519
521 }
522
523#ifdef TIMING
525 if(myrank == 0)
526 {
527 td_ag_all = new double[nprocs];
528 td_a2a_all = new double[nprocs];
529 td_tv_all = new double[nprocs];
530 td_mc_all = new double[nprocs];
531 td_spmv_all = new double[nprocs];
532 }
533
539
540
542
543
544 if(myrank == 0)
545 {
546
548 for(int i=0; i< nprocs; ++i) // find the mean performing guy
549 total_time[i] += td_ag_all[i] + td_a2a_all[i] + td_tv_all[i] + td_mc_all[i] + td_spmv_all[i];
550
551 // order
553 for(int i=0; i<nprocs; i++) tosort.push_back(make_pair(total_time[i], i));
554 sort(tosort.begin(), tosort.end());
555 //vector<int> permutation = SpHelper::order(total_time);
557 for(int i=0; i<nprocs; i++) permutation[i] = tosort[i].second;
558
559 int smallest = permutation[0];
560 int largest = permutation[nprocs-1];
561 int median = permutation[nprocs/2];
562 cout << "------ Detail timing --------" << endl;
563 cout << "TOTAL (accounted) MEAN: " << accumulate( total_time.begin(), total_time.end(), 0.0 )/ static_cast<double> (nprocs) << endl;
564 cout << "TOTAL (accounted) MAX: " << total_time[0] << endl;
565 cout << "TOTAL (accounted) MIN: " << total_time[nprocs-1] << endl;
566 cout << "TOTAL (accounted) MEDIAN: " << total_time[nprocs/2] << endl;
567 cout << "-------------------------------" << endl;
568
569 cout << "allgather median: " << td_ag_all[median] << endl;
570 cout << "all2all median: " << td_a2a_all[median] << endl;
571 cout << "transposevector median: " << td_tv_all[median] << endl;
572 cout << "mergecontributions median: " << td_mc_all[median] << endl;
573 cout << "spmsv median: " << td_spmv_all[median] << endl;
574 cout << "-------------------------------" << endl;
576 td_tv_all1=td_tv_all[median]; td_mc_all1=td_mc_all[median];
577 td_spmv_all1 = td_spmv_all[median];
578
579 cout << "allgather fastest: " << td_ag_all[smallest] << endl;
580 cout << "all2all fastest: " << td_a2a_all[smallest] << endl;
581 cout << "transposevector fastest: " << td_tv_all[smallest] << endl;
582 cout << "mergecontributions fastest: " << td_mc_all[smallest] << endl;
583 cout << "spmsv fastest: " << td_spmv_all[smallest] << endl;
584 cout << "-------------------------------" << endl;
585
586
587 cout << "allgather slowest: " << td_ag_all[largest] << endl;
588 cout << "all2all slowest: " << td_a2a_all[largest] << endl;
589 cout << "transposevector slowest: " << td_tv_all[largest] << endl;
590 cout << "mergecontributions slowest: " << td_mc_all[largest] << endl;
591 cout << "spmsv slowest: " << td_spmv_all[largest] << endl;
592 }
593
594
595
596 if(myrank == 0)
597 {
598
599 cout << "summary statistics" << endl;
600 cout << base_filename << " " << processors << " " << threads << " " << processors * threads << " "<< torderSpMV << " "<< torderSort<< " "<< torderOther<< " "<< td_ag_all1 << " "<< td_a2a_all1 << " "<< td_tv_all1 << " "<< td_mc_all1 << " "<< td_spmv_all1 << " "<< endl;
601
602 }
603 #endif
604
605
606 return rcmorder;
607}
608
609
610int main(int argc, char* argv[])
611{
612 int provided;
615 {
616 printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
618 }
619
620 int nprocs, myrank;
623
624 if(argc < 3)
625 {
626 if(myrank == 0)
627 {
628
629 cout << "Usage: ./rcm <rmat|er|input> <scale|filename> " << "-permute" << " -savercm" << endl;
630 cout << "Example with a user supplied matrix:" << endl;
631 cout << " mpirun -np 4 ./rcm input a.mtx" << endl;
632 cout << "Example with a user supplied matrix (pre-permute the input matrix for load balance):" << endl;
633 cout << " mpirun -np 4 ./rcm input a.mtx -permute " << endl;
634 cout << "Example with a user supplied matrix (pre-permute the input matrix for load balance) & save rcm order to input_file_name.rcm.txt file:" << endl;
635 cout << " mpirun -np 4 ./rcm input a.mtx -permute -savercm" << endl;
636 cout << "Example with RMAT matrix: mpirun -np 4 ./rcm rmat 20" << endl;
637 cout << "Example with an Erdos-Renyi matrix: mpirun -np 4 ./rcm er 20" << endl;
638
639 }
640 MPI_Finalize();
641 return -1;
642 }
643 {
644
645 string filename="";
646 bool randpermute = false;
647 bool savercm = false;
648 for (int i = 1; i < argc; i++)
649 {
650 if (strcmp(argv[i],"-permute")==0)
651 randpermute = true;
652 if (strcmp(argv[i],"-savercm")==0)
653 savercm = true;
654 }
655
656
659
660 if(string(argv[1]) == string("input")) // input option
661 {
662 ABool = new Par_DCSC_Bool();
663 filename = argv[2];
664 tinfo.str("");
665 tinfo << "**** Reading input matrix: " << filename << " ******* " << endl;
666
667 base_filename = filename.substr(filename.find_last_of("/\\") + 1);
668
670 double t01 = MPI_Wtime();
671 ABool->ParallelReadMM(filename, true, maximum<bool>());
672 double t02 = MPI_Wtime();
674 tinfo.str("");
675 tinfo << "matrix read and symmetricized " << endl;
676 tinfo << "Reader took " << t02-t01 << " seconds" << endl;
678
679 }
680 else if(string(argv[1]) == string("rmat"))
681 {
682 unsigned scale;
683 scale = static_cast<unsigned>(atoi(argv[2]));
684 double initiator[4] = {.57, .19, .19, .05};
686 DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, false );
688
689 ABool = new Par_DCSC_Bool(*DEL, false);
691 delete DEL;
692 }
693 else if(string(argv[1]) == string("er"))
694 {
695 unsigned scale;
696 scale = static_cast<unsigned>(atoi(argv[2]));
697 double initiator[4] = {.25, .25, .25, .25};
699 DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, false );
701
702 ABool = new Par_DCSC_Bool(*DEL, false);
704 delete DEL;
705 }
706 else
707 {
708 SpParHelper::Print("Unknown input option\n");
709 MPI_Finalize();
710 return -1;
711 }
712
713
714 Par_DCSC_Bool ABoolOld(ABool->getcommgrid());
715 // needed for random permutation
717 if(randpermute && string(argv[1]) == string("input")) // do this only for user provided matrices
718 {
719 if(ABool->getnrow() == ABool->getncol())
720 {
721 ABoolOld = *ABool; // create a copy for bandwidth computation
722 randp.iota(ABool->getnrow(), 0);
723 randp.RandPerm();
724 (*ABool)(randp,randp,true);// in-place permute to save memory
725 SpParHelper::Print("Matrix is randomly permuted for load balance.\n");
726 }
727 else
728 {
729 SpParHelper::Print("Rectangular matrix: Can not apply symmetric permutation.\n");
730 }
731 }
732
733 ABool->RemoveLoops();
736 float balance;
737 balance = ABool->LoadImbalance();
738 ABool->Reduce(degrees, Column, plus<int64_t>(), static_cast<int64_t>(0));
740
741 int nthreads = 1;
742#ifdef THREADED
743#pragma omp parallel
744 {
745 nthreads = omp_get_num_threads();
746 }
747#endif
748
751
753 outs << "--------------------------------------" << endl;
754 outs << "Number of MPI proceses: " << nprocs << endl;
755 outs << "Number of threads per procese: " << nthreads << endl;
756 outs << "Load balance: " << balance << endl;
757 outs << "--------------------------------------" << endl;
759
760
761 // create Pre allocated SPA for SpMSpV
763 // Compute the RCM ordering
765
766
768 // comment out the next two lines if you want the Cuthill-McKee ordering
769 reverseOrder= rcmorder.TotalLength();
771
772
773
774 //revert random permutation if applied before
775 if(randpermute==true && randp.TotalLength() >0)
776 {
777 // inverse permutation
780 }
781
782 // Write the RCM ordering
783 // TODO: should we save the permutation instead?
784 if(savercm && filename!="")
785 {
786 string ofName = filename + ".rcm.txt";
787 reverseOrder.ParallelWrite(ofName, 1, false);
788 }
789
790 // get permutation from the ordering
791 // sort returns permutation from ordering
792 // and make the original vector a sequence (like iota)
793 // TODO: Can we use invert() ?
795
796
797
798
799 // Permute the original matrix with the RCM order
800 // this part is not timed as it is needed for sanity check only
801 if(randpermute==true && randp.TotalLength() >0)
802 {
803 int64_t bw_before1 = ABoolOld.Bandwidth();
804 int64_t bw_before2 = ABool->Bandwidth();
806 int64_t bw_after = ABoolOld.Bandwidth();
807
809 outs1 << "Original Bandwidth: " << bw_before1 << endl;
810 outs1 << "Bandwidth after randomly permuting the matrix: " << bw_before2 << endl;
811 outs1 << "Bandwidth after the matrix is permuted by RCM: " << bw_after << endl << endl;
813 }
814 else
815 {
816 int64_t bw_before1 = ABool->Bandwidth();
817 (*ABool)(rcmorder1,rcmorder1,true);
818 int64_t bw_after = ABool->Bandwidth();
819
821 outs1 << "Original Bandwidth: " << bw_before1 << endl;
822 outs1 << "Bandwidth after the matrix is permuted by RCM: " << bw_after << endl << endl;;
824 }
825
826
827 delete ABool;
828 delete ABoolCSC;
829
830 }
831 MPI_Finalize();
832 return 0;
833}
834
int main()
Definition Driver.cpp:12
double torderSort
Definition RCM.cpp:329
double torderOther
Definition RCM.cpp:329
MTRand GlobalMT
Definition RCM.cpp:24
int64_t PseudoPeripheralVertex(PARMAT &A, FullyDistSpVec< int64_t, pair< int64_t, int64_t > > &unvisitedVertices, FullyDistVec< int64_t, int64_t > degrees, PreAllocatedSPA< int64_t > &SPA)
Definition RCM.cpp:390
double cblas_transvectime
Definition RCM.cpp:31
SpParMat< int64_t, bool, SpDCCols< int64_t, bool > > Par_DCSC_Bool
Definition RCM.cpp:119
double cblas_localspmvtime
Definition RCM.cpp:29
double cblas_allgathertime
Definition RCM.cpp:28
double cblas_alltoalltime
Definition RCM.cpp:27
int threads
Definition RCM.cpp:40
int processors
Definition RCM.cpp:40
SpParMat< int64_t, bool, SpCCols< int64_t, bool > > Par_CSC_Bool
Definition RCM.cpp:122
void RCMOrder(PARMAT &A, int64_t source, FullyDistVec< int64_t, int64_t > &order, int64_t startOrder, FullyDistVec< int64_t, int64_t > degrees, PreAllocatedSPA< int64_t > &SPA)
Definition RCM.cpp:332
FullyDistVec< int64_t, int64_t > RCM(PARMAT &A, FullyDistVec< int64_t, int64_t > degrees, PreAllocatedSPA< int64_t > &SPA)
Definition RCM.cpp:480
SpParMat< int64_t, double, SpDCCols< int64_t, double > > Par_DCSC_Double
Definition RCM.cpp:121
double torderSpMV
Definition RCM.cpp:329
FullyDistSpVec< int64_t, int64_t > getOrder(FullyDistSpVec< int64_t, VertexType > &fringeRow, int64_t startLabel, int64_t endLabel)
Definition RCM.cpp:127
#define EDGEFACTOR
Definition RCM.cpp:19
string base_filename
Definition RCM.cpp:41
SpParMat< int64_t, int64_t, SpDCCols< int64_t, int64_t > > Par_DCSC_int64_t
Definition RCM.cpp:120
double cblas_mergeconttime
Definition RCM.cpp:30
void GenGraph500Data(double initiator[4], int log_numverts, int edgefactor, bool scramble=false, bool packed=false)
static void Print(const std::string &s)
int nprocs
Definition comms.cpp:55
long int64_t
Definition compat.h:21
@ Column
Definition SpDefs.h:115
void Symmetricize(PARMAT &A)
Definition ReadMatDist.h:29
void DeleteAll(A arr1)
Definition Deleter.h:48
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
Definition Friends.h:834
double A
static void axpy(bool a, const T_promote &x, T_promote &y)
Definition RCM.cpp:112
static T_promote add(const T_promote &arg1, const T_promote &arg2)
Definition RCM.cpp:102
int64_t T_promote
Definition RCM.cpp:97
static T_promote id()
Definition RCM.cpp:98
static T_promote multiply(const bool &arg1, const T_promote &arg2)
Definition RCM.cpp:107
static bool returnedSAID()
Definition RCM.cpp:99
friend bool operator<(const VertexType &vtx1, const VertexType &vtx2)
Definition RCM.cpp:63
friend bool operator==(const VertexType &vtx1, const VertexType &vtx2)
Definition RCM.cpp:86
int64_t degree
Definition RCM.cpp:90
int64_t order
Definition RCM.cpp:89
VertexType(int64_t ord=-1, int64_t deg=-1)
Definition RCM.cpp:61
friend bool operator>(const VertexType &vtx1, const VertexType &vtx2)
Definition RCM.cpp:73
friend bool operator<=(const VertexType &vtx1, const VertexType &vtx2)
Definition RCM.cpp:68
friend ostream & operator<<(ostream &os, const VertexType &vertex)
Definition RCM.cpp:87
friend bool operator>=(const VertexType &vtx1, const VertexType &vtx2)
Definition RCM.cpp:78
Compute the minimum of two values.
Definition Operations.h:173