COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
TopDownBFS.cpp
Go to the documentation of this file.
1/****************************************************************/
2/* Parallel Combinatorial BLAS Library (for Graph Computations) */
3/* version 1.6 -------------------------------------------------*/
4/* date: 6/15/2017 ---------------------------------------------*/
5/* authors: Ariful Azad, Aydin Buluc --------------------------*/
6/****************************************************************/
7/*
8 Copyright (c) 2010-2017, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29#define DETERMINISTIC
30#include "CombBLAS/CombBLAS.h"
31#include <mpi.h>
32#include <sys/time.h>
33#include <iostream>
34#include <functional>
35#include <algorithm>
36#include <vector>
37#include <string>
38#include <sstream>
39
41
47
48#define ITERS 16
49#define EDGEFACTOR 16
50using namespace std;
51using namespace combblas;
52
53// 64-bit floor(log2(x)) function
54// note: least significant bit is the "zeroth" bit
55// pre: v > 0
56unsigned int highestbitset(uint64_t v)
57{
58 // b in binary is {10,1100, 11110000, 1111111100000000 ...}
59 const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL};
60 const unsigned int S[] = {1, 2, 4, 8, 16, 32};
61 int i;
62
63 unsigned int r = 0; // result of log2(v) will go here
64 for (i = 5; i >= 0; i--)
65 {
66 if (v & b[i]) // highestbitset is on the left half (i.e. v > S[i] for sure)
67 {
68 v >>= S[i];
69 r |= S[i];
70 }
71 }
72 return r;
73}
74
75template <class T>
76bool from_string(T & t, const string& s, std::ios_base& (*f)(std::ios_base&))
77{
79 return !(iss >> f >> t).fail();
80}
81
82
83template <typename PARMAT>
85{
86 // boolean addition is practically a "logical or"
87 // therefore this doesn't destruct any links
88 PARMAT AT = A;
89 AT.Transpose();
90 A += AT;
91}
92
97struct prunediscovered: public std::binary_function<int64_t, int64_t, int64_t >
98{
100 {
101 return ( y == -1 ) ? x: -1;
102 }
103};
104
105int main(int argc, char* argv[])
106{
107 int nprocs, myrank;
108#ifdef _OPENMP
110 int provided, flag, claimed;
113 if (!flag)
114 SpParHelper::Print("This thread called init_thread but Is_thread_main gave false\n");
116 if (claimed != provided)
117 SpParHelper::Print("Query thread gave different thread level than requested\n");
118#else
119 MPI_Init(&argc, &argv);
120 int cblas_splits = 1;
121#endif
122
125 if(argc < 3)
126 {
127 if(myrank == 0)
128 {
129 cout << "Usage: ./tdbfs <Force,Input> <Scale Forced | Input Name> {FastGen}" << endl;
130 cout << "Example: ./tdbfs Force 25 FastGen" << endl;
131 }
132 MPI_Finalize();
133 return -1;
134 }
135 {
138 typedef SpParMat < int64_t, bool, SpDCCols<int32_t,bool> > PSpMat_s32p64; // sequentially use 32-bits for local matrices, but parallel semantics are 64-bits
139 typedef SpParMat < int64_t, int, SpDCCols<int32_t,int> > PSpMat_s32p64_Int; // similarly mixed, but holds integers as upposed to booleans
142 fullWorld.reset( new CommGrid(MPI_COMM_WORLD, 0, 0) );
143
144 // Declare objects
147 FullyDistVec<int64_t, int64_t> degrees(fullWorld); // degrees of vertices (including multi-edges and self-loops)
148 FullyDistVec<int64_t, int64_t> nonisov(fullWorld); // id's of non-isolated (connected) vertices
149 unsigned scale;
150 OptBuf<int32_t, int64_t> optbuf; // let indices be 32-bits
151 bool scramble = false;
152
153 if(string(argv[1]) == string("Input")) // input option
154 {
155 A.ReadDistribute(string(argv[2]), 0); // read it from file
156 SpParHelper::Print("Read input\n");
157
158 PSpMat_Int64 * G = new PSpMat_Int64(A);
159 G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // identity is 0
160 delete G;
161
162 Symmetricize(A); // A += A';
164 A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0)); // plus<int64_t> matches the type of the output vector
165 nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
166 delete ColSums;
167 A = A(nonisov, nonisov);
169 A.FreeMemory();
170 SpParHelper::Print("Symmetricized and pruned\n");
171
172 Aeff.OptimizeForGraph500(optbuf); // Should be called before threading is activated
173 #ifdef THREADED
175 tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
177 Aeff.ActivateThreading(cblas_splits);
178 #endif
179 }
180 else if(string(argv[1]) == string("Binary"))
181 {
182 uint64_t n, m;
183 from_string(n,string(argv[3]),std::dec);
184 from_string(m,string(argv[4]),std::dec);
185
187 outs << "Reading " << argv[2] << " with " << n << " vertices and " << m << " edges" << endl;
190 SpParHelper::Print("Read binary input to distributed edge list\n");
191
192 PermEdges(*DEL);
193 SpParHelper::Print("Permuted Edges\n");
194
196 //DEL->Dump32bit("graph_permuted");
197 SpParHelper::Print("Renamed Vertices\n");
198
199 // conversion from distributed edge list, keeps self-loops, sums duplicates
200 PSpMat_Int64 * G = new PSpMat_Int64(*DEL, false);
201 delete DEL; // free memory before symmetricizing
202 SpParHelper::Print("Created Int64 Sparse Matrix\n");
203
204 G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // Identity is 0
205
206 A = PSpMat_Bool(*G); // Convert to Boolean
207 delete G;
208 int64_t removed = A.RemoveLoops();
209
211 loopinfo << "Converted to Boolean and removed " << removed << " loops" << endl;
213 A.PrintInfo();
214
217 A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
218 A.Reduce(*RowSums, Row, plus<int64_t>(), static_cast<int64_t>(0));
219 ColSums->EWiseApply(*RowSums, plus<int64_t>());
220 delete RowSums;
221
222 nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
223 delete ColSums;
224
225 SpParHelper::Print("Found (and permuted) non-isolated vertices\n");
226 nonisov.RandPerm(); // so that A(v,v) is load-balanced (both memory and time wise)
227 A.PrintInfo();
228 #ifndef NOPERMUTE
229 A(nonisov, nonisov, true); // in-place permute to save memory
230 SpParHelper::Print("Dropped isolated vertices from input\n");
231 A.PrintInfo();
232 #endif
233 Aeff = PSpMat_s32p64(A); // Convert to 32-bit local integers
234 A.FreeMemory();
235
236 Symmetricize(Aeff); // A += A';
237 SpParHelper::Print("Symmetricized\n");
238 //A.Dump("graph_symmetric");
239
240 Aeff.OptimizeForGraph500(optbuf); // Should be called before threading is activated
241 #ifdef THREADED
243 tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
245 Aeff.ActivateThreading(cblas_splits);
246 #endif
247 }
248 else
249 {
250 if(string(argv[1]) == string("Force"))
251 {
252 scale = static_cast<unsigned>(atoi(argv[2]));
254 outs << "Forcing scale to : " << scale << endl;
256
257 if(argc > 3 && string(argv[3]) == string("FastGen"))
258 {
259 SpParHelper::Print("Using fast vertex permutations; skipping edge permutations (like v2.1)\n");
260 scramble = true;
261 }
262 }
263 else
264 {
265 SpParHelper::Print("Unknown option\n");
266 MPI_Finalize();
267 return -1;
268 }
269 // this is an undirected graph, so A*x does indeed BFS
270 double initiator[4] = {.57, .19, .19, .05};
271
272 double t01 = MPI_Wtime();
273 double t02;
275 if(!scramble)
276 {
278 SpParHelper::Print("Generated edge lists\n");
279 t02 = MPI_Wtime();
281 tinfo << "Generation took " << t02-t01 << " seconds" << endl;
283
284 PermEdges(*DEL);
285 SpParHelper::Print("Permuted Edges\n");
286 //DEL->Dump64bit("edges_permuted");
287 //SpParHelper::Print("Dumped\n");
288
289 RenameVertices(*DEL); // intermediate: generates RandPerm vector, using MemoryEfficientPSort
290 SpParHelper::Print("Renamed Vertices\n");
291 }
292 else // fast generation
293 {
294 DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, true ); // generate packed edges
295 SpParHelper::Print("Generated renamed edge lists\n");
296 t02 = MPI_Wtime();
298 tinfo << "Generation took " << t02-t01 << " seconds" << endl;
300 }
301
302 // Start Kernel #1
304 double t1 = MPI_Wtime();
305
306 // conversion from distributed edge list, keeps self-loops, sums duplicates
307 PSpMat_s32p64_Int * G = new PSpMat_s32p64_Int(*DEL, false);
308 delete DEL; // free memory before symmetricizing
309 SpParHelper::Print("Created Sparse Matrix (with int32 local indices and values)\n");
310
312 double redts = MPI_Wtime();
313 G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // Identity is 0
315 double redtf = MPI_Wtime();
316
318 redtimeinfo << "Calculated degrees in " << redtf-redts << " seconds" << endl;
320 A = PSpMat_Bool(*G); // Convert to Boolean
321 delete G;
322 int64_t removed = A.RemoveLoops();
323
325 loopinfo << "Converted to Boolean and removed " << removed << " loops" << endl;
327 A.PrintInfo();
328
331 A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
332 A.Reduce(*RowSums, Row, plus<int64_t>(), static_cast<int64_t>(0));
333 SpParHelper::Print("Reductions done\n");
334 ColSums->EWiseApply(*RowSums, plus<int64_t>());
335 delete RowSums;
336 SpParHelper::Print("Intersection of colsums and rowsums found\n");
337
338 nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
339 delete ColSums;
340
341 SpParHelper::Print("Found (and permuted) non-isolated vertices\n");
342 nonisov.RandPerm(); // so that A(v,v) is load-balanced (both memory and time wise)
343 A.PrintInfo();
344 #ifndef NOPERMUTE
345 A(nonisov, nonisov, true); // in-place permute to save memory
346 SpParHelper::Print("Dropped isolated vertices from input\n");
347 A.PrintInfo();
348 #endif
349
350 Aeff = PSpMat_s32p64(A); // Convert to 32-bit local integers
351 A.FreeMemory();
352 Symmetricize(Aeff); // A += A';
353 SpParHelper::Print("Symmetricized\n");
354
355 Aeff.OptimizeForGraph500(optbuf); // Should be called before threading is activated
356 #ifdef THREADED
358 tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
360 Aeff.ActivateThreading(cblas_splits);
361 #endif
362 Aeff.PrintInfo();
363
365 double t2=MPI_Wtime();
366
368 k1timeinfo << (t2-t1) - (redtf-redts) << " seconds elapsed for Kernel #1" << endl;
370 }
371 Aeff.PrintInfo();
372 float balance = Aeff.LoadImbalance();
374 outs << "Load balance: " << balance << endl;
376
378 double t1 = MPI_Wtime();
379
380 // Now that every remaining vertex is non-isolated, randomly pick ITERS many of them as starting vertices
381 #ifndef NOPERMUTE
382 degrees = degrees(nonisov); // fix the degrees array too
383 degrees.PrintInfo("Degrees array");
384 #endif
385 // degrees.DebugPrint();
387 double nver = (double) degrees.TotalLength();
388
389#ifdef DETERMINISTIC
390 MTRand M(1);
391#else
392 MTRand M; // generate random numbers with Mersenne Twister
393#endif
396 if(myrank == 0)
397 {
398 for(int i=0; i<ITERS; ++i)
399 loccands[i] = M.rand();
400 copy(loccands.begin(), loccands.end(), ostream_iterator<double>(cout," ")); cout << endl;
401 transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd( multiplies<double>(), nver ));
402
403 for(int i=0; i<ITERS; ++i)
404 loccandints[i] = static_cast<int64_t>(loccands[i]);
405 copy(loccandints.begin(), loccandints.end(), ostream_iterator<double>(cout," ")); cout << endl;
406 }
408 for(int i=0; i<ITERS; ++i)
409 {
410 Cands.SetElement(i,loccandints[i]);
411 }
412
413 #define MAXTRIALS 1
414 for(int trials =0; trials < MAXTRIALS; trials++) // try different algorithms for BFS
415 {
421 MPI_Pcontrol(1,"BFS");
422
423 double MTEPS[ITERS]; double INVMTEPS[ITERS]; double TIMES[ITERS]; double EDGES[ITERS];
424 for(int i=0; i<ITERS; ++i)
425 {
426 // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval);
427 FullyDistVec<int64_t, int64_t> parents ( Aeff.getcommgrid(), Aeff.getncol(), (int64_t) -1); // identity is -1
428
429 // FullyDistSpVec ( shared_ptr<CommGrid> grid, IT glen);
430 FullyDistSpVec<int64_t, int64_t> fringe(Aeff.getcommgrid(), Aeff.getncol()); // numerical values are stored 0-based
431
433 double t1 = MPI_Wtime();
434
435 fringe.SetElement(Cands[i], Cands[i]);
436 int iterations = 0;
437 while(fringe.getnnz() > 0)
438 {
439 fringe.setNumToInd();
440 fringe = SpMV(Aeff, fringe,optbuf); // SpMV with sparse vector (with indexisvalue flag preset), optimization enabled
441 fringe = EWiseMult(fringe, parents, true, (int64_t) -1); // clean-up vertices that already has parents
442 parents.Set(fringe);
443 iterations++;
444 }
446 double t2 = MPI_Wtime();
447
449 parentsp.Apply(myset<int64_t>(1));
450
451 // we use degrees on the directed graph, so that we don't count the reverse edges in the teps score
452 int64_t nedges = EWiseMult(parentsp, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);
453
455 outnew << i << "th starting vertex was " << Cands[i] << endl;
456 outnew << "Number iterations: " << iterations << endl;
457 outnew << "Number of vertices found: " << parentsp.Reduce(plus<int64_t>(), (int64_t) 0) << endl;
458 outnew << "Number of edges traversed: " << nedges << endl;
459 outnew << "BFS time: " << t2-t1 << " seconds" << endl;
460 outnew << "MTEPS: " << static_cast<double>(nedges) / (t2-t1) / 1000000.0 << endl;
461 outnew << "Total communication (average so far): " << (cblas_allgathertime + cblas_alltoalltime) / (i+1) << endl;
462 TIMES[i] = t2-t1;
463 EDGES[i] = nedges;
464 MTEPS[i] = static_cast<double>(nedges) / (t2-t1) / 1000000.0;
466 }
467 SpParHelper::Print("Finished\n");
469 MPI_Pcontrol(-1,"BFS");
470
471
472 os << "Per iteration communication times: " << endl;
473 os << "AllGatherv: " << cblas_allgathertime / ITERS << endl;
474 os << "AlltoAllv: " << cblas_alltoalltime / ITERS << endl;
475 os << "Transvec: " << cblas_transvectime / ITERS << endl;
476
477 os << "Per iteration computation times: " << endl;
478 os << "MergeCont: " << cblas_mergeconttime / ITERS << endl;
479 os << "LocalSpmv: " << cblas_localspmvtime / ITERS << endl;
480
481 sort(EDGES, EDGES+ITERS);
482 os << "--------------------------" << endl;
483 os << "Min nedges: " << EDGES[0] << endl;
484 os << "First Quartile nedges: " << (EDGES[(ITERS/4)-1] + EDGES[ITERS/4])/2 << endl;
485 os << "Median nedges: " << (EDGES[(ITERS/2)-1] + EDGES[ITERS/2])/2 << endl;
486 os << "Third Quartile nedges: " << (EDGES[(3*ITERS/4) -1 ] + EDGES[3*ITERS/4])/2 << endl;
487 os << "Max nedges: " << EDGES[ITERS-1] << endl;
488 double mean = accumulate( EDGES, EDGES+ITERS, 0.0 )/ ITERS;
489 vector<double> zero_mean(ITERS); // find distances to the mean
491 // self inner-product is sum of sum of squares
492 double deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
493 deviation = sqrt( deviation / (ITERS-1) );
494 os << "Mean nedges: " << mean << endl;
495 os << "STDDEV nedges: " << deviation << endl;
496 os << "--------------------------" << endl;
497
498 sort(TIMES,TIMES+ITERS);
499 os << "Min time: " << TIMES[0] << " seconds" << endl;
500 os << "First Quartile time: " << (TIMES[(ITERS/4)-1] + TIMES[ITERS/4])/2 << " seconds" << endl;
501 os << "Median time: " << (TIMES[(ITERS/2)-1] + TIMES[ITERS/2])/2 << " seconds" << endl;
502 os << "Third Quartile time: " << (TIMES[(3*ITERS/4)-1] + TIMES[3*ITERS/4])/2 << " seconds" << endl;
503 os << "Max time: " << TIMES[ITERS-1] << " seconds" << endl;
504 mean = accumulate( TIMES, TIMES+ITERS, 0.0 )/ ITERS;
506 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
507 deviation = sqrt( deviation / (ITERS-1) );
508 os << "Mean time: " << mean << " seconds" << endl;
509 os << "STDDEV time: " << deviation << " seconds" << endl;
510 os << "--------------------------" << endl;
511
512 sort(MTEPS, MTEPS+ITERS);
513 os << "Min MTEPS: " << MTEPS[0] << endl;
514 os << "First Quartile MTEPS: " << (MTEPS[(ITERS/4)-1] + MTEPS[ITERS/4])/2 << endl;
515 os << "Median MTEPS: " << (MTEPS[(ITERS/2)-1] + MTEPS[ITERS/2])/2 << endl;
516 os << "Third Quartile MTEPS: " << (MTEPS[(3*ITERS/4)-1] + MTEPS[3*ITERS/4])/2 << endl;
517 os << "Max MTEPS: " << MTEPS[ITERS-1] << endl;
518 transform(MTEPS, MTEPS+ITERS, INVMTEPS, safemultinv<double>()); // returns inf for zero teps
519 double hteps = static_cast<double>(ITERS) / accumulate(INVMTEPS, INVMTEPS+ITERS, 0.0);
520 os << "Harmonic mean of MTEPS: " << hteps << endl;
522 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
523 deviation = sqrt( deviation / (ITERS-1) ) * (hteps*hteps); // harmonic_std_dev
524 os << "Harmonic standard deviation of MTEPS: " << deviation << endl;
525 SpParHelper::Print(os.str());
526 }
527 }
528 MPI_Finalize();
529 return 0;
530}
531
int main()
Definition Driver.cpp:12
SelectMaxSRing< bool, int64_t > SR
Definition SpMMError.cpp:18
SpParMat< int64_t, int, SpDCCols< int32_t, int > > PSpMat_s32p64_Int
#define ITERS
double cblas_transvectime
double cblas_localspmvtime
int cblas_splits
double cblas_allgathertime
double cblas_alltoalltime
unsigned int highestbitset(uint64_t v)
#define EDGEFACTOR
double cblas_mergeconttime
bool from_string(T &t, const string &s, std::ios_base &(*f)(std::ios_base &))
#define MAXTRIALS
SpParMat< int64_t, bool, SpDCCols< int64_t, bool > > PSpMat_Bool
Definition auction.cpp:112
SpParMat< int64_t, bool, SpDCCols< int32_t, bool > > PSpMat_s32p64
Definition auction.cpp:113
SpParMat< int64_t, int64_t, SpDCCols< int64_t, int64_t > > PSpMat_Int64
Definition auction.cpp:114
double rand()
void GenGraph500Data(double initiator[4], int log_numverts, int edgefactor, bool scramble=false, bool packed=false)
static void Print(const std::string &s)
int nprocs
Definition comms.cpp:55
@ Column
Definition SpDefs.h:115
void PermEdges(DistEdgeList< IT > &DEL)
void RenameVertices(DistEdgeList< IU > &DEL)
void Symmetricize(PARMAT &A)
Definition ReadMatDist.h:29
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
Definition Friends.h:834
double A
unsigned __int64 uint64_t
Definition stdint.h:90
int64_t operator()(int64_t x, const int64_t &y) const