COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
SingleChildBFS.cpp
Go to the documentation of this file.
1#define DETERMINISTIC
2#include "CombBLAS/CombBLAS.h"
3#include <mpi.h>
4#include <sys/time.h>
5#include <iostream>
6#include <functional>
7#include <algorithm>
8#include <vector>
9#include <string>
10#include <sstream>
11#ifdef THREADED
12 #ifndef _OPENMP
13 #define _OPENMP
14 #endif
15#endif
16
17#ifdef _OPENMP
18 #include <omp.h>
19#endif
20
21
27#ifdef _OPENMP
29#else
31#endif
32
33#define ITERS 16
34#define EDGEFACTOR 16
35using namespace std;
36using namespace combblas;
37
38// 64-bit floor(log2(x)) function
39// note: least significant bit is the "zeroth" bit
40// pre: v > 0
41unsigned int highestbitset(uint64_t v)
42{
43 // b in binary is {10,1100, 11110000, 1111111100000000 ...}
44 const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL};
45 const unsigned int S[] = {1, 2, 4, 8, 16, 32};
46 int i;
47
48 unsigned int r = 0; // result of log2(v) will go here
49 for (i = 5; i >= 0; i--)
50 {
51 if (v & b[i]) // highestbitset is on the left half (i.e. v > S[i] for sure)
52 {
53 v >>= S[i];
54 r |= S[i];
55 }
56 }
57 return r;
58}
59
60template <class T>
61bool from_string(T & t, const string& s, std::ios_base& (*f)(std::ios_base&))
62{
64 return !(iss >> f >> t).fail();
65}
66
67
68template <typename PARMAT>
70{
71 // boolean addition is practically a "logical or"
72 // therefore this doesn't destruct any links
73 PARMAT AT = A;
74 AT.Transpose();
75 A += AT;
76}
77
82struct prunediscovered: public std::binary_function<int64_t, int64_t, int64_t >
83{
85 {
86 return ( y == -1 ) ? x: -1;
87 }
88};
89
90static void MPI_randuniq(void * invec, void * inoutvec, int * len, MPI_Datatype *datatype)
91{
95 for (int i=0; i<*len; i++ )
97}
98
99int main(int argc, char* argv[])
100{
101 int nprocs, myrank;
102 MPI_Init(&argc, &argv);
105 if(argc < 2)
106 {
107 if(myrank == 0)
108 {
109 cout << "Usage: ./scbfs <Scale>" << endl;
110 cout << "Example: mpirun -np 4 ./scbfs 20" << endl;
111 }
112 MPI_Finalize();
113 return -1;
114 }
115 {
118 typedef SpParMat < int64_t, bool, SpDCCols<int32_t,bool> > PSpMat_s32p64; // sequentially use 32-bits for local matrices, but parallel semantics are 64-bits
119 typedef SpParMat < int64_t, int, SpDCCols<int32_t,int> > PSpMat_s32p64_Int; // similarly mixed, but holds integers as upposed to booleans
121
122 // Declare objects
123 PSpMat_Bool A;
125 FullyDistVec<int64_t, int64_t> degrees; // degrees of vertices (including multi-edges and self-loops)
126 FullyDistVec<int64_t, int64_t> nonisov; // id's of non-isolated (connected) vertices
127 unsigned scale;
128 OptBuf<int32_t, int64_t> optbuf; // let indices be 32-bits
129
130 scale = static_cast<unsigned>(atoi(argv[1]));
132 outs << "Forcing scale to : " << scale << endl;
133
134 // this is an undirected graph, so A*x does indeed BFS
135 double initiator[4] = {.57, .19, .19, .05};
136
137 double t01 = MPI_Wtime();
138 double t02;
140 DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, true ); // generate packed edges
141 SpParHelper::Print("Generated renamed edge lists\n");
142 t02 = MPI_Wtime();
144 tinfo << "Generation took " << t02-t01 << " seconds" << endl;
146
147
148 // Start Kernel #1
150 double t1 = MPI_Wtime();
151
152 // conversion from distributed edge list, keeps self-loops, sums duplicates
153 PSpMat_s32p64_Int * G = new PSpMat_s32p64_Int(*DEL, false);
154 delete DEL; // free memory before symmetricizing
155 SpParHelper::Print("Created Sparse Matrix (with int32 local indices and values)\n");
156
158 double redts = MPI_Wtime();
159 G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // Identity is 0
161 double redtf = MPI_Wtime();
162
164 redtimeinfo << "Calculated degrees in " << redtf-redts << " seconds" << endl;
166 A = PSpMat_Bool(*G); // Convert to Boolean
167 delete G;
168 int64_t removed = A.RemoveLoops();
169
171 loopinfo << "Converted to Boolean and removed " << removed << " loops" << endl;
173 A.PrintInfo();
174
177 A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
178 A.Reduce(*RowSums, Row, plus<int64_t>(), static_cast<int64_t>(0));
179 SpParHelper::Print("Reductions done\n");
180 ColSums->EWiseApply(*RowSums, plus<int64_t>());
181 delete RowSums;
182 SpParHelper::Print("Intersection of colsums and rowsums found\n");
183
184 nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
185 delete ColSums;
186
187 SpParHelper::Print("Found (and permuted) non-isolated vertices\n");
188 nonisov.RandPerm(); // so that A(v,v) is load-balanced (both memory and time wise)
189 A.PrintInfo();
190 #ifndef NOPERMUTE
191 A(nonisov, nonisov, true); // in-place permute to save memory
192 SpParHelper::Print("Dropped isolated vertices from input\n");
193 A.PrintInfo();
194 #endif
195
196 Aeff = PSpMat_s32p64(A); // Convert to 32-bit local integers
197 A.FreeMemory();
198 Symmetricize(Aeff); // A += A';
199 SpParHelper::Print("Symmetricized\n");
200
201 Aeff.OptimizeForGraph500(optbuf); // Should be called before threading is activated
202 #ifdef THREADED
204 tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
206 Aeff.ActivateThreading(cblas_splits);
207 #endif
208 Aeff.PrintInfo();
209
211 double t2=MPI_Wtime();
212
214 k1timeinfo << (t2-t1) - (redtf-redts) << " seconds elapsed for Kernel #1" << endl;
216
217 Aeff.PrintInfo();
218 float balance = Aeff.LoadImbalance();
220 outs2 << "Load balance: " << balance << endl;
222
224
225 // Now that every remaining vertex is non-isolated, randomly pick ITERS many of them as starting vertices
226 #ifndef NOPERMUTE
227 degrees = degrees(nonisov); // fix the degrees array too
228 degrees.PrintInfo("Degrees array");
229 #endif
230 // degrees.DebugPrint();
232 double nver = (double) degrees.TotalLength();
233
234#ifdef DETERMINISTIC
235 MTRand M(1);
236#else
237 MTRand M; // generate random numbers with Mersenne Twister
238#endif
241 if(myrank == 0)
242 {
243 for(int i=0; i<ITERS; ++i)
244 loccands[i] = M.rand();
245 copy(loccands.begin(), loccands.end(), ostream_iterator<double>(cout," ")); cout << endl;
246 transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd( multiplies<double>(), nver ));
247
248 for(int i=0; i<ITERS; ++i)
249 loccandints[i] = static_cast<int64_t>(loccands[i]);
250 copy(loccandints.begin(), loccandints.end(), ostream_iterator<double>(cout," ")); cout << endl;
251 }
253 for(int i=0; i<ITERS; ++i)
254 Cands.SetElement(i,loccandints[i]);
255
256 double MTEPS[ITERS]; double INVMTEPS[ITERS]; double TIMES[ITERS]; double EDGES[ITERS];
257 for(int i=0; i<ITERS; ++i)
258 {
259 // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval);
260 FullyDistVec<int64_t, int64_t> parents ( Aeff.getcommgrid(), Aeff.getncol(), (int64_t) -1); // identity is -1
261
262 // FullyDistSpVec ( shared_ptr<CommGrid> grid, IT glen);
263 FullyDistSpVec<int64_t, int64_t> fringe(Aeff.getcommgrid(), Aeff.getncol()); // numerical values are stored 0-based
264
266 double t1 = MPI_Wtime();
267
268 fringe.SetElement(Cands[i], Cands[i]);
269 int iterations = 0;
270
272 MPI_Op_create(MPI_randuniq, true, &randreducempiop);
273
274 while(fringe.getnnz() > 0)
275 {
276 fringe.setNumToInd();
277 fringe = SpMV(Aeff, fringe,optbuf); // SpMV with sparse vector (with indexisvalue flag preset), optimization enabled
278 fringe = EWiseMult(fringe, parents, true, (int64_t) -1); // clean-up vertices that already has parents
279 fringe.PrintInfo("Frontier");
281 singlechild.PrintInfo("Single child frontier");
282
283 parents.Set(fringe);
284 iterations++;
285 }
287 double t2 = MPI_Wtime();
288
290 parentsp.Apply(myset<int64_t>(1));
291
292 // we use degrees on the directed graph, so that we don't count the reverse edges in the teps score
293 int64_t nedges = EWiseMult(parentsp, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);
294
296 outnew << i << "th starting vertex was " << Cands[i] << endl;
297 outnew << "Number iterations: " << iterations << endl;
298 outnew << "Number of vertices found: " << parentsp.Reduce(plus<int64_t>(), (int64_t) 0) << endl;
299 outnew << "Number of edges traversed: " << nedges << endl;
300 outnew << "BFS time: " << t2-t1 << " seconds" << endl;
301 outnew << "MTEPS: " << static_cast<double>(nedges) / (t2-t1) / 1000000.0 << endl;
302 outnew << "Total communication (average so far): " << (cblas_allgathertime + cblas_alltoalltime) / (i+1) << endl;
303 TIMES[i] = t2-t1;
304 EDGES[i] = nedges;
305 MTEPS[i] = static_cast<double>(nedges) / (t2-t1) / 1000000.0;
307 }
308 SpParHelper::Print("Finished\n");
310
311
312 sort(EDGES, EDGES+ITERS);
313 os << "--------------------------" << endl;
314 os << "Min nedges: " << EDGES[0] << endl;
315 os << "First Quartile nedges: " << (EDGES[(ITERS/4)-1] + EDGES[ITERS/4])/2 << endl;
316 os << "Median nedges: " << (EDGES[(ITERS/2)-1] + EDGES[ITERS/2])/2 << endl;
317 os << "Third Quartile nedges: " << (EDGES[(3*ITERS/4) -1 ] + EDGES[3*ITERS/4])/2 << endl;
318 os << "Max nedges: " << EDGES[ITERS-1] << endl;
319 double mean = accumulate( EDGES, EDGES+ITERS, 0.0 )/ ITERS;
320 vector<double> zero_mean(ITERS); // find distances to the mean
322 // self inner-product is sum of sum of squares
323 double deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
324 deviation = sqrt( deviation / (ITERS-1) );
325 os << "Mean nedges: " << mean << endl;
326 os << "STDDEV nedges: " << deviation << endl;
327 os << "--------------------------" << endl;
328
329 sort(TIMES,TIMES+ITERS);
330 os << "Min time: " << TIMES[0] << " seconds" << endl;
331 os << "First Quartile time: " << (TIMES[(ITERS/4)-1] + TIMES[ITERS/4])/2 << " seconds" << endl;
332 os << "Median time: " << (TIMES[(ITERS/2)-1] + TIMES[ITERS/2])/2 << " seconds" << endl;
333 os << "Third Quartile time: " << (TIMES[(3*ITERS/4)-1] + TIMES[3*ITERS/4])/2 << " seconds" << endl;
334 os << "Max time: " << TIMES[ITERS-1] << " seconds" << endl;
335 mean = accumulate( TIMES, TIMES+ITERS, 0.0 )/ ITERS;
337 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
338 deviation = sqrt( deviation / (ITERS-1) );
339 os << "Mean time: " << mean << " seconds" << endl;
340 os << "STDDEV time: " << deviation << " seconds" << endl;
341 os << "--------------------------" << endl;
342
343 sort(MTEPS, MTEPS+ITERS);
344 os << "Min MTEPS: " << MTEPS[0] << endl;
345 os << "First Quartile MTEPS: " << (MTEPS[(ITERS/4)-1] + MTEPS[ITERS/4])/2 << endl;
346 os << "Median MTEPS: " << (MTEPS[(ITERS/2)-1] + MTEPS[ITERS/2])/2 << endl;
347 os << "Third Quartile MTEPS: " << (MTEPS[(3*ITERS/4)-1] + MTEPS[3*ITERS/4])/2 << endl;
348 os << "Max MTEPS: " << MTEPS[ITERS-1] << endl;
349 transform(MTEPS, MTEPS+ITERS, INVMTEPS, safemultinv<double>()); // returns inf for zero teps
350 double hteps = static_cast<double>(ITERS) / accumulate(INVMTEPS, INVMTEPS+ITERS, 0.0);
351 os << "Harmonic mean of MTEPS: " << hteps << endl;
353 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
354 deviation = sqrt( deviation / (ITERS-1) ) * (hteps*hteps); // harmonic_std_dev
355 os << "Harmonic standard deviation of MTEPS: " << deviation << endl;
356 SpParHelper::Print(os.str());
357 }
358 MPI_Finalize();
359 return 0;
360}
361
int main()
Definition Driver.cpp:12
#define ITERS
double cblas_transvectime
double cblas_localspmvtime
int cblas_splits
double cblas_allgathertime
double cblas_alltoalltime
unsigned int highestbitset(uint64_t v)
#define EDGEFACTOR
double cblas_mergeconttime
bool from_string(T &t, const string &s, std::ios_base &(*f)(std::ios_base &))
SelectMaxSRing< bool, int64_t > SR
Definition SpMMError.cpp:18
SpParMat< int64_t, int, SpDCCols< int32_t, int > > PSpMat_s32p64_Int
SpParMat< int64_t, bool, SpDCCols< int64_t, bool > > PSpMat_Bool
Definition auction.cpp:112
SpParMat< int64_t, bool, SpDCCols< int32_t, bool > > PSpMat_s32p64
Definition auction.cpp:113
SpParMat< int64_t, int64_t, SpDCCols< int64_t, int64_t > > PSpMat_Int64
Definition auction.cpp:114
double rand()
void GenGraph500Data(double initiator[4], int log_numverts, int edgefactor, bool scramble=false, bool packed=false)
static void Print(const std::string &s)
int nprocs
Definition comms.cpp:55
@ Column
Definition SpDefs.h:115
void Symmetricize(PARMAT &A)
Definition ReadMatDist.h:29
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
Definition Friends.h:834
double A
unsigned __int64 uint64_t
Definition stdint.h:90
int64_t operator()(int64_t x, const int64_t &y) const