COMBINATORIAL_BLAS 1.6
 
DirOptBFS.cpp
1/****************************************************************/
2/* Parallel Combinatorial BLAS Library (for Graph Computations) */
3/* version 1.6 -------------------------------------------------*/
4/* date: 6/15/2017 ---------------------------------------------*/
5/* authors: Ariful Azad, Aydin Buluc --------------------------*/
6/****************************************************************/
7/*
8 Copyright (c) 2010-2017, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29
30#define DETERMINISTIC
31#define BOTTOMUPTIME
32#include <mpi.h>
33#include <sys/time.h>
34#include <iostream>
35#include <iomanip>
36#include <functional>
37#include <algorithm>
38#include <vector>
39#include <string>
40#include <sstream>
41#ifdef THREADED
42 #ifndef _OPENMP
43 #define _OPENMP
44 #endif
45 #include <omp.h>
46#endif
47
48// These macros should be defined before stdint.h is included
49#ifndef __STDC_CONSTANT_MACROS
50#define __STDC_CONSTANT_MACROS
51#endif
52#ifndef __STDC_LIMIT_MACROS
53#define __STDC_LIMIT_MACROS
54#endif
55#include <stdint.h>
56
57double cblas_alltoalltime;
58double cblas_allgathertime;
59double cblas_mergeconttime;
60double cblas_transvectime;
61double cblas_localspmvtime;
62double cblas_ewisemulttime;
63
64double bottomup_sendrecv;
65double bottomup_allgather;
66double bottomup_total;
67double bottomup_convert;
68
69double bu_local;
70double bu_update;
71double bu_rotate;
72int cblas_splits;
73
74
75#include "CombBLAS/CombBLAS.h"
76
77using namespace combblas;
78using namespace std;
79
80#define ITERS 64
81#define EDGEFACTOR 16
82
83// 64-bit floor(log2(x)) function
84// note: least significant bit is the "zeroth" bit
85// pre: v > 0
86unsigned int highestbitset(uint64_t v)
87{
88 // b in binary is {10,1100, 11110000, 1111111100000000 ...}
89 const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL};
90 const unsigned int S[] = {1, 2, 4, 8, 16, 32};
91 int i;
92
93 unsigned int r = 0; // result of log2(v) will go here
94 for (i = 5; i >= 0; i--)
95 {
96 if (v & b[i]) // highestbitset is on the left half (i.e. v > S[i] for sure)
97 {
98 v >>= S[i];
99 r |= S[i];
100 }
101 }
102 return r;
103}
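// A quick illustration of the floor(log2(x)) contract above (illustrative
// only, not used by the driver); the values can be checked by hand:
//
//   highestbitset(1)  == 0    // the least significant bit is bit zero
//   highestbitset(2)  == 1
//   highestbitset(3)  == 1    // floor, not rounding
//   highestbitset(1ULL << 42)       == 42
//   highestbitset((1ULL << 42) + 7) == 42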
104
105template <class T>
106bool from_string(T & t, const string& s, ios_base& (*f)(ios_base&))
107{
108 istringstream iss(s);
109 return !(iss >> f >> t).fail();
110}
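// from_string() is a thin wrapper around istringstream: it parses s into t
// using the stream manipulator f and reports success. A minimal illustrative
// use (the driver itself reads its scale argument with atoi below):
//
//   unsigned s;
//   bool ok = from_string(s, string("25"), std::dec);   // ok == true, s == 25
//   bool hx = from_string(s, string("ff"), std::hex);   // hx == true, s == 255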
111
112
113template <typename PARMAT>
114void Symmetricize(PARMAT & A)
115{
116 // boolean addition is practically a "logical or"
117 // therefore this doesn't destroy any existing edges
118 PARMAT AT = A;
119 AT.Transpose();
120 A += AT;
121}
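// Because the matrices here hold booleans, the += above reduces to a logical
// OR of the two edge sets: A becomes the union of the original arcs and their
// reverses. Tiny worked example (illustrative): a directed graph with arcs
// 0->1 and 2->1 symmetricizes to the undirected edges {0,1} and {1,2}; no arc
// is dropped and nothing is double-counted.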
122
127struct prunediscovered: public binary_function<int64_t, int64_t, int64_t >
128{
129 int64_t operator()(int64_t x, const int64_t & y) const
130 {
131 return ( y == -1 ) ? x: -1;
132 }
133};
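// prunediscovered keeps a candidate parent x only while the corresponding
// entry y of the parents vector is still -1 (undiscovered):
//
//   prunediscovered()(7, -1) == 7    // vertex not yet discovered: keep x
//   prunediscovered()(7,  3) == -1   // already has parent 3: prune
//
// It is meant for element-wise combines of a fringe with the parents vector;
// this particular driver prunes its fringe with EWiseMult instead.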
134
135int main(int argc, char* argv[])
136{
137#ifdef THREADED
138 cblas_splits = omp_get_max_threads();
139#else
140 cblas_splits = 1;
141#endif
142
143
144 int nprocs, myrank;
145#ifdef _OPENMP
146 int provided, flag, claimed;
147 MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
148 MPI_Is_thread_main( &flag );
149 if (!flag)
150 SpParHelper::Print("This thread called init_thread but Is_thread_main gave false\n");
151 MPI_Query_thread( &claimed );
152 if (claimed != provided)
153 SpParHelper::Print("Query thread gave different thread level than requested\n");
154#else
155 MPI_Init(&argc, &argv);
156#endif
157
158 MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
159 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
160 if(argc < 2)
161 {
162 if(myrank == 0)
163 {
164 cout << "Usage: ./dobfs <Scale>" << endl;
165 cout << "Example: ./dobfs 25" << endl;
166 }
167 MPI_Finalize();
168 return -1;
169 }
170 {
171 typedef SpParMat < int64_t, bool, SpDCCols<int64_t,bool> > PSpMat_Bool;
172 typedef SpParMat < int64_t, bool, SpDCCols<int32_t,bool> > PSpMat_s32p64; // sequentially use 32-bits for local matrices, but parallel semantics are 64-bits
173 typedef SpParMat < int64_t, int, SpDCCols<int32_t,int> > PSpMat_s32p64_Int; // similarly mixed, but holds integers as opposed to booleans
174
175 // Declare objects
176 PSpMat_Bool A;
177 PSpMat_s32p64 Aeff;
178 PSpMat_s32p64 ALocalT;
179 shared_ptr<CommGrid> fullWorld;
180 fullWorld.reset( new CommGrid(MPI_COMM_WORLD, 0, 0) );
181 FullyDistVec<int64_t, int64_t> degrees(fullWorld); // degrees of vertices (including multi-edges and self-loops)
182 FullyDistVec<int64_t, int64_t> nonisov(fullWorld); // id's of non-isolated (connected) vertices
183 unsigned scale;
184 OptBuf<int32_t, int64_t> optbuf; // let indices be 32-bits
185
186 scale = static_cast<unsigned>(atoi(argv[1]));
187 ostringstream outs;
188 outs << "Forcing scale to : " << scale << endl;
189 SpParHelper::Print(outs.str());
190
191 SpParHelper::Print("Using fast vertex permutations; skipping edge permutations (like v2.1)\n");
192
193 // this is an undirected graph, so A*x does indeed BFS
194 double initiator[4] = {.57, .19, .19, .05};
195
196 double t01 = MPI_Wtime();
197 double t02;
198 DistEdgeList<int64_t> * DEL = new DistEdgeList<int64_t>(fullWorld);
199 DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, true ); // generate packed edges
200 SpParHelper::Print("Generated renamed edge lists\n");
201 t02 = MPI_Wtime();
202 ostringstream tinfo;
203 tinfo << "Generation took " << t02-t01 << " seconds" << endl;
204 SpParHelper::Print(tinfo.str());
205
206
207 // Start Kernel #1
208 MPI_Barrier(MPI_COMM_WORLD);
209 double t1 = MPI_Wtime();
210
211 // conversion from distributed edge list, keeps self-loops, sums duplicates
212 PSpMat_s32p64_Int * G = new PSpMat_s32p64_Int(*DEL, false);
213 delete DEL; // free memory before symmetricizing
214 SpParHelper::Print("Created Sparse Matrix (with int32 local indices and values)\n");
215
217 double redts = MPI_Wtime();
218 G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // Identity is 0
220 double redtf = MPI_Wtime();
221
222 ostringstream redtimeinfo;
223 redtimeinfo << "Calculated degrees in " << redtf-redts << " seconds" << endl;
224 SpParHelper::Print(redtimeinfo.str());
225 A = PSpMat_Bool(*G); // Convert to Boolean
226 delete G;
227 int64_t removed = A.RemoveLoops();
228
229 ostringstream loopinfo;
230 loopinfo << "Converted to Boolean and removed " << removed << " loops" << endl;
231 SpParHelper::Print(loopinfo.str());
232 A.PrintInfo();
233
234 FullyDistVec<int64_t, int64_t> * ColSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
235 FullyDistVec<int64_t, int64_t> * RowSums = new FullyDistVec<int64_t, int64_t>(A.getcommgrid());
236 A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0));
237 A.Reduce(*RowSums, Row, plus<int64_t>(), static_cast<int64_t>(0));
238 SpParHelper::Print("Reductions done\n");
239 ColSums->EWiseApply(*RowSums, plus<int64_t>());
240 SpParHelper::Print("Intersection of colsums and rowsums found\n");
241 delete RowSums;
242
243 nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
244 delete ColSums;
245
246 nonisov.RandPerm(); // so that A(v,v) is load-balanced (both memory and time wise)
247 SpParHelper::Print("Found non-isolated vertices\n");
248 A.PrintInfo();
249
250#ifndef NOPERMUTE
251 A(nonisov, nonisov, true); // in-place permute to save memory
252 SpParHelper::Print("Dropped isolated vertices from input\n");
253 A.PrintInfo();
254#endif
255
256 Aeff = PSpMat_s32p64(A); // Convert to 32-bit local integers
257 A.FreeMemory();
258 SpParHelper::Print("Converted to 32-bit integers\n");
259
260 Symmetricize(Aeff); // A += A';
261 SpParHelper::Print("Symmetricized\n");
262
263 Aeff.OptimizeForGraph500(optbuf); // Should be called before threading is activated
264 ALocalT = PSpMat_s32p64(Aeff.seq().TransposeConstPtr(), Aeff.getcommgrid()); // this should be copied before the threading is activated
265 #ifdef THREADED
266 tinfo << "Threading activated with " << cblas_splits << " threads" << endl;
267 SpParHelper::Print(tinfo.str());
268 Aeff.ActivateThreading(cblas_splits);
269 #endif
270 Aeff.PrintInfo();
271
273 double t2=MPI_Wtime();
274
275 ostringstream k1timeinfo;
276 k1timeinfo << (t2-t1) - (redtf-redts) << " seconds elapsed for Kernel #1" << endl;
277 SpParHelper::Print(k1timeinfo.str());
278
279 Aeff.PrintInfo();
280 float balance = Aeff.LoadImbalance();
281 ostringstream lbout;
282 lbout << "Load balance: " << balance << endl;
283 SpParHelper::Print(lbout.str());
284
285 MPI_Barrier(MPI_COMM_WORLD);
286 t1 = MPI_Wtime();
287
288 // Now that every remaining vertex is non-isolated, randomly pick ITERS many of them as starting vertices
289 #ifndef NOPERMUTE
290 degrees = degrees(nonisov); // fix the degrees array too
291 degrees.PrintInfo("Degrees array");
292 #endif
293 // degrees.DebugPrint();
294 FullyDistVec<int64_t, int64_t> Cands(A.getcommgrid(), ITERS, 0);
295 double nver = (double) degrees.TotalLength();
296
297 #ifdef DETERMINISTIC
298 uint64_t seed = 1383098845;
299 #else
300 uint64_t seed= time(NULL);
301 #endif
302 MTRand M(seed); // generate random numbers with Mersenne Twister
303
304 vector<double> loccands(ITERS);
305 vector<int64_t> loccandints(ITERS);
306 if(myrank == 0)
307 {
308 for(int i=0; i<ITERS; ++i)
309 loccands[i] = M.rand();
310 copy(loccands.begin(), loccands.end(), ostream_iterator<double>(cout," ")); cout << endl;
311 transform(loccands.begin(), loccands.end(), loccands.begin(), bind2nd( multiplies<double>(), nver ));
312
313 for(int i=0; i<ITERS; ++i)
314 loccandints[i] = static_cast<int64_t>(loccands[i]);
315 copy(loccandints.begin(), loccandints.end(), ostream_iterator<double>(cout," ")); cout << endl;
316 }
317
318 MPI_Bcast(&(loccandints[0]), ITERS, MPIType<int64_t>(), 0, MPI_COMM_WORLD);
319 for(int i=0; i<ITERS; ++i)
320 {
321 Cands.SetElement(i,loccandints[i]);
322 }
323
324 #define MAXTRIALS 1
325 for(int trials =0; trials < MAXTRIALS; trials++) // try different algorithms for BFS if MAXTRIALS > 1
326 {
327 cblas_allgathertime = 0;
328 cblas_alltoalltime = 0;
329 cblas_mergeconttime = 0;
330 cblas_transvectime = 0;
331 cblas_localspmvtime = 0;
332 cblas_ewisemulttime = 0;
333 bottomup_sendrecv = 0;
334 bottomup_allgather = 0;
335 bottomup_total = 0;
336 bottomup_convert = 0;
337
338 bu_local = 0;
339 bu_update = 0;
340 bu_rotate = 0;
341
342 MPI_Pcontrol(1,"BFS");
343
344 double MTEPS[ITERS]; double INVMTEPS[ITERS]; double TIMES[ITERS]; double EDGES[ITERS];
345
346 for(int i=0; i<ITERS; ++i)
347 {
348 SpParHelper::Print("A BFS iteration is starting\n");
349
350 // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval);
351 FullyDistVec<int64_t, int64_t> parents ( Aeff.getcommgrid(), Aeff.getncol(), (int64_t) -1); // identity is -1
352
353 // FullyDistSpVec ( shared_ptr<CommGrid> grid, IT glen);
354 FullyDistSpVec<int64_t, int64_t> fringe(Aeff.getcommgrid(), Aeff.getncol()); // numerical values are stored 0-based
355
356 ostringstream devout;
357 devout.setf(ios::fixed);
358
360 double t1 = MPI_Wtime();
361
362 int64_t num_edges = Aeff.getnnz();
363 int64_t num_nodes = Aeff.getncol();
364 int64_t up_cutoff = num_edges / 20;
365 int64_t down_cutoff = (((double) num_nodes) * ((double)num_nodes)) / ((double) num_edges * 12.0);
366
367 devout << "param " << num_nodes << " vertices with " << num_edges << " edges" << endl;
368 devout << up_cutoff << " up and " << down_cutoff << " down" << endl;
369
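// The two thresholds implement the usual direction-optimizing policy
// (Beamer-style alpha/beta heuristic): run top-down SpMV steps while the
// fringe is small, switch to the bottom-up sweep once the fringe grows past
// up_cutoff = |E|/20, and drop back to top-down once it shrinks below
// down_cutoff = |V|^2 / (12 |E|). As a rough order-of-magnitude check for
// scale 25 with edgefactor 16 (|V| ~ 3.4e7, |E| ~ 1.1e9 nonzeros after
// symmetrization): up_cutoff ~ 5e7 and down_cutoff ~ 9e4, so only the few
// "bulge" iterations in the middle of the search run bottom-up.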
370 fringe.SetElement(Cands[i], Cands[i]);
371 parents.SetElement(Cands[i], Cands[i]);
372 int iterations = 0;
373
374 BitMapFringe<int64_t,int64_t> bm_fringe(fringe.getcommgrid(), fringe);
375 BitMapCarousel<int64_t,int64_t> done(Aeff.getcommgrid(), parents.TotalLength(), bm_fringe.GetSubWordDisp());
376 SpDCCols<int,bool>::SpColIter* starts = CalcSubStarts(ALocalT, fringe, done);
377 int64_t fringe_size = fringe.getnnz();
379 double pred_start = MPI_Wtime();
380 fringe.Apply(myset<int64_t>(1));
381 int64_t pred = EWiseMult(fringe, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);
382 double pred_end = MPI_Wtime();
383 devout << " s" << setw(15) << pred << setw(15) << setprecision(5) << (pred_end - pred_start) << endl;
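// pred measures the edges incident to the current fringe: after setting every
// fringe value to 1, EWiseMult with the degree vector followed by a plus
// reduction gives
//
//   pred = sum over fringe vertices v of deg(v)
//
// i.e. the number of edge endpoints a top-down step starting from this fringe
// would have to touch, the natural work estimate in direction-optimizing BFS.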
385
386 while(fringe_size > 0)
387 {
389 { // Bottom-up
391 double conv_start = MPI_Wtime();
392 done.LoadVec(parents);
393 bm_fringe.LoadFromSpVec(fringe);
394 double conv_end = MPI_Wtime();
395 devout << " c" << setw(30) << setprecision(5) << (conv_end - conv_start) << endl;
396 bottomup_convert += (conv_end - conv_start);
397
398 while (fringe_size > 0)
399 {
400 double step_start = MPI_Wtime();
401 BottomUpStep(ALocalT, fringe, bm_fringe, parents, done, starts);
402 double step_end = MPI_Wtime();
403
404 devout << setw(2) << iterations << "u" << setw(15) << fringe_size << setprecision(5) << setw(15) << (step_end-step_start) << endl;
406 iterations++;
408 fringe_size = bm_fringe.GetNumSet();
410 {
411 double conv_start = MPI_Wtime();
412 bm_fringe.UpdateSpVec(fringe);
413 double conv_end = MPI_Wtime();
414 devout << " c" << setw(30) << setprecision(5) << (conv_end - conv_start) << endl;
415 bottomup_convert += (conv_end - conv_start);
416 break;
417 }
418 }
419 }
420 else
421 { // Top-down
422 double step_start = MPI_Wtime();
423 fringe.setNumToInd();
424 fringe = SpMV(Aeff, fringe, optbuf);
425 double ewise_start = MPI_Wtime();
426 fringe = EWiseMult(fringe, parents, true, (int64_t) -1);
427 parents.Set(fringe);
428 double step_end = MPI_Wtime();
429 devout << setw(2) << iterations << "d" << setw(15) << fringe.getnnz() << setw(15) << setprecision(5) << (step_end-step_start) << endl;
431
432 pred_start = MPI_Wtime();
433 fringe.Apply(myset<int64_t>(1));
434 pred = EWiseMult(fringe, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);
435 pred_end = MPI_Wtime();
436 devout << " s" << setw(15) << pred << setw(15) << setprecision(5) << (pred_end - pred_start) << endl;
438 iterations++;
440 fringe_size = fringe.getnnz();
441 }
442 }
444 double t2 = MPI_Wtime();
445 delete[] starts;
447
448 FullyDistSpVec<int64_t, int64_t> parentsp = parents.Find(bind2nd(greater<int64_t>(), -1));
449 parentsp.Apply(myset<int64_t>(1));
450 // we use degrees on the directed graph, so that we don't count the reverse edges in the teps score
451 int64_t nedges = EWiseMult(parentsp, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);
452 int64_t nverts = parentsp.Reduce(plus<int64_t>(), (int64_t) 0);
453
454 ostringstream outnew;
455 outnew << i << "th starting vertex was " << Cands[i] << endl;
456 outnew << "Number iterations: " << iterations << endl;
457 outnew << "Number of vertices found: " << nverts << endl;
458 outnew << "Number of edges traversed: " << nedges << endl;
459 outnew << "BFS time: " << t2-t1 << " seconds" << endl;
460 outnew << "MTEPS: " << static_cast<double>(nedges) / (t2-t1) / 1000000.0 << endl;
461 outnew << "Total communication (average so far): " << (cblas_allgathertime + cblas_alltoalltime) / (i+1) << endl;
462 TIMES[i] = t2-t1;
463 EDGES[i] = nedges;
464 MTEPS[i] = static_cast<double>(nedges) / (t2-t1) / 1000000.0;
465 SpParHelper::Print(outnew.str());
466 }
467 MPI_Pcontrol(-1,"BFS");
468 SpParHelper::Print("Finished\n");
469#ifdef TIMING
470 double *bu_total, *bu_ag_all, *bu_sr_all, *bu_convert, *td_ag_all, *td_a2a_all, *td_tv_all, *td_mc_all, *td_spmv_all, *td_ewm_all;
471 if(myrank == 0)
472 {
473 bu_total = new double[nprocs];
474 bu_ag_all = new double[nprocs];
475 bu_sr_all = new double[nprocs];
476 bu_convert = new double[nprocs];
477 td_ag_all = new double[nprocs];
478 td_a2a_all = new double[nprocs];
479 td_tv_all = new double[nprocs];
480 td_mc_all = new double[nprocs];
481 td_spmv_all = new double[nprocs];
482 td_ewm_all = new double[nprocs];
483 }
484 bottomup_allgather /= static_cast<double>(ITERS);
485 bottomup_sendrecv /= static_cast<double>(ITERS);
486 bottomup_total /= static_cast<double>(ITERS);
487 bottomup_convert /= static_cast<double>(ITERS); // conversion not included in total time
488
489 cblas_allgathertime /= static_cast<double>(ITERS);
490 cblas_alltoalltime /= static_cast<double>(ITERS);
491 cblas_transvectime /= static_cast<double>(ITERS);
492 cblas_mergeconttime /= static_cast<double>(ITERS);
493 cblas_localspmvtime /= static_cast<double>(ITERS);
494 cblas_ewisemulttime /= static_cast<double>(ITERS);
495
496 MPI_Gather(&bottomup_total, 1, MPI_DOUBLE, bu_total, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
497 MPI_Gather(&bottomup_allgather, 1, MPI_DOUBLE, bu_ag_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
498 MPI_Gather(&bottomup_sendrecv, 1, MPI_DOUBLE, bu_sr_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
499 MPI_Gather(&bottomup_convert, 1, MPI_DOUBLE, bu_convert, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
500 MPI_Gather(&cblas_allgathertime, 1, MPI_DOUBLE, td_ag_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
501 MPI_Gather(&cblas_alltoalltime, 1, MPI_DOUBLE, td_a2a_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
502 MPI_Gather(&cblas_transvectime, 1, MPI_DOUBLE, td_tv_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
503 MPI_Gather(&cblas_mergeconttime, 1, MPI_DOUBLE, td_mc_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
504 MPI_Gather(&cblas_localspmvtime, 1, MPI_DOUBLE, td_spmv_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
505 MPI_Gather(&cblas_ewisemulttime, 1, MPI_DOUBLE, td_ewm_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
506
507 double bu_local_total = 0;
508 double bu_update_total = 0;
509 double bu_rotate_total = 0;
510
511 MPI_Reduce(&bu_local, &bu_local_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
512 MPI_Reduce(&bu_update, &bu_update_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
513 MPI_Reduce(&bu_rotate, &bu_rotate_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
514
515 if(myrank == 0)
516 {
517 cout << "BU Local: " << bu_local_total/nprocs << endl;
518 cout << "BU Update: " << bu_update_total/nprocs << endl;
519 cout << "BU Rotate: " << bu_rotate_total/nprocs << endl;
520
521 vector<double> total_time(nprocs, 0.0);
522 for(int i=0; i< nprocs; ++i) // find the mean performing guy
523 total_time[i] += bu_total[i] + bu_convert[i] + td_ag_all[i] + td_a2a_all[i] + td_tv_all[i] + td_mc_all[i] + td_spmv_all[i] + td_ewm_all[i];
524
525 vector<size_t> permutation = SpHelper::find_order(total_time);
526 size_t smallest = permutation[0];
527 size_t largest = permutation[nprocs-1];
528 size_t median = permutation[nprocs/2];
529
530 cout << "TOTAL (accounted) MEAN: " << accumulate( total_time.begin(), total_time.end(), 0.0 )/ static_cast<double> (nprocs) << endl;
531 cout << "TOTAL (accounted) MAX: " << total_time[0] << endl;
532 cout << "TOTAL (accounted) MIN: " << total_time[nprocs-1] << endl;
533 cout << "TOTAL (accounted) MEDIAN: " << total_time[nprocs/2] << endl;
534 cout << "-------------------------------" << endl;
535
536 cout << "Convert median: " << bu_convert[median] << endl;
537 cout << "Bottom-up allgather median: " << bu_ag_all[median] << endl;
538 cout << "Bottom-up send-recv median: " << bu_sr_all[median] << endl;
539 cout << "Bottom-up compute median: " << bu_total[median] - (bu_ag_all[median] + bu_sr_all[median]) << endl;
540 cout << "Top-down allgather median: " << td_ag_all[median] << endl;
541 cout << "Top-down all2all median: " << td_a2a_all[median] << endl;
542 cout << "Top-down transposevector median: " << td_tv_all[median] << endl;
543 cout << "Top-down mergecontributions median: " << td_mc_all[median] << endl;
544 cout << "Top-down spmsv median: " << td_spmv_all[median] << endl;
545 cout << "-------------------------------" << endl;
546
547 cout << "Convert MEAN: " << accumulate( bu_convert, bu_convert+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
548 cout << "Bottom-up total MEAN: " << accumulate( bu_total, bu_total+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
549 cout << "Bottom-up allgather MEAN: " << accumulate( bu_ag_all, bu_ag_all+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
550 cout << "Bottom-up send-recv MEAN: " << accumulate( bu_sr_all, bu_sr_all+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
551 cout << "Top-down allgather MEAN: " << accumulate( td_ag_all, td_ag_all+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
552 cout << "Top-down all2all MEAN: " << accumulate( td_a2a_all, td_a2a_all+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
553 cout << "Top-down transposevector MEAN: " << accumulate( td_tv_all, td_tv_all+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
554 cout << "Top-down mergecontributions MEAN: " << accumulate( td_mc_all, td_mc_all+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
555 cout << "Top-down spmsv MEAN: " << accumulate( td_spmv_all, td_spmv_all+nprocs, 0.0 )/ static_cast<double> (nprocs) << endl;
556 cout << "-------------------------------" << endl;
557
558
559 cout << "Bottom-up allgather fastest: " << bu_ag_all[smallest] << endl;
560 cout << "Bottom-up send-recv fastest: " << bu_sr_all[smallest] << endl;
561 cout << "Bottom-up compute fastest: " << bu_total[smallest] - (bu_ag_all[smallest] + bu_sr_all[smallest]) << endl;
562 cout << "Top-down allgather fastest: " << td_ag_all[smallest] << endl;
563 cout << "Top-down all2all fastest: " << td_a2a_all[smallest] << endl;
564 cout << "Top-down transposevector fastest: " << td_tv_all[smallest] << endl;
565 cout << "Top-down mergecontributions fastest: " << td_mc_all[smallest] << endl;
566 cout << "Top-down spmsv fastest: " << td_spmv_all[smallest] << endl;
567 cout << "-------------------------------" << endl;
568
569
570 cout << "Bottom-up allgather slowest: " << bu_ag_all[largest] << endl;
571 cout << "Bottom-up send-recv slowest: " << bu_sr_all[largest] << endl;
572 cout << "Bottom-up compute slowest: " << bu_total[largest] - (bu_ag_all[largest] + bu_sr_all[largest]) << endl;
573 cout << "Top-down allgather slowest: " << td_ag_all[largest] << endl;
574 cout << "Top-down all2all slowest: " << td_a2a_all[largest] << endl;
575 cout << "Top-down transposevector slowest: " << td_tv_all[largest] << endl;
576 cout << "Top-down mergecontributions slowest: " << td_mc_all[largest] << endl;
577 cout << "Top-down spmsv slowest: " << td_spmv_all[largest] << endl;
578 }
579#endif
580 ostringstream os;
581 sort(EDGES, EDGES+ITERS);
582 os << "--------------------------" << endl;
583 os << "Min nedges: " << EDGES[0] << endl;
584 os << "First Quartile nedges: " << (EDGES[(ITERS/4)-1] + EDGES[ITERS/4])/2 << endl;
585 os << "Median nedges: " << (EDGES[(ITERS/2)-1] + EDGES[ITERS/2])/2 << endl;
586 os << "Third Quartile nedges: " << (EDGES[(3*ITERS/4) -1 ] + EDGES[3*ITERS/4])/2 << endl;
587 os << "Max nedges: " << EDGES[ITERS-1] << endl;
588 double mean = accumulate( EDGES, EDGES+ITERS, 0.0 )/ ITERS;
589 vector<double> zero_mean(ITERS); // find distances to the mean
590 transform(EDGES, EDGES+ITERS, zero_mean.begin(), bind2nd( minus<double>(), mean ));
591 // self inner-product is sum of sum of squares
592 double deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
593 deviation = sqrt( deviation / (ITERS-1) );
594 os << "Mean nedges: " << mean << endl;
595 os << "STDDEV nedges: " << deviation << endl;
596 os << "--------------------------" << endl;
597
598 sort(TIMES,TIMES+ITERS);
599 os << "Min time: " << TIMES[0] << " seconds" << endl;
600 os << "First Quartile time: " << (TIMES[(ITERS/4)-1] + TIMES[ITERS/4])/2 << " seconds" << endl;
601 os << "Median time: " << (TIMES[(ITERS/2)-1] + TIMES[ITERS/2])/2 << " seconds" << endl;
602 os << "Third Quartile time: " << (TIMES[(3*ITERS/4)-1] + TIMES[3*ITERS/4])/2 << " seconds" << endl;
603 os << "Max time: " << TIMES[ITERS-1] << " seconds" << endl;
604 mean = accumulate( TIMES, TIMES+ITERS, 0.0 )/ ITERS;
605 transform(TIMES, TIMES+ITERS, zero_mean.begin(), bind2nd( minus<double>(), mean ));
606 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
607 deviation = sqrt( deviation / (ITERS-1) );
608 os << "Mean time: " << mean << " seconds" << endl;
609 os << "STDDEV time: " << deviation << " seconds" << endl;
610 os << "--------------------------" << endl;
611
612 sort(MTEPS, MTEPS+ITERS);
613 os << "Min MTEPS: " << MTEPS[0] << endl;
614 os << "First Quartile MTEPS: " << (MTEPS[(ITERS/4)-1] + MTEPS[ITERS/4])/2 << endl;
615 os << "Median MTEPS: " << (MTEPS[(ITERS/2)-1] + MTEPS[ITERS/2])/2 << endl;
616 os << "Third Quartile MTEPS: " << (MTEPS[(3*ITERS/4)-1] + MTEPS[3*ITERS/4])/2 << endl;
617 os << "Max MTEPS: " << MTEPS[ITERS-1] << endl;
618 transform(MTEPS, MTEPS+ITERS, INVMTEPS, safemultinv<double>()); // returns inf for zero teps
619 double hteps = static_cast<double>(ITERS) / accumulate(INVMTEPS, INVMTEPS+ITERS, 0.0);
620 os << "Harmonic mean of MTEPS: " << hteps << endl;
621 transform(INVMTEPS, INVMTEPS+ITERS, zero_mean.begin(), bind2nd( minus<double>(), 1/hteps ));
622 deviation = inner_product( zero_mean.begin(),zero_mean.end(), zero_mean.begin(), 0.0 );
623 deviation = sqrt( deviation / (ITERS-1) ) * (hteps*hteps); // harmonic_std_dev
624 os << "Harmonic standard deviation of MTEPS: " << deviation << endl;
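// Summary statistics above follow the Graph500 convention of averaging rates
// harmonically: with INVMTEPS[i] = 1/MTEPS[i],
//
//   hteps = ITERS / (INVMTEPS[0] + ... + INVMTEPS[ITERS-1])
//
// and the reported deviation is the sample standard deviation of the
// reciprocals rescaled by hteps^2, a first-order estimate of the spread of
// the harmonic mean itself. Example: two runs at 100 and 200 MTEPS give
// hteps = 2 / (0.01 + 0.005), about 133.3, not the arithmetic 150.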
625 SpParHelper::Print(os.str());
626 }
627 }
628 MPI_Finalize();
629 return 0;
630}
631