COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
SpMSpVBench.cpp
Go to the documentation of this file.
1/****************************************************************/
2/* Parallel Combinatorial BLAS Library (for Graph Computations) */
3/* version 1.5 -------------------------------------------------*/
4/* date: 10/09/2015 ---------------------------------------------*/
5/* authors: Ariful Azad, Aydin Buluc, Adam Lugowski ------------*/
6/****************************************************************/
7/*
8 Copyright (c) 2010-2015, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29#include "CombBLAS/CombBLAS.h"
30#include <mpi.h>
31#include <sys/time.h>
32#include <iostream>
33#include <functional>
34#include <algorithm>
35#include <vector>
36#include <string>
37#include <sstream>
38
39using namespace combblas;
40
41#ifdef _OPENMP
43#else
45#endif
46
52
53
54#define EDGEFACTOR 16
56using namespace std;
57
58
61typedef SpParMat < int64_t, bool, SpDCCols<int32_t,bool> > PSpMat_s32p64; // sequentially use 32-bits for local matrices, but parallel semantics are 64-bits
62typedef SpParMat < int64_t, int, SpDCCols<int32_t,int> > PSpMat_s32p64_Int; // similarly mixed, but holds integers as upposed to booleans
65
66
67template <typename PARMAT>
69{
70 // boolean addition is practically a "logical or"
71 // therefore this doesn't destruct any links
72 PARMAT AT = A;
73 AT.Transpose();
74 A += AT;
75}
76
77
78
79struct SelectMinSR
80{
82 static T_promote id(){ return -1; };
83 static bool returnedSAID() { return false; }
84 //static MPI_Op mpi_op() { return MPI_MIN; };
85
86 static T_promote add(const T_promote & arg1, const T_promote & arg2)
87 {
88 return std::min(arg1, arg2);
89 }
90
91 static T_promote multiply(const bool & arg1, const T_promote & arg2)
92 {
93 return arg2;
94 }
95
96 static void axpy(bool a, const T_promote & x, T_promote & y)
97 {
98 y = std::min(y, x);
99 }
100};
101
102
103
104
106{
107
108 int nthreads=1;
109#ifdef THREADED
110#pragma omp parallel
111 {
112 nthreads = omp_get_num_threads();
113 }
114#endif
115
118
119 double tspmvall=0, tall=0;
120 int iterall = 0;
121 int visitedE = 0;
122 int visitedV = 0;
123
124
125 for(int i=0; i<ITERS; ++i)
126 {
127 // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval);
128 FullyDistVec<int64_t, int64_t> parents ( Aeff.getcommgrid(), Aeff.getncol(), (int64_t) -1); // identity is -1
129
130 // FullyDistSpVec ( shared_ptr<CommGrid> grid, IT glen);
131 FullyDistSpVec<int64_t, int64_t> fringe(Aeff.getcommgrid(), Aeff.getncol()); // numerical values are stored 0-based
132
134 double t1 = MPI_Wtime();
135
136 fringe.SetElement(source, source);
137 int iterations = 0;
138
139 while(fringe.getnnz() > 0)
140 {
141 int64_t xnnz = fringe.getnnz();
142 fringe.setNumToInd();
143 double tstart = MPI_Wtime();
145 double tspmv = MPI_Wtime()-tstart;
146 tspmvall += tspmv;
147 int64_t ynnz = fringe.getnnz();
148 fringe = EWiseMult(fringe, parents, true, (int64_t) -1); // clean-up vertices that already has parents
149
151 outs1 << "iteration: " << iterations << " xnnz: "<< xnnz << " ynnz: " << ynnz << " SpMSpV time: " << tspmv << endl;
153
154 parents.Set(fringe);
155 iterations++;
156 }
158 double t2 = MPI_Wtime();
159 tall += (t2 - t1);
160
162 parentsp.Apply(myset<int64_t>(1));
163
164 // we use degrees on the directed graph, so that we don't count the reverse edges in the teps score
165 int64_t nedges = EWiseMult(parentsp, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);
166
167 visitedE += nedges;
168 visitedV += parentsp.Reduce(plus<int64_t>(), (int64_t) 0);
170 }
171
172 int myrank;
174 if(myrank == 0)
175 {
176 cout << "\nOverall stats:" << endl;
177 cout << " starting vertex: " << source << endl;
178 cout << " Avg number iterations: " << iterall/ITERS << endl;
179 cout << " Avg number of vertices found: " << visitedV/ITERS << endl;
180 cout << " Avg Number of edges traversed: " << visitedE/ITERS << endl;
181 cout << " Avg SpMSpV time: " << tspmvall/ITERS << endl;
182 cout << " Avg Total time: " << tall/ITERS << endl;
183 }
184
185}
186
187
189{
190
192 OptBuf<int32_t, int64_t> optbuf; // let indices be 32-bits
193 //Aeff.OptimizeForGraph500(optbuf);
194 int nthreads=1;
195#ifdef THREADED
196#pragma omp parallel
197 {
198 nthreads = omp_get_num_threads();
200 }
201 Aeff.ActivateThreading(cblas_splits);
202#endif
203
204 double tspmvall=0, tall=0;
205 int iterall = 0;
206 int visitedE = 0;
207 int visitedV = 0;
208
209
210 for(int i=0; i<ITERS; ++i)
211 {
212 // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval);
213 FullyDistVec<int64_t, int64_t> parents ( Aeff.getcommgrid(), Aeff.getncol(), (int64_t) -1); // identity is -1
214
215 // FullyDistSpVec ( shared_ptr<CommGrid> grid, IT glen);
216 FullyDistSpVec<int64_t, int64_t> fringe(Aeff.getcommgrid(), Aeff.getncol()); // numerical values are stored 0-based
217
219 double t1 = MPI_Wtime();
220
221 fringe.SetElement(source, source);
222 int iterations = 0;
223
224 while(fringe.getnnz() > 0)
225 {
226 int64_t xnnz = fringe.getnnz();
227 fringe.setNumToInd();
228 double tstart = MPI_Wtime();
230 double tspmv = MPI_Wtime()-tstart;
231 tspmvall += tspmv;
232 int64_t ynnz = fringe.getnnz();
233 fringe = EWiseMult(fringe, parents, true, (int64_t) -1); // clean-up vertices that already has parents
234
236 outs1 << "iteration: " << iterations << " xnnz: "<< xnnz << " ynnz: " << ynnz << " SpMSpV time: " << tspmv << endl;
238
239
240 parents.Set(fringe);
241 iterations++;
242 }
244 double t2 = MPI_Wtime();
245 tall += (t2 - t1);
246
248 parentsp.Apply(myset<int64_t>(1));
249
250 // we use degrees on the directed graph, so that we don't count the reverse edges in the teps score
251 int64_t nedges = EWiseMult(parentsp, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);
252
253 visitedE += nedges;
254 visitedV += parentsp.Reduce(plus<int64_t>(), (int64_t) 0);
256 }
257
258 int myrank;
260 if(myrank == 0)
261 {
262 cout << "\nOverall stats:" << endl;
263 cout << " starting vertex: " << source << endl;
264 cout << " Avg number iterations: " << iterall/ITERS << endl;
265 cout << " Avg number of vertices found: " << visitedV/ITERS << endl;
266 cout << " Avg Number of edges traversed: " << visitedE/ITERS << endl;
267 cout << " Avg SpMSpV time: " << tspmvall/ITERS << endl;
268 cout << " Avg Total time: " << tall/ITERS << endl;
269 }
270
271
272}
273
274
275
276
278{
279 int nthreads=1;
280
281
283
284
285
286
287#ifdef THREADED
288#pragma omp parallel
289 {
290 nthreads = omp_get_num_threads();
292 }
293 ABoolCSC.ActivateThreading(cblas_splits);
294#endif
295
296 double tspmvall=0, tall=0;
297 int iterall = 0;
298 int visitedE = 0;
299 int visitedV = 0;
300
301
302 for(int i=0; i<ITERS; ++i)
303 {
304 // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval);
305 FullyDistVec<int64_t, int64_t> parents ( Aeff.getcommgrid(), Aeff.getncol(), (int64_t) -1); // identity is -1
306
307 // FullyDistSpVec ( shared_ptr<CommGrid> grid, IT glen);
308 FullyDistSpVec<int64_t, int64_t> fringe(Aeff.getcommgrid(), Aeff.getncol()); // numerical values are stored 0-based
309
311 double t1 = MPI_Wtime();
312
313 fringe.SetElement(source, source);
314 int iterations = 0;
315
316 while(fringe.getnnz() > 0)
317 {
318 int64_t xnnz = fringe.getnnz();
319 fringe.setNumToInd();
320 double tstart = MPI_Wtime();
322 double tspmv = MPI_Wtime()-tstart;
323 tspmvall += tspmv;
324 int64_t ynnz = fringe.getnnz();
325 fringe = EWiseMult(fringe, parents, true, (int64_t) -1); // clean-up vertices that already has parents
327 outs1 << "iteration: " << iterations << " xnnz: "<< xnnz << " ynnz: " << ynnz << " SpMSpV time: " << tspmv << endl;
329
330 parents.Set(fringe);
331 iterations++;
332 }
334 double t2 = MPI_Wtime();
335 tall += (t2-t1);
336
338 parentsp.Apply(myset<int64_t>(1));
339
340 // we use degrees on the directed graph, so that we don't count the reverse edges in the teps score
341 int64_t nedges = EWiseMult(parentsp, degrees, false, (int64_t) 0).Reduce(plus<int64_t>(), (int64_t) 0);
342
343 visitedE += nedges;
344 visitedV += parentsp.Reduce(plus<int64_t>(), (int64_t) 0);
346 }
347
348 int myrank;
350 if(myrank == 0)
351 {
352 cout << "\nOverall stats:" << endl;
353 cout << " starting vertex: " << source << endl;
354 cout << " Avg number iterations: " << iterall/ITERS << endl;
355 cout << " Avg number of vertices found: " << visitedV/ITERS << endl;
356 cout << " Avg Number of edges traversed: " << visitedE/ITERS << endl;
357 cout << " Avg SpMSpV time: " << tspmvall/ITERS << endl;
358 cout << " Avg Total time: " << tall/ITERS << endl;
359
360
361 }
362
363
364}
365
366
367
368
369int main(int argc, char* argv[])
370{
371 int nprocs, myrank;
372
373 int provided;
376 {
377 printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
379 }
380
383 if(argc < 3)
384 {
385 if(myrank == 0)
386 {
387 cout << "Usage: ./SpMSpVBench <-input|-rmat|-er> <scale|filename> " << endl;
388 cout << " optional parameters:" << endl;
389 cout << " -source \"source of BFS\" (default: 0) " << endl;
390 cout << " -iter \"number of BFS iterations\" (default: 1)" << endl;
391 cout << "Example with a user supplied matrix:" << endl;
392 cout << " mpirun -np 4 ./SpMSpVBench -input a.mtx -source 2" << endl;
393 cout << "Example with a user supplied matrix (pre-permute the input matrix for load balance):" << endl;
394 cout << " mpirun -np 4 ./SpMSpVBench -input a.mtx -permute" << endl;
395 cout << "Example with RMAT matrix: mpirun -np 4 ./SpMSpVBench -rmat 18" << endl;
396 cout << "Example with an Erdos-Renyi matrix: mpirun -np 4 ./SpMSpVBench -er 18" << endl;
397 }
398 MPI_Finalize();
399 return -1;
400 }
401 {
403 fullWorld.reset( new CommGrid(MPI_COMM_WORLD, 0, 0) );
404 int nthreads=1;
405
406#ifdef THREADED
407#pragma omp parallel
408 {
409 nthreads = omp_get_num_threads();
410 }
411#endif
412
413
414 // Declare objects
417 FullyDistVec<int64_t, int64_t> degrees(fullWorld); // degrees of vertices (including multi-edges and self-loops)
418 FullyDistVec<int64_t, int64_t> nonisov(fullWorld); // id's of non-isolated (connected) vertices
419 unsigned scale;
420 bool scramble = false;
421 int source = 0;
422 ITERS = 1;
423 bool randpermute = false;
424 bool symm = false;
425 int maxthreads = nthreads;
426 int minthreads = nthreads;
427 string filename(argv[2]);
428 for (int i = 1; i < argc; i++)
429 {
430 if (strcmp(argv[i],"-permute")==0)
431 {
432 if(myrank == 0) cout << "Randomly permute the matrix " << endl;
433 randpermute = true;
434 }
435 if (strcmp(argv[i],"-source")==0)
436 {
437 source = atoi(argv[i + 1]);
438 if(myrank == 0) cout << "Source vertex: " << source << endl;
439 }
440 if (strcmp(argv[i],"-iter")==0)
441 {
442 ITERS = atoi(argv[i + 1]);
443 if(myrank == 0) cout << "Number of iterations: " << ITERS << endl;
444 }
445 }
446
447
448
449 if(string(argv[1]) == string("-input")) // input option
450 {
451
452 //A.ReadDistribute(string(argv[2]), 0); // read it from file
453 A.ParallelReadMM(filename, true, maximum<bool>());
454 SpParHelper::Print("Read input\n");
455
456 PSpMat_Int64 * G = new PSpMat_Int64(A);
457 G->Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // identity is 0
458 delete G;
459
461
463 A.Reduce(*ColSums, Column, plus<int64_t>(), static_cast<int64_t>(0)); // plus<int64_t> matches the type of the output vector
464 nonisov = ColSums->FindInds(bind2nd(greater<int64_t>(), 0)); // only the indices of non-isolated vertices
465 delete ColSums;
466
467 if(randpermute)
468 nonisov.RandPerm();
469 A(nonisov, nonisov, true); // in-place permute to save memory
470 degrees = degrees(nonisov); // fix the degrees array too
472 source = newsource.GetElement(0);
473 degrees = degrees(nonisov); // fix the source vertex too
475 A.FreeMemory();
476 }
477 else if(string(argv[1]) == string("-rmat"))
478 {
479 unsigned scale;
480 scale = static_cast<unsigned>(atoi(argv[2]));
481 double initiator[4] = {.57, .19, .19, .05};
483 DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, false );
485
486 A = PSpMat_Bool(*DEL, false);
487 delete DEL;
490 Aeff.Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // identity is 0
491 A.FreeMemory();
492
493
494 }
495 else if(string(argv[1]) == string("-er"))
496 {
497 unsigned scale;
498 scale = static_cast<unsigned>(atoi(argv[2]));
499 double initiator[4] = {.25, .25, .25, .25};
501 DEL->GenGraph500Data(initiator, scale, EDGEFACTOR, true, false );
503
504 A = PSpMat_Bool(*DEL, false);
505 delete DEL;
508 Aeff.Reduce(degrees, Row, plus<int64_t>(), static_cast<int64_t>(0)); // identity is 0
509 A.FreeMemory();
510 }
511 else
512 {
513 SpParHelper::Print("Unknown input option\n");
514 MPI_Finalize();
515 return -1;
516 }
517
518
519
520
521 Aeff.PrintInfo();
522 float balance = Aeff.LoadImbalance();
524 outs << "Load balance: " << balance << endl;
526
528
529
530 SpParHelper::Print("-------------------------------------------------\n");
531 SpParHelper::Print ("BFS With CSC matrix and SpMSpV-bucket algorithm \n");
532 SpParHelper::Print("-------------------------------------------------\n");
534 SpParHelper::Print("-------------------------------------------------\n");
535 SpParHelper::Print ("BFS With Split CSC matrix and SpMSpV-heapsort algorithm \n");
536 SpParHelper::Print("-------------------------------------------------\n");
538 SpParHelper::Print("-------------------------------------------------\n");
539 SpParHelper::Print ("BFS With DCSC matric and SpMSpV-SPA algorithm \n");
540 SpParHelper::Print("-------------------------------------------------\n");
542
543 }
544 MPI_Finalize();
545 return 0;
546
547}
int main()
Definition Driver.cpp:12
void BFS_DCSC(PSpMat_s32p64 Aeff1, int64_t source, FullyDistVec< int64_t, int64_t > degrees)
SpParMat< int64_t, bool, SpDCCols< int64_t, bool > > PSpMat_Bool
double cblas_transvectime
double cblas_localspmvtime
int cblas_splits
double cblas_allgathertime
double cblas_alltoalltime
SpParMat< int64_t, bool, SpCCols< int64_t, bool > > Par_CSC_Bool
SpParMat< int64_t, int, SpDCCols< int32_t, int > > PSpMat_s32p64_Int
SpParMat< int64_t, bool, SpDCCols< int32_t, bool > > PSpMat_s32p64
#define EDGEFACTOR
void BFS_CSC_Split(PSpMat_s32p64 Aeff, int64_t source, FullyDistVec< int64_t, int64_t > degrees)
SpParMat< int64_t, int64_t, SpDCCols< int64_t, int64_t > > PSpMat_Int64
void BFS_CSC(PSpMat_s32p64 Aeff, int64_t source, FullyDistVec< int64_t, int64_t > degrees)
int ITERS
double cblas_mergeconttime
SelectMaxSRing< bool, int32_t > SR
void GenGraph500Data(double initiator[4], int log_numverts, int edgefactor, bool scramble=false, bool packed=false)
static void Print(const std::string &s)
int nprocs
Definition comms.cpp:55
@ Column
Definition SpDefs.h:115
void Symmetricize(PARMAT &A)
Definition ReadMatDist.h:29
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
Definition Friends.h:834
double A
static void axpy(bool a, const T_promote &x, T_promote &y)
static T_promote add(const T_promote &arg1, const T_promote &arg2)
int64_t T_promote
static T_promote id()
static T_promote multiply(const bool &arg1, const T_promote &arg2)
static bool returnedSAID()