COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
mpipspgemm.cpp
Go to the documentation of this file.
1#include <mpi.h>
2#include <sys/time.h>
3#include <iostream>
4#include <functional>
5#include <algorithm>
6#include <vector>
7#include <string>
8#include <sstream>
9#include <stdint.h>
10#include <cmath>
11#include "CombBLAS/CombBLAS.h"
12#include "Glue.h"
13#include "CCGrid.h"
14#include "Reductions.h"
15#include "Multiplier.h"
16#include "SplitMatDist.h"
17
18
19using namespace std;
20using namespace combblas;
21
31
32#define ITERS 5
33
34int main(int argc, char *argv[])
35{
36 int provided;
37
40 {
41 printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
43 }
44
45
46
47 int nprocs, myrank;
50
51 if(argc < 8)
52 {
53 if(myrank == 0)
54 {
55 printf("Usage (random): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type> <Scale> <EDGEFACTOR> <algo>\n");
56 printf("Usage (input): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type=input> <matA> <matB> <algo>\n");
57 printf("Example: ./mpipspgemm 4 4 2 ER 19 16 outer\n");
58 printf("Example: ./mpipspgemm 4 4 2 Input matA.mtx matB.mtx column\n");
59 printf("Type ER: Erdos-Renyi\n");
60 printf("Type SSCA: R-MAT with SSCA benchmark parameters\n");
61 printf("Type G500: R-MAT with Graph500 benchmark parameters\n");
62 printf("algo: outer | column \n");
63 }
64 return -1;
65 }
66
67
68 unsigned GRROWS = (unsigned) atoi(argv[1]);
69 unsigned GRCOLS = (unsigned) atoi(argv[2]);
70 unsigned C_FACTOR = (unsigned) atoi(argv[3]);
72 int nthreads;
73#pragma omp parallel
74 {
75 nthreads = omp_get_num_threads();
76 }
77
78
79 if(GRROWS != GRCOLS)
80 {
81 SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
84 }
85
88 {
89 SpParHelper::Print("The product of <GridRows> <GridCols> <Replicas> does not match the number of processes\n");
92 }
93
94 {
97 string type;
99 layerGrid.reset( new CommGrid(CMG.layerWorld, 0, 0) );
100 FullyDistVec<int64_t, int64_t> p(layerGrid); // permutation vector defined on layers
101
102 if(string(argv[4]) == string("input")) // input option
103 {
104 string fileA(argv[5]);
105 string fileB(argv[6]);
106
107 double t01 = MPI_Wtime();
110 SplitMat(CMG, A, splitA, false);
111 SplitMat(CMG, B, splitB, true); //row-split
112 if(myrank == 0) cout << "Matrices read and replicated along layers : time " << MPI_Wtime() - t01 << endl;
113 }
114 else
115 {
116 unsigned scale = (unsigned) atoi(argv[5]);
117 unsigned EDGEFACTOR = (unsigned) atoi(argv[6]);
118 double initiator[4];
119 if(string(argv[4]) == string("ER"))
120 {
121 initiator[0] = .25;
122 initiator[1] = .25;
123 initiator[2] = .25;
124 initiator[3] = .25;
125 }
126 else if(string(argv[4]) == string("G500"))
127 {
128 initiator[0] = .57;
129 initiator[1] = .19;
130 initiator[2] = .19;
131 initiator[3] = .05;
132 EDGEFACTOR = 16;
133 }
134 else if(string(argv[4]) == string("SSCA"))
135 {
136 initiator[0] = .6;
137 initiator[1] = .4/3;
138 initiator[2] = .4/3;
139 initiator[3] = .4/3;
140 EDGEFACTOR = 8;
141 }
142 else {
143 if(myrank == 0)
144 printf("The initiator parameter - %s - is not recognized.\n", argv[5]);
146 }
147
148
149 double t01 = MPI_Wtime();
152
153 SplitMat(CMG, A, splitA, false);
154 SplitMat(CMG, B, splitB, true); //row-split
155 if(myrank == 0) cout << "RMATs Generated and replicated along layers : time " << MPI_Wtime() - t01 << endl;
156
157 }
158
160 int64_t localnnzA = splitA.getnnz();
161 int64_t localnnzB = splitB.getnnz();
164 if(myrank == 0) cout << "After split: nnzA= " << globalnnzA << " & nnzB= " << globalnnzB;
165
166
167
168 type = string(argv[7]);
169 if(myrank == 0)
170 {
171 printf("\n Processor Grid (row x col x layers x threads): %dx%dx%dx%d \n", CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads);
172 printf(" prow pcol layer thread comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");
173 }
174 if(type == string("outer"))
175 {
176 splitB.Transpose(); //locally transpose for outer product
177 for(int k=0; k<ITERS; k++)
178 {
179 splitC = multiply(splitA, splitB, CMG, true, false); // outer product
180 delete splitC;
181 }
182 }
183
184 else // default column-threaded
185 {
186 for(int k=0; ITERS>0 && k<ITERS-1; k++)
187 {
188 splitC = multiply(splitA, splitB, CMG, false, true);
189 delete splitC;
190 }
191 splitC = multiply(splitA, splitB, CMG, false, true);
192 int64_t nnzC=0;
193 int64_t localnnzC = splitC->getnnz();
195 if(myrank == 0) cout << "\n After multiplication: nnzC= " << nnzC << endl << endl;
196 delete splitC;
197
198 }
199 }
200
201 MPI_Finalize();
202 return 0;
203}
204
205
#define EDGEFACTOR
Definition DirOptBFS.cpp:81
int main()
Definition Driver.cpp:12
Definition test.cpp:53
static void Print(const std::string &s)
int nprocs
Definition comms.cpp:55
double comm_reduce
double comp_split
#define ITERS
double comm_bcast
double comm_split
double comp_reduce
double comp_result
double comp_trans
double comp_summa
double comp_reduce_layer
void SplitMat(CCGrid &CMG, SpDCCols< IT, NT > *localmat, SpDCCols< IT, NT > &splitmat, bool rowsplit=false)
SpDCCols< IT, NT > * multiply(SpDCCols< IT, NT > &splitA, SpDCCols< IT, NT > &splitB, CCGrid &CMG, bool isBT, bool threaded)
Definition Multiplier.h:11
double A