COMBINATORIAL_BLAS 1.6
mpipspgemm.cpp
#include <mpi.h>
#include <omp.h>        // needed for omp_get_num_threads() below
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <string>
#include <sstream>
#include <stdint.h>
#include <cmath>
#include "CombBLAS/CombBLAS.h"
#include "Glue.h"
#include "CCGrid.h"
#include "Reductions.h"
#include "Multiplier.h"
#include "SplitMatDist.h"

using namespace std;
using namespace combblas;

double comm_bcast;
double comm_reduce;
double comp_summa;
double comp_reduce;
double comp_result;
double comp_reduce_layer;
double comp_split;
double comp_trans;
double comm_split;

#define ITERS 5

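// The globals above accumulate per-phase communication/computation timings
// for the multiply kernels; defining them here provides the single definition
// that the headers above link against (presumably via extern declarations in
// Glue.h).
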
int main(int argc, char *argv[])
{
    int provided;

    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
    {
        printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

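    // The serialized thread level is requested above presumably because the
    // threaded (column) multiply path uses OpenMP workers while MPI calls are
    // issued by one thread at a time.
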
    int nprocs, myrank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    if (argc < 8)
    {
        if (myrank == 0)
        {
            printf("Usage (random): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type> <Scale> <EDGEFACTOR> <algo>\n");
            printf("Usage (input): ./mpipspgemm <GridRows> <GridCols> <Layers> <Type=input> <matA> <matB> <algo>\n");
            printf("Example: ./mpipspgemm 4 4 2 ER 19 16 outer\n");
            printf("Example: ./mpipspgemm 4 4 2 Input matA.mtx matB.mtx column\n");
            printf("Type ER: Erdos-Renyi\n");
            printf("Type SSCA: R-MAT with SSCA benchmark parameters\n");
            printf("Type G500: R-MAT with Graph500 benchmark parameters\n");
            printf("algo: outer | column \n");
        }
        return -1;
    }

    unsigned GRROWS   = (unsigned) atoi(argv[1]);
    unsigned GRCOLS   = (unsigned) atoi(argv[2]);
    unsigned C_FACTOR = (unsigned) atoi(argv[3]);
    CCGrid CMG(C_FACTOR, GRCOLS);
    int nthreads;
#pragma omp parallel
    {
        nthreads = omp_get_num_threads();
    }

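    // Inside the parallel region every thread writes the same team size, so
    // the unsynchronized store to nthreads is benign; the value is only used
    // in the configuration report printed below.
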
    if (GRROWS != GRCOLS)
    {
        SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    int layer_length = GRROWS * GRCOLS;
    if (layer_length * C_FACTOR != nprocs)
    {
        SpParHelper::Print("The product of <GridRows> <GridCols> <Replicas> does not match the number of processes\n");
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

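    // Example: "./mpipspgemm 4 4 2 ..." needs exactly 4 * 4 * 2 = 32 MPI ranks.
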
    {
        SpDCCols<int64_t, double> splitA, splitB;
        SpDCCols<int64_t, double> *splitC;
        string type;
        shared_ptr<CommGrid> layerGrid;
        layerGrid.reset( new CommGrid(CMG.layerWorld, 0, 0) );
        FullyDistVec<int64_t, int64_t> p(layerGrid); // permutation vector defined on layers

        if (string(argv[4]) == string("input")) // input option
        {
            string fileA(argv[5]);
            string fileB(argv[6]);

            double t01 = MPI_Wtime();
            SpDCCols<int64_t, double> *A = ReadMat<double>(fileA, CMG, true, p);
            SpDCCols<int64_t, double> *B = ReadMat<double>(fileB, CMG, true, p);
            SplitMat(CMG, A, splitA, false);
            SplitMat(CMG, B, splitB, true); // row-split
            if (myrank == 0) cout << "Matrices read and replicated along layers : time " << MPI_Wtime() - t01 << endl;
        }
        else
        {
            unsigned scale      = (unsigned) atoi(argv[5]);
            unsigned EDGEFACTOR = (unsigned) atoi(argv[6]);
            double initiator[4];
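            // The branches below fill initiator[] with the R-MAT quadrant
            // probabilities {a, b, c, d}; the G500 and SSCA cases also
            // override EDGEFACTOR with their benchmark defaults (16 and 8).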
            if (string(argv[4]) == string("ER"))
            {
                initiator[0] = .25;
                initiator[1] = .25;
                initiator[2] = .25;
                initiator[3] = .25;
            }
            else if (string(argv[4]) == string("G500"))
            {
                initiator[0] = .57;
                initiator[1] = .19;
                initiator[2] = .19;
                initiator[3] = .05;
                EDGEFACTOR = 16;
            }
            else if (string(argv[4]) == string("SSCA"))
            {
                initiator[0] = .6;
                initiator[1] = .4/3;
                initiator[2] = .4/3;
                initiator[3] = .4/3;
                EDGEFACTOR = 8;
            }
            else
            {
                if (myrank == 0)
                    printf("The initiator parameter - %s - is not recognized.\n", argv[4]);
                MPI_Abort(MPI_COMM_WORLD, 1);
            }

            double t01 = MPI_Wtime();
            SpDCCols<int64_t, double> *A = GenMat<int64_t,double>(CMG, scale, EDGEFACTOR, initiator, true);
            SpDCCols<int64_t, double> *B = GenMat<int64_t,double>(CMG, scale, EDGEFACTOR, initiator, true);

            SplitMat(CMG, A, splitA, false);
            SplitMat(CMG, B, splitB, true); // row-split
            if (myrank == 0) cout << "RMATs Generated and replicated along layers : time " << MPI_Wtime() - t01 << endl;
        }
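        // SplitMat (SplitMatDist.h) has the signature
        //   void SplitMat(CCGrid &CMG, SpDCCols<IT,NT> *localmat,
        //                 SpDCCols<IT,NT> &splitmat, bool rowsplit = false);
        // In both branches above, A keeps the default split while B is
        // row-split (rowsplit = true), as the original "row-split" comments note.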

        int64_t globalnnzA = 0, globalnnzB = 0;
        int64_t localnnzA = splitA.getnnz();
        int64_t localnnzB = splitB.getnnz();
        MPI_Allreduce( &localnnzA, &globalnnzA, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        MPI_Allreduce( &localnnzB, &globalnnzB, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
        if (myrank == 0) cout << "After split: nnzA= " << globalnnzA << " & nnzB= " << globalnnzB;

        type = string(argv[7]);
        if (myrank == 0)
        {
            printf("\n Processor Grid (row x col x layers x threads): %dx%dx%dx%d \n", CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads);
            printf(" prow pcol layer thread comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");
        }
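        // multiply (Multiplier.h) has the signature
        //   SpDCCols<IT,NT>* multiply(SpDCCols<IT,NT> &splitA, SpDCCols<IT,NT> &splitB,
        //                             CCGrid &CMG, bool isBT, bool threaded);
        // The outer-product path below calls it with (isBT=true, threaded=false)
        // on a locally transposed B; the column path uses (isBT=false, threaded=true).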
        if (type == string("outer"))
        {
            splitB.Transpose(); // locally transpose for outer product
            for (int k = 0; k < ITERS; k++)
            {
                splitC = multiply(splitA, splitB, CMG, true, false); // outer product
                delete splitC;
            }
        }
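        // Column path: the first ITERS-1 products are discarded as timing runs;
        // the final product is kept long enough to report its global nonzero count.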
        else // default column-threaded
        {
            for (int k = 0; ITERS > 0 && k < ITERS-1; k++)
            {
                splitC = multiply(splitA, splitB, CMG, false, true);
                delete splitC;
            }
            splitC = multiply(splitA, splitB, CMG, false, true);
            int64_t nnzC = 0;
            int64_t localnnzC = splitC->getnnz();
            MPI_Allreduce( &localnnzC, &nnzC, 1, MPIType<int64_t>(), MPI_SUM, MPI_COMM_WORLD);
            if (myrank == 0) cout << "\n After multiplication: nnzC= " << nnzC << endl << endl;
            delete splitC;
        }
    }

    MPI_Finalize();
    return 0;
}