COMBINATORIAL_BLAS 1.6
 
BcastTest.cpp
#include <mpi.h>
#include <sys/time.h>
#include <iostream>
#include <functional>
#include <algorithm>
#include <vector>
#include <sstream>
#include "CombBLAS/CombBLAS.h"
#include "CombBLAS/CommGrid3D.h"
#include "CombBLAS/SpParMat3D.h"
#include "CombBLAS/ParFriends.h"

using namespace std;
using namespace combblas;

#define EPS 0.0001

#ifdef TIMING
double cblas_alltoalltime;
double cblas_allgathertime;
#endif

#ifdef _OPENMP
int cblas_splits = omp_get_max_threads();
#else
int cblas_splits = 1;
#endif

// Simple helper class for declarations: Just the numerical type is templated
// The index type and the sequential matrix type stay the same for the whole code
// In this case, they are "int64_t" and "SpDCCols"
template <class NT>
class PSpMat
{
public:
    typedef SpDCCols < int64_t, NT > DCCols;
    typedef SpParMat < int64_t, NT, DCCols > MPI_DCCols;
};

int main(int argc, char* argv[])
{
    int nprocs, myrank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    if(argc < 2){
        if(myrank == 0)
        {
            cout << "Usage: ./<Binary> <MatrixA> " << endl;
        }
        MPI_Finalize();
        return -1;
    }
    else {
        string Aname(argv[1]);

        shared_ptr<CommGrid> fullWorld;
        fullWorld.reset( new CommGrid(MPI_COMM_WORLD, 0, 0) );

        SpParMat<int64_t, double, SpDCCols < int64_t, double >> M(fullWorld);

        M.ParallelReadMM(Aname, true, maximum<double>());
        FullyDistVec<int64_t, int64_t> p( M.getcommgrid() );
        p.iota(M.getnrow(), 0);
        p.RandPerm();
        (M)(p,p,true); // in-place permute to save memory
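        // p is a uniformly random permutation applied symmetrically to the rows and
        // columns of M; the usual intent of this idiom in CombBLAS tests is to spread
        // nonzeros evenly over the process grid, so that the broadcast volumes measured
        // below are not skewed by the ordering of the input matrix.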

        //M.ReadGeneralizedTuples(Aname, maximum<double>());

        SpParMat<int64_t, double, SpDCCols < int64_t, double >> A(M);
        //SpParMat3D<int64_t, double, SpDCCols < int64_t, double >> A3D(A, 4, true, false);
        SpParMat<int64_t, double, SpDCCols < int64_t, double >> B(M);
        //SpParMat3D<int64_t, double, SpDCCols < int64_t, double >> B3D(B, 4, false, false);

        typedef int64_t IT;
        typedef double NT;
        typedef SpDCCols < IT, NT > DER;

        double Abcasttime = 0;
        double Abcasttime_prev;
        double Bbcasttime = 0;
        double Bbcasttime_prev;
        double t0, t1;

        int dummy, stages;
        std::shared_ptr<CommGrid> GridC = ProductGrid((A.getcommgrid()).get(), (B.getcommgrid()).get(), stages, dummy, dummy);

        //int buffsize = 1024 * 1024 * (512 / sizeof(IT));
        //if(myrank == 0) fprintf(stderr, "Memory to be allocated %d\n", buffsize);
        //IT * sendbuf = new IT[buffsize];
        //if(myrank == 0) fprintf(stderr, "Memory allocated\n");

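        // The loops below isolate the communication phase of a 2D SUMMA-style SpGEMM:
        // B is column-split into 'phases' pieces, and in every phase each of the
        // 'stages' broadcast roots sends its piece along the process column. Only the
        // broadcast of B is actually performed and timed.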
        for(int phases = 1; phases <= 256; phases = phases * 2){
            if(myrank == 0) fprintf(stderr, "Running with phase: %d\n", phases);
            for(int it = 0; it < 3; it++){
                Abcasttime = 0;
                Bbcasttime = 0;

                std::vector< DER > PiecesOfB;
                DER CopyB = *(B.seqptr()); // we allow alias matrices as input because of this local copy

                CopyB.ColSplit(phases, PiecesOfB); // CopyB's memory is destroyed at this point
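                // Column-splitting B into 'phases' pieces keeps each individual broadcast
                // (and the buffer needed to hold a received piece) to roughly 1/phases of
                // the full local matrix, following the phased, memory-constrained SpGEMM
                // approach used elsewhere in CombBLAS.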
                MPI_Barrier(GridC->GetWorld());

                IT ** ARecvSizes = SpHelper::allocate2D<IT>(DER::esscount, stages);
                IT ** BRecvSizes = SpHelper::allocate2D<IT>(DER::esscount, stages);

                SpParHelper::GetSetSizes( *(A.seqptr()), ARecvSizes, (A.getcommgrid())->GetRowWorld());
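                // GetSetSizes exchanges each rank's "essentials" (DER::esscount integers of
                // per-piece metadata, e.g. nonzero count and dimensions) over the given 1D
                // communicator, so receivers can size ARecv/BRecv before BCastMatrix.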

                // Remotely fetched matrices are stored as pointers
                DER * ARecv;
                DER * BRecv;

                int Aself = (A.getcommgrid())->GetRankInProcRow();
                int Bself = (B.getcommgrid())->GetRankInProcCol();

                //int chunksize = buffsize / phases;
                //if(myrank == 0) fprintf(stderr, "chunksize: %d\n", chunksize);

                for(int p = 0; p < phases; ++p)
                {
                    SpParHelper::GetSetSizes( PiecesOfB[p], BRecvSizes, (B.getcommgrid())->GetColWorld());
                    for(int i = 0; i < stages; ++i)
                    {
                        //t0 = MPI_Wtime();
                        //MPI_Bcast(sendbuf+(chunksize*p), chunksize, MPIType<IT>(), i, GridC->GetColWorld());
                        //t1 = MPI_Wtime();
                        //Bbcasttime += (t1-t0);

                        std::vector<IT> ess;

                        if(i == Aself) ARecv = A.seqptr(); // shallow-copy
                        else {
                            ess.resize(DER::esscount);
                            for(int j = 0; j < DER::esscount; ++j)
                                ess[j] = ARecvSizes[j][i]; // essentials of the ith matrix in this row
                            ARecv = new DER(); // first, create the object
                        }
                        MPI_Barrier(A.getcommgrid()->GetWorld());
                        t0 = MPI_Wtime();
                        //SpParHelper::BCastMatrix(GridC->GetRowWorld(), *ARecv, ess, i); // then, receive its elements
                        MPI_Barrier(A.getcommgrid()->GetWorld());
                        t1 = MPI_Wtime();
                        Abcasttime += (t1-t0);
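                        // The BCastMatrix call for A is commented out above, so Abcasttime
                        // records only the cost of the surrounding barriers.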

                        ess.clear();

                        if(i == Bself) BRecv = &(PiecesOfB[p]); // shallow-copy
                        else {
                            ess.resize(DER::esscount);
                            for(int j = 0; j < DER::esscount; ++j)
                                ess[j] = BRecvSizes[j][i];
                            BRecv = new DER();
                        }
                        MPI_Barrier(A.getcommgrid()->GetWorld());
                        t0 = MPI_Wtime();
                        SpParHelper::BCastMatrix(GridC->GetColWorld(), *BRecv, ess, i); // then, receive its elements
                        MPI_Barrier(A.getcommgrid()->GetWorld());
                        t1 = MPI_Wtime();
                        Bbcasttime += (t1-t0);

                        //if(i != Aself){
                        //    if(ARecv != NULL) delete ARecv;
                        //}
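                        // Because the broadcast of A is disabled in this test, the empty DER
                        // objects allocated for remote A stages are never filled, and the
                        // matching delete stays commented out; they are simply leaked for the
                        // duration of the run.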
                        if(i != Bself) {
                            if(BRecv != NULL) delete BRecv;
                        }

                    } // all stages executed

                }

                SpHelper::deallocate2D(ARecvSizes, DER::esscount);
                SpHelper::deallocate2D(BRecvSizes, DER::esscount);
                if(myrank == 0){
                    fprintf(stderr, "Iteration : %d - Abcasttime: %lf\n", it, Abcasttime);
                    fprintf(stderr, "Iteration : %d - Bbcasttime: %lf\n", it, Bbcasttime);
                }
            }
            if(myrank == 0) fprintf(stderr, "\n\n++++++++++++++++++++++++++++++++++++++++++++\n\n\n\n");
        }
    }
    MPI_Finalize();
    return 0;
}