1 #ifndef BP_MAXIMUM_MATCHING_H
2 #define BP_MAXIMUM_MATCHING_H
25 template <
class IT,
class DER>
29 IT procsPerRow = ri.commGrid->GetGridCols();
30 IT procsPerCol = ri.commGrid->GetGridRows();
33 IT global_nrow = ri.TotalLength();
34 IT global_ncol = ncol;
35 IT m_perprocrow = global_nrow / procsPerRow;
36 IT n_perproccol = global_ncol / procsPerCol;
43 std::vector< std::vector<IT> > rowid(procsPerRow);
44 std::vector< std::vector<IT> > colid(procsPerRow);
46 IT locvec = ri.arr.size();
47 IT roffset = ri.RowLenUntil();
48 for(
typename std::vector<IT>::size_type i=0; i< (unsigned)locvec; ++i)
50 if(ri.arr[i]>=0 && ri.arr[i]<ncol)
52 IT rowrec = (n_perproccol!=0) ? std::min(ri.arr[i] / n_perproccol, procsPerRow-1) : (procsPerRow-1);
54 rowid[rowrec].push_back( i + roffset);
55 colid[rowrec].push_back(ri.arr[i] - (rowrec * n_perproccol));
62 int * sendcnt =
new int[procsPerRow];
63 int * recvcnt =
new int[procsPerRow];
64 for(IT i=0; i<procsPerRow; ++i)
66 sendcnt[i] = rowid[i].size();
69 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, ri.commGrid->GetRowWorld());
71 int * sdispls =
new int[procsPerRow]();
72 int * rdispls =
new int[procsPerRow]();
73 partial_sum(sendcnt, sendcnt+procsPerRow-1, sdispls+1);
74 partial_sum(recvcnt, recvcnt+procsPerRow-1, rdispls+1);
75 IT p_nnz = accumulate(recvcnt,recvcnt+procsPerRow,
static_cast<IT
>(0));
78 IT * p_rows =
new IT[p_nnz];
79 IT * p_cols =
new IT[p_nnz];
80 IT * senddata =
new IT[locvec];
81 for(
int i=0; i<procsPerRow; ++i)
83 copy(rowid[i].begin(), rowid[i].end(), senddata+sdispls[i]);
84 std::vector<IT>().swap(rowid[i]);
86 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_rows, recvcnt, rdispls, MPIType<IT>(), ri.commGrid->GetRowWorld());
88 for(
int i=0; i<procsPerRow; ++i)
90 copy(colid[i].begin(), colid[i].end(), senddata+sdispls[i]);
91 std::vector<IT>().swap(colid[i]);
93 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_cols, recvcnt, rdispls, MPIType<IT>(), ri.commGrid->GetRowWorld());
96 std::tuple<IT,IT,bool> * p_tuples =
new std::tuple<IT,IT,bool>[p_nnz];
97 for(IT i=0; i< p_nnz; ++i)
99 p_tuples[i] = make_tuple(p_rows[i], p_cols[i], 1);
105 IT local_nrow = ri.MyRowLength();
106 int my_proccol = ri.commGrid->GetRankInProcRow();
107 IT local_ncol = (my_proccol<(procsPerCol-1))? (n_perproccol) : (global_ncol - (n_perproccol*(procsPerCol-1)));
111 DER_IT * PSeq =
new DER_IT();
112 PSeq->Create( p_nnz, local_nrow, local_ncol, p_tuples);
128 template <
typename IT>
132 IT nrow = mateRow2Col.TotalLength();
133 IT ncol = mateCol2Row.TotalLength();
138 while(col.getnnz()!=0)
142 row = EWiseApply<IT>(row, parentsRow,
143 [](IT root, IT parent){
return parent;},
144 [](IT root, IT parent){
return true;},
148 nextcol = EWiseApply<IT>(col, mateCol2Row,
149 [](IT child, IT mate){
return mate;},
150 [](IT child, IT mate){
return mate!=-1;},
152 mateRow2Col.
Set(row);
153 mateCol2Row.
Set(col);
167 template <
typename IT>
170 MPI_Win win_mateRow2Col, win_mateCol2Row, win_parentsRow;
171 MPI_Win_create((IT*)mateRow2Col.
GetLocArr(), mateRow2Col.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, mateRow2Col.commGrid->GetWorld(), &win_mateRow2Col);
172 MPI_Win_create((IT*)mateCol2Row.
GetLocArr(), mateCol2Row.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, mateCol2Row.commGrid->GetWorld(), &win_mateCol2Row);
173 MPI_Win_create((IT*)parentsRow.
GetLocArr(), parentsRow.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, parentsRow.commGrid->GetWorld(), &win_parentsRow);
176 IT* leaves_ptr = (IT*) leaves.
GetLocArr();
181 IT row, col=100, nextrow;
182 int owner_row, owner_col;
183 IT locind_row, locind_col;
186 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
192 row = *(leaves_ptr+i);
196 owner_row = mateRow2Col.Owner(row, locind_row);
197 MPI_Win_lock(MPI_LOCK_SHARED, owner_row, 0, win_parentsRow);
198 MPI_Get(&col, 1, MPIType<IT>(), owner_row, locind_row, 1, MPIType<IT>(), win_parentsRow);
199 MPI_Win_unlock(owner_row, win_parentsRow);
201 owner_col = mateCol2Row.Owner(col, locind_col);
202 MPI_Win_lock(MPI_LOCK_SHARED, owner_col, 0, win_mateCol2Row);
203 MPI_Fetch_and_op(&row, &nextrow, MPIType<IT>(), owner_col, locind_col, MPI_REPLACE, win_mateCol2Row);
204 MPI_Win_unlock(owner_col, win_mateCol2Row);
206 MPI_Win_lock(MPI_LOCK_SHARED, owner_row, 0, win_mateRow2Col);
207 MPI_Put(&col, 1, MPIType<IT>(), owner_row, locind_row, 1, MPIType<IT>(), win_mateRow2Col);
208 MPI_Win_unlock(owner_row, win_mateRow2Col);
218 MPI_Win_free(&win_mateRow2Col);
219 MPI_Win_free(&win_mateCol2Row);
220 MPI_Win_free(&win_parentsRow);
229 template <
typename IT,
typename NT,
typename DER>
240 nthreads = omp_get_num_threads();
245 double tstart = MPI_Wtime();
247 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
248 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
250 IT nrow =
A.getnrow();
251 IT ncol =
A.getncol();
257 std::vector<std::vector<double> > timing;
258 std::vector<int> layers;
259 std::vector<int64_t> phaseMatched;
260 double t1, time_search, time_augment, time_phase;
269 MPI_Win_create((IT*)leaves.
GetLocArr(), leaves.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL,
A.getcommgrid()->GetWorld(), &winLeaves);
274 time_phase = MPI_Wtime();
276 std::vector<double> phase_timing(8,0);
277 leaves.
Apply ( [](IT val){
return (IT) -1;});
280 fringeCol = EWiseApply<VertexType>(fringeCol, mateCol2Row,
282 [](
VertexType vtx, IT mate){
return mate==-1;},
296 numUnmatchedCol = fringeCol.
getnnz();
300 time_search = MPI_Wtime();
301 while(fringeCol.
getnnz() > 0)
308 SpMV<WeightMaxMMSR<NT, VertexType>>(
A, fringeCol, fringeRow,
false, SPA);
310 SpMV<Select2ndMinSR<NT, VertexType>>(
A, fringeCol, fringeRow,
false, SPA);
311 phase_timing[0] += MPI_Wtime()-t1;
318 fringeRow = EWiseApply<VertexType>(fringeRow, parentsRow,
320 [](
VertexType vtx, IT parent){
return parent==-1;},
330 umFringeRow = EWiseApply<IT>(fringeRow, mateRow2Col,
331 [](
VertexType vtx, IT mate){
return vtx.root;},
332 [](
VertexType vtx, IT mate){
return mate==-1;},
335 phase_timing[1] += MPI_Wtime()-t1;
338 IT nnz_umFringeRow = umFringeRow.
getnnz();
341 if(nnz_umFringeRow >0)
356 temp1 = umFringeRow.
Invert(ncol);
361 phase_timing[2] += MPI_Wtime()-t1;
367 fringeRow = EWiseApply<VertexType>(fringeRow, mateRow2Col,
369 [](
VertexType vtx, IT mate){
return mate!=-1;},
373 if(nnz_umFringeRow>0 &&
prune)
375 fringeRow.FilterByVal (umFringeRow,[](
VertexType vtx){
return vtx.root;},
false);
377 double tprune = MPI_Wtime()-t1;
378 phase_timing[3] += tprune;
383 fringeCol = fringeRow.Invert(ncol,
385 [](
VertexType& vtx,
const IT & index){
return vtx;},
387 phase_timing[4] += MPI_Wtime()-t1;
393 time_search = MPI_Wtime() - time_search;
394 phase_timing[5] += time_search;
396 IT numMatchedCol = leaves.
Count([](IT leaf){
return leaf!=-1;});
397 phaseMatched.push_back(numMatchedCol);
398 time_augment = MPI_Wtime();
399 if (numMatchedCol== 0) matched =
false;
403 if(numMatchedCol < (2* nprocs * nprocs))
404 AugmentPath(mateRow2Col, mateCol2Row,parentsRow, leaves);
406 AugmentLevel(mateRow2Col, mateCol2Row,parentsRow, leaves);
408 time_augment = MPI_Wtime() - time_augment;
409 phase_timing[6] += time_augment;
411 time_phase = MPI_Wtime() - time_phase;
412 phase_timing[7] += time_phase;
413 timing.push_back(phase_timing);
415 layers.push_back(layer);
420 MPI_Win_free(&winLeaves);
430 std::cout <<
"****** maximum matching runtime ********\n";
431 std::cout << std::endl;
432 std::cout <<
"========================================================================\n";
433 std::cout <<
" BFS Search \n";
434 std::cout <<
"===================== ==================================================\n";
435 std::cout <<
"Phase Layer Match SpMV EWOpp CmUqL Prun CmMC BFS Aug Total\n";
436 std::cout <<
"===================== ===================================================\n";
438 std::vector<double> totalTimes(timing[0].
size(),0);
439 int nphases = timing.size();
440 for(
int i=0; i<timing.size(); i++)
442 printf(
" %3d %3d %8lld ", i+1, layers[i], phaseMatched[i]);
443 for(
int j=0; j<timing[i].size(); j++)
445 totalTimes[j] += timing[i][j];
447 printf(
"%.2lf ", timing[i][j]);
453 std::cout <<
"-----------------------------------------------------------------------\n";
454 std::cout <<
"Phase Layer UnMat SpMV EWOpp CmUqL Prun CmMC BFS Aug Total \n";
455 std::cout <<
"-----------------------------------------------------------------------\n";
457 combTime = totalTimes.back();
458 printf(
" %3d %3d %8lld ", nphases, totalLayer/nphases, numUnmatchedCol);
459 for(
int j=0; j<totalTimes.size()-1; j++)
461 printf(
"%.2lf ", totalTimes[j]);
463 printf(
"%.2lf\n", combTime);
466 IT nrows=
A.getnrow();
467 IT matchedRow = mateRow2Col.
Count([](IT mate){
return mate!=-1;});
470 std::cout <<
"***Final Maximum Matching***\n";
471 std::cout <<
"***Total-Rows Matched-Rows Total Time***\n";
472 printf(
"%lld %lld %lf \n",nrows, matchedRow, combTime);
473 printf(
"matched rows: %lld , which is: %lf percent \n",matchedRow, 100*(
double)matchedRow/(nrows));
474 std::cout <<
"-------------------------------------------------------\n\n";
FullyDistSpVec< IT, NT > Invert(IT globallen)
void ApplyInd(_BinaryOperation __binary_op)
void EWiseApply(const FullyDistVec< IT, NT2 > &other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, const bool useExtendedBinOp)
std::shared_ptr< CommGrid > getcommgrid() const
const NT * GetLocArr() const
void Set(const FullyDistSpVec< IT, NT > &rhs)
IT Count(_Predicate pred) const
Return the number of elements for which pred is true.
void Apply(_UnaryOperation __unary_op)
void AugmentPath(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > &parentsRow, FullyDistVec< IT, IT > &leaves)
void maximumMatching(SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, bool prune=true, bool randMM=false, bool maximizeWeight=false)
void AugmentLevel(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > &parentsRow, FullyDistVec< IT, IT > &leaves)
SpParMat< IT, bool, DER > PermMat(const FullyDistVec< IT, IT > &ri, const IT ncol)