1#ifndef BP_MAXIMUM_MATCHING_H
2#define BP_MAXIMUM_MATCHING_H
4#include "CombBLAS/CombBLAS.h"
25template <
class IT,
class DER>
26SpParMat<IT, bool, DER>
PermMat (
const FullyDistVec<IT,IT> & ri,
const IT ncol)
30 IT procsPerCol = ri.commGrid->GetGridRows();
33 IT global_nrow = ri.TotalLength();
34 IT global_ncol = ncol;
35 IT m_perprocrow = global_nrow / procsPerRow;
36 IT n_perproccol = global_ncol / procsPerCol;
43 std::vector< std::vector<IT> > rowid(procsPerRow);
44 std::vector< std::vector<IT> > colid(procsPerRow);
46 IT locvec = ri.arr.size();
47 IT roffset = ri.RowLenUntil();
48 for(
typename std::vector<IT>::size_type i=0; i< (unsigned)locvec; ++i)
50 if(ri.arr[i]>=0 && ri.arr[i]<ncol)
52 IT rowrec = (n_perproccol!=0) ? std::min(ri.arr[i] / n_perproccol, procsPerRow-1) : (procsPerRow-1);
54 rowid[rowrec].push_back( i + roffset);
55 colid[rowrec].push_back(ri.arr[i] - (rowrec * n_perproccol));
62 int * sendcnt =
new int[procsPerRow];
63 int * recvcnt =
new int[procsPerRow];
64 for(
IT i=0; i<procsPerRow; ++i)
66 sendcnt[i] = rowid[i].size();
69 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, ri.commGrid->GetRowWorld());
71 int * sdispls =
new int[procsPerRow]();
72 int * rdispls =
new int[procsPerRow]();
73 partial_sum(sendcnt, sendcnt+procsPerRow-1, sdispls+1);
74 partial_sum(recvcnt, recvcnt+procsPerRow-1, rdispls+1);
75 IT p_nnz = accumulate(recvcnt,recvcnt+procsPerRow,
static_cast<IT>(0));
78 IT * p_rows =
new IT[p_nnz];
79 IT * p_cols =
new IT[p_nnz];
80 IT * senddata =
new IT[locvec];
81 for(
int i=0; i<procsPerRow; ++i)
83 copy(rowid[i].begin(), rowid[i].end(), senddata+sdispls[i]);
84 std::vector<IT>().swap(rowid[i]);
86 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_rows, recvcnt, rdispls, MPIType<IT>(), ri.commGrid->GetRowWorld());
88 for(
int i=0; i<procsPerRow; ++i)
90 copy(colid[i].begin(), colid[i].end(), senddata+sdispls[i]);
91 std::vector<IT>().swap(colid[i]);
93 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_cols, recvcnt, rdispls, MPIType<IT>(), ri.commGrid->GetRowWorld());
96 std::tuple<IT,IT,bool> * p_tuples =
new std::tuple<IT,IT,bool>[p_nnz];
97 for(
IT i=0; i< p_nnz; ++i)
99 p_tuples[i] = make_tuple(p_rows[i], p_cols[i], 1);
105 IT local_nrow = ri.MyRowLength();
106 int my_proccol = ri.commGrid->GetRankInProcRow();
107 IT local_ncol = (my_proccol<(procsPerCol-1))? (n_perproccol) : (global_ncol - (n_perproccol*(procsPerCol-1)));
110 typedef typename create_trait<DER, IT, bool>::T_inferred DER_IT;
111 DER_IT * PSeq =
new DER_IT();
112 PSeq->Create( p_nnz, local_nrow, local_ncol, p_tuples);
114 SpParMat<IT,bool,DER_IT> P (PSeq, ri.commGrid);
128template <
typename IT>
129void AugmentLevel(FullyDistVec<IT, IT>& mateRow2Col, FullyDistVec<IT, IT>& mateCol2Row, FullyDistVec<IT, IT>& parentsRow, FullyDistVec<IT, IT>& leaves)
132 IT nrow = mateRow2Col.TotalLength();
133 IT ncol = mateCol2Row.TotalLength();
134 FullyDistSpVec<IT, IT> col(leaves, [](
IT leaf){
return leaf!=-1;});
135 FullyDistSpVec<IT, IT> row(mateRow2Col.getcommgrid(), nrow);
136 FullyDistSpVec<IT, IT> nextcol(col.getcommgrid(), ncol);
138 while(col.getnnz()!=0)
141 row = col.Invert(nrow);
142 row = EWiseApply<IT>(row, parentsRow,
143 [](
IT root,
IT parent){
return parent;},
144 [](
IT root,
IT parent){
return true;},
147 col = row.Invert(ncol);
148 nextcol = EWiseApply<IT>(col, mateCol2Row,
149 [](
IT child,
IT mate){
return mate;},
150 [](
IT child,
IT mate){
return mate!=-1;},
152 mateRow2Col.Set(row);
153 mateCol2Row.Set(col);
167template <
typename IT>
168void AugmentPath(FullyDistVec<IT, IT>& mateRow2Col, FullyDistVec<IT, IT>& mateCol2Row,FullyDistVec<IT, IT>& parentsRow, FullyDistVec<IT, IT>& leaves)
170 MPI_Win win_mateRow2Col, win_mateCol2Row, win_parentsRow;
171 MPI_Win_create((
IT*)mateRow2Col.GetLocArr(), mateRow2Col.LocArrSize() *
sizeof(
IT),
sizeof(
IT), MPI_INFO_NULL, mateRow2Col.commGrid->GetWorld(), &win_mateRow2Col);
172 MPI_Win_create((
IT*)mateCol2Row.GetLocArr(), mateCol2Row.LocArrSize() *
sizeof(
IT),
sizeof(
IT), MPI_INFO_NULL, mateCol2Row.commGrid->GetWorld(), &win_mateCol2Row);
173 MPI_Win_create((
IT*)parentsRow.GetLocArr(), parentsRow.LocArrSize() *
sizeof(
IT),
sizeof(
IT), MPI_INFO_NULL, parentsRow.commGrid->GetWorld(), &win_parentsRow);
176 IT* leaves_ptr = (
IT*) leaves.GetLocArr();
181 IT row, col=100, nextrow;
182 int owner_row, owner_col;
183 IT locind_row, locind_col;
186 MPI_Comm comm = mateRow2Col.getcommgrid()->GetWorld();
187 MPI_Comm_rank(comm,&myrank);
190 for(
IT i=0; i<leaves.LocArrSize(); i++)
193 row = *(leaves_ptr+i);
197 owner_row = mateRow2Col.Owner(row, locind_row);
198 MPI_Win_lock(MPI_LOCK_SHARED, owner_row, 0, win_parentsRow);
199 MPI_Get(&col, 1, MPIType<IT>(), owner_row, locind_row, 1, MPIType<IT>(), win_parentsRow);
200 MPI_Win_unlock(owner_row, win_parentsRow);
202 owner_col = mateCol2Row.Owner(col, locind_col);
203 MPI_Win_lock(MPI_LOCK_SHARED, owner_col, 0, win_mateCol2Row);
204 MPI_Fetch_and_op(&row, &nextrow, MPIType<IT>(), owner_col, locind_col, MPI_REPLACE, win_mateCol2Row);
205 MPI_Win_unlock(owner_col, win_mateCol2Row);
207 MPI_Win_lock(MPI_LOCK_SHARED, owner_row, 0, win_mateRow2Col);
208 MPI_Put(&col, 1, MPIType<IT>(), owner_row, locind_row, 1, MPIType<IT>(), win_mateRow2Col);
209 MPI_Win_unlock(owner_row, win_mateRow2Col);
219 MPI_Win_free(&win_mateRow2Col);
220 MPI_Win_free(&win_mateCol2Row);
221 MPI_Win_free(&win_parentsRow);
230template <
typename IT,
typename NT,
typename DER>
231void maximumMatching(SpParMat < IT, NT, DER > &
A, FullyDistVec<IT, IT>& mateRow2Col,
232 FullyDistVec<IT, IT>& mateCol2Row,
bool prune=
true,
bool randMM =
false,
bool maximizeWeight =
false)
241 nthreads = omp_get_num_threads();
244 PreAllocatedSPA<VertexType> SPA(
A.seq(), nthreads*4);
246 double tstart = MPI_Wtime();
248 MPI_Comm comm =
A.getcommgrid()->GetWorld();
249 MPI_Comm_size(comm,&
nprocs);
250 MPI_Comm_rank(comm,&myrank);
252 IT nrow =
A.getnrow();
253 IT ncol =
A.getncol();
255 FullyDistSpVec<IT, VertexType> fringeRow(
A.getcommgrid(), nrow);
256 FullyDistSpVec<IT, IT> umFringeRow(
A.getcommgrid(), nrow);
257 FullyDistVec<IT, IT> leaves (
A.getcommgrid(), ncol, (
IT) -1);
259 std::vector<std::vector<double> > timing;
260 std::vector<int> layers;
261 std::vector<int64_t> phaseMatched;
262 double t1, time_search, time_augment, time_phase;
271 MPI_Win_create((
IT*)leaves.GetLocArr(), leaves.LocArrSize() *
sizeof(
IT),
sizeof(
IT), MPI_INFO_NULL,
A.getcommgrid()->GetWorld(), &winLeaves);
276 time_phase = MPI_Wtime();
278 std::vector<double> phase_timing(8,0);
279 leaves.Apply ( [](
IT val){
return (
IT) -1;});
280 FullyDistVec<IT, IT> parentsRow (
A.getcommgrid(), nrow, (
IT) -1);
281 FullyDistSpVec<IT, VertexType> fringeCol(
A.getcommgrid(), ncol);
282 fringeCol = EWiseApply<VertexType>(fringeCol, mateCol2Row,
298 numUnmatchedCol = fringeCol.getnnz();
302 time_search = MPI_Wtime();
303 while(fringeCol.getnnz() > 0)
310 SpMV<WeightMaxMMSR<NT, VertexType>>(
A, fringeCol, fringeRow,
false, SPA);
312 SpMV<Select2ndMinSR<NT, VertexType>>(
A, fringeCol, fringeRow,
false, SPA);
313 phase_timing[0] += MPI_Wtime()-t1;
320 fringeRow = EWiseApply<VertexType>(fringeRow, parentsRow,
326 parentsRow.EWiseApply(fringeRow,
332 umFringeRow = EWiseApply<IT>(fringeRow, mateRow2Col,
337 phase_timing[1] += MPI_Wtime()-t1;
340 IT nnz_umFringeRow = umFringeRow.getnnz();
343 if(nnz_umFringeRow >0)
357 FullyDistSpVec<IT, IT> temp1(
A.getcommgrid(), ncol);
358 temp1 = umFringeRow.Invert(ncol);
363 phase_timing[2] += MPI_Wtime()-t1;
369 fringeRow = EWiseApply<VertexType>(fringeRow, mateRow2Col,
375 if(nnz_umFringeRow>0 &&
prune)
377 fringeRow.FilterByVal (umFringeRow,[](
VertexType vtx){
return vtx.
root;},
false);
379 double tprune = MPI_Wtime()-t1;
380 phase_timing[3] += tprune;
385 fringeCol = fringeRow.Invert(ncol,
389 phase_timing[4] += MPI_Wtime()-t1;
395 time_search = MPI_Wtime() - time_search;
396 phase_timing[5] += time_search;
398 IT numMatchedCol = leaves.Count([](
IT leaf){
return leaf!=-1;});
399 phaseMatched.push_back(numMatchedCol);
400 time_augment = MPI_Wtime();
401 if (numMatchedCol== 0) matched =
false;
406 AugmentPath(mateRow2Col, mateCol2Row,parentsRow, leaves);
408 AugmentLevel(mateRow2Col, mateCol2Row,parentsRow, leaves);
410 time_augment = MPI_Wtime() - time_augment;
411 phase_timing[6] += time_augment;
413 time_phase = MPI_Wtime() - time_phase;
414 phase_timing[7] += time_phase;
415 timing.push_back(phase_timing);
417 layers.push_back(layer);
422 MPI_Win_free(&winLeaves);
433 std::cout <<
"****** maximum matching runtime ********\n";
434 std::cout << std::endl;
435 std::cout <<
"========================================================================\n";
436 std::cout <<
" BFS Search \n";
437 std::cout <<
"===================== ==================================================\n";
438 std::cout <<
"Phase Layer Match SpMV EWOpp CmUqL Prun CmMC BFS Aug Total\n";
439 std::cout <<
"===================== ===================================================\n";
441 std::vector<double> totalTimes(timing[0].
size(),0);
442 int nphases = timing.size();
443 for(
int i=0; i<timing.size(); i++)
445 printf(
" %3d %3d %8lld ", i+1, layers[i], phaseMatched[i]);
446 for(
int j=0; j<timing[i].size(); j++)
448 totalTimes[j] += timing[i][j];
450 printf(
"%.2lf ", timing[i][j]);
456 std::cout <<
"-----------------------------------------------------------------------\n";
457 std::cout <<
"Phase Layer UnMat SpMV EWOpp CmUqL Prun CmMC BFS Aug Total \n";
458 std::cout <<
"-----------------------------------------------------------------------\n";
460 combTime = totalTimes.back();
461 printf(
" %3d %3d %8lld ", nphases, totalLayer/nphases, numUnmatchedCol);
462 for(
int j=0; j<totalTimes.size()-1; j++)
464 printf(
"%.2lf ", totalTimes[j]);
466 printf(
"%.2lf\n", combTime);
470 IT nrows=
A.getnrow();
471 IT matchedRow = mateRow2Col.Count([](
IT mate){
return mate!=-1;});
475 std::cout <<
"***Final Maximum Matching***\n";
476 std::cout <<
"***Total-Rows Matched-Rows Total Time***\n";
477 printf(
"%lld %lld %lf \n",nrows, matchedRow, combTime);
478 printf(
"matched rows: %lld , which is: %lf percent \n",matchedRow, 100*(
double)matchedRow/(nrows));
479 std::cout <<
"-------------------------------------------------------\n\n";
std::shared_ptr< CommGrid > commGrid
void AugmentPath(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > &parentsRow, FullyDistVec< IT, IT > &leaves)
void maximumMatching(SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, bool prune=true, bool randMM=false, bool maximizeWeight=false)
void AugmentLevel(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > &parentsRow, FullyDistVec< IT, IT > &leaves)
SpParMat< IT, bool, DER > PermMat(const FullyDistVec< IT, IT > &ri, const IT ncol)