9#ifndef ApproxWeightPerfectMatching_h
10#define ApproxWeightPerfectMatching_h
12#include "CombBLAS/CombBLAS.h"
15#include <parallel/algorithm>
16#include <parallel/numeric>
43template <
class IT,
class NT>
57 int * sdispls =
new int[
nprocs]();
58 int * rdispls =
new int[
nprocs]();
76 for(
int i=0; i<
nprocs; ++i)
91template <
class IT,
class NT>
105 int * sdispls =
new int[
nprocs]();
106 int * rdispls =
new int[
nprocs]();
123 for(
int i=0; i<
nprocs; ++i)
140template <
class IT,
class NT,
class DER>
143 auto commGrid =
A.getcommgrid();
144 int procrows = commGrid->GetGridRows();
145 int proccols = commGrid->GetGridCols();
147 IT m_perproc =
nrows / procrows;
148 IT n_perproc =
ncols / proccols;
151 pr = std::min(
static_cast<int>(
grow / m_perproc), procrows-1);
155 pc = std::min(
static_cast<int>(
gcol / n_perproc), proccols-1);
158 return commGrid->GetRank(pr, pc);
198 int * rdispls =
new int[
nprocs]();
225template <
class IT,
class NT>
235#pragma omp parallel for
237 for(
int k=0; k<
param.lncol; ++k)
245 for(
IT cp = colptr[k]; cp < colptr[k+1]; ++cp)
247 IT li = dcsc->ir[cp];
272template <
class IT,
class NT,
class DER>
278 auto commGrid =
A.getcommgrid();
280 int myrank=commGrid->GetRank();
281 int pr = commGrid->GetGridRows();
282 int pc = commGrid->GetGridCols();
288 int rowrank = commGrid->GetRankInProcRow();
289 int colrank = commGrid->GetRankInProcCol();
292 DER* spSeq =
A.seqptr();
293 IT localRowStart = colrank * m_perproc;
302 IT j =
lj + localColStart;
308 IT i =
li + localRowStart;
334 minw = 99999999999999.0;
361 int rowroot = commGrid->GetDiagOfProcRow();
362 int pc = commGrid->GetGridCols();
376 std::vector <int>
dpls(pc);
378 for(
int i=1; i<pc; i++)
396#define L2_CACHE_SIZE 256000
412template <
class IT,
class NT>
437 for(
int k=0; k<
param.lncol; ++k)
442 for(
IT cp = colptr[k]; cp < colptr[k+1]; ++cp)
444 IT li = dcsc->ir[cp];
457 for(
int i=0; i<
param.nprocs; i++)
467 std::vector<int> sdispls (
param.nprocs, 0);
485 for(
int k=0; k<
param.lncol; ++k)
491 for(
IT cp = colptr[k]; cp < colptr[k+1]; ++cp)
493 IT li = dcsc->ir[cp];
535 std::vector<int> rdispls (
param.nprocs, 0);
557template <
class IT,
class NT>
558std::vector< std::tuple<IT,IT,IT,NT> >
Phase2(
const AWPM_param<IT>&
param, std::vector<std::tuple<IT,IT,NT>>&
recvTuples,
Dcsc<IT, NT>* dcsc,
const std::vector<IT>& colptr,
const std::vector<IT>&
RepMateR2C,
const std::vector<IT>&
RepMateC2R,
const std::vector<NT>&
RepMateWR2C,
const std::vector<NT>&
RepMateWC2R )
567 std::vector<std::vector<std::tuple<IT,IT, IT, NT>>>
tempTuples1 (
param.nprocs);
578 nBins = omp_get_num_threads() * 4;
583#pragma omp parallel for
585 for(
int i=0; i<
nBins; i++)
634 for(
int i=0; i<
param.nprocs; i++)
644 std::vector<int> sdispls (
param.nprocs, 0);
654#pragma omp parallel for
656 for(
int i=0; i<
nBins; i++)
735 std::vector<int> rdispls (
param.nprocs, 0);
756template <
class IT,
class NT>
757std::vector<std::vector<std::tuple<IT,IT, IT, NT>>>
Phase2_old(
const AWPM_param<IT>&
param, std::vector<std::tuple<IT,IT,NT>>&
recvTuples,
Dcsc<IT, NT>* dcsc,
const std::vector<IT>& colptr,
const std::vector<IT>&
RepMateR2C,
const std::vector<IT>&
RepMateC2R,
const std::vector<NT>&
RepMateWR2C,
const std::vector<NT>&
RepMateWC2R )
760 std::vector<std::vector<std::tuple<IT,IT, IT, NT>>>
tempTuples1 (
param.nprocs);
768 if(colptr[
mi-
param.localColStart+1] > colptr[
mi-
param.localColStart] )
770 IT * ele = find(dcsc->ir+colptr[
mi -
param.localColStart], dcsc->ir+colptr[
mi -
param.localColStart+1],
mj -
param.localRowStart);
773 if (ele != dcsc->ir+colptr[
mi -
param.localColStart+1])
791template <
class IT,
class NT,
class DER>
797 auto commGrid =
A.getcommgrid();
798 int myrank=commGrid->GetRank();
802 int nprocs = commGrid->GetSize();
803 int pr = commGrid->GetGridRows();
804 int pc = commGrid->GetGridCols();
805 int rowrank = commGrid->GetRankInProcRow();
806 int colrank = commGrid->GetRankInProcCol();
807 int diagneigh = commGrid->GetComplementRank();
817 DER* spSeq =
A.seqptr();
819 IT lnrow = spSeq->getnrow();
820 IT lncol = spSeq->getncol();
821 IT localRowStart = colrank * m_perproc;
830 param.m_perproc = m_perproc;
831 param.n_perproc = n_perproc;
832 param.localRowStart = localRowStart;
833 param.localColStart = localColStart;
834 param.myrank = myrank;
849 MPI_Sendrecv(&
xsize, 1,
MPI_INT, diagneigh,
TRX, &
trxsize, 1,
MPI_INT, diagneigh,
TRX,
World, &
status);
851 MPI_Sendrecv(
mateCol2Row.GetLocArr(),
xsize,
MPIType<IT>(), diagneigh,
TRX,
trxnums.data(),
trxsize,
MPIType<IT>(), diagneigh,
TRX,
World, &
status);
857 std::vector<int>
dpls(pc,0);
877 std::vector<int>
rdpls(pr,0);
887 std::vector<IT> colptr (lncol+1,-1);
894 colptr[lncol] = spSeq->getnnz();
895 for(
IT k=lncol-1; k>=0; k--)
899 colptr[k] = colptr[k+1];
922 if(myrank==0) std::cout <<
"Iteration " <<
iterations <<
". matching weight: sum = "<<
weightCur <<
" min = " <<
minw << std::endl;
931 std::vector<std::tuple<IT,IT,IT,NT>>
recvTuples1 =
Phase2(
param,
recvTuples, dcsc, colptr,
RepMateR2C,
RepMateC2R,
RepMateWR2C,
RepMateWC2R );
939#pragma omp parallel for
941 for(
int k=0; k<lncol; ++k)
953 IT lj =
j - localColStart;
963 for(
int k=0; k<lncol; ++k)
991#pragma omp parallel for
993 for(
int k=0; k<lncol; ++k)
1021 for(
int k=0; k<lncol; ++k)
1050#pragma omp parallel for
1071#pragma omp parallel for
1081#pragma omp parallel for
1106 std::cout <<
"------------- overal timing (HWPM) -------------" << std::endl;
1109 std::cout <<
"-------------------------------------------------" << std::endl;
1121 template <
class IT,
class NT,
class DER>
1128 A.Apply([](
NT val){
return (
fabs(val));});
1139 A.Apply([](
NT val){
return log(val);});
1143 template <
class IT,
class NT>
1183 SpParHelper::Print(
"Maximal matching is not better that the natural ordering. Hence, keeping the natural ordering.\n");
1213 SpParHelper::Print(
"Warning: The Maximum Cardinality Matching did not return a perfect matching! Need to check the input matrix.\n");
1226 SpParHelper::Print(
"Warning: The HWPM code did not return a perfect matching! Need to check the input matrix.\n");
1231 SpParHelper::Print(
"AWPM is not better that the natural ordering. Hence, keeping the natural ordering.\n");
std::shared_ptr< CommGrid > commGrid
static void Print(const std::string &s)
bool CheckMatching(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row)
int OwnerProcs(SpParMat< IT, NT, DER > &A, IT grow, IT gcol, IT nrows, IT ncols)
std::vector< std::tuple< IT, IT, NT > > Phase1(const AWPM_param< IT > ¶m, Dcsc< IT, NT > *dcsc, const std::vector< IT > &colptr, const std::vector< IT > &RepMateR2C, const std::vector< IT > &RepMateC2R, const std::vector< NT > &RepMateWR2C, const std::vector< NT > &RepMateWC2R)
std::vector< std::tuple< IT, IT, IT, NT > > Phase2(const AWPM_param< IT > ¶m, std::vector< std::tuple< IT, IT, NT > > &recvTuples, Dcsc< IT, NT > *dcsc, const std::vector< IT > &colptr, const std::vector< IT > &RepMateR2C, const std::vector< IT > &RepMateC2R, const std::vector< NT > &RepMateWR2C, const std::vector< NT > &RepMateWC2R)
NT MatchingWeight(std::vector< NT > &RepMateWC2R, MPI_Comm RowWorld, NT &minw)
void TransformWeight(SpParMat< IT, NT, DER > &A, bool applylog)
std::vector< std::tuple< IT, IT, NT > > ExchangeData(std::vector< std::vector< std::tuple< IT, IT, NT > > > &tempTuples, MPI_Comm World)
std::vector< std::tuple< IT, IT, IT, NT > > ExchangeData1(std::vector< std::vector< std::tuple< IT, IT, IT, NT > > > &tempTuples, MPI_Comm World)
NT Trace(SpParMat< IT, NT, DER > &A, IT &rettrnnz=0)
void UpdateMatching(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, std::vector< IT > &RepMateR2C, std::vector< IT > &RepMateC2R)
void maximumMatching(SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, bool prune=true, bool randMM=false, bool maximizeWeight=false)
int ThreadBuffLenForBinning(int itemsize, int nbins)
std::vector< std::tuple< IT, IT > > MateBcast(std::vector< std::tuple< IT, IT > > sendTuples, MPI_Comm World)
void ReplicateMateWeights(const AWPM_param< IT > ¶m, Dcsc< IT, NT > *dcsc, const std::vector< IT > &colptr, std::vector< IT > &RepMateC2R, std::vector< NT > &RepMateWR2C, std::vector< NT > &RepMateWC2R)
std::vector< std::vector< std::tuple< IT, IT, IT, NT > > > Phase2_old(const AWPM_param< IT > ¶m, std::vector< std::tuple< IT, IT, NT > > &recvTuples, Dcsc< IT, NT > *dcsc, const std::vector< IT > &colptr, const std::vector< IT > &RepMateR2C, const std::vector< IT > &RepMateC2R, const std::vector< NT > &RepMateWR2C, const std::vector< NT > &RepMateWC2R)
void TwoThirdApprox(SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row)
void WeightedGreedy(Par_MAT_Double &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > °Col)
void AWPM(SpParMat< IT, NT, SpDCCols< IT, NT > > &A1, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, bool optimizeProd=true, bool weightedCard=true)
std::shared_ptr< CommGrid > commGrid