29#ifndef _BFS_FRIENDS_H_
30#define _BFS_FRIENDS_H_
46template <
class IT,
class NT,
class DER>
58template <
typename IT,
typename VT>
63 if(
A.getnnz() > 0 &&
nnzx > 0)
65 int splits =
A.getnsplit();
68 std::vector< std::vector<int32_t> >
indy(splits);
69 std::vector< std::vector< VT > >
numy(splits);
74 #pragma omp parallel for
76 for(
int i=0; i<splits; ++i)
89 std::vector<int32_t>
end_recs(splits);
90 for(
int i=0; i<splits; ++i)
100 #pragma omp parallel for
102 for(
int i=0; i<splits; ++i)
109 for(
typename std::vector<int32_t>::iterator
it =
indy[i].begin();
it !=
indy[i].end(); ++
it)
121 #pragma omp parallel for
123 for(
int i=0; i<splits; ++i)
144 for(
typename std::vector<int32_t>::iterator
it =
indy[i].begin();
it !=
indy[i].end(); ++
it)
162 for(
int i=0; i< splits; ++i)
164 for(
int j=0;
j< p_c; ++
j)
172 std::cout <<
"Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
183template<
typename VT,
typename IT,
typename UDER>
192 if(
A.spSeq->getnsplit() > 0)
196 generic_gespmv_threaded_setbuffers< Select2ndSRing<bool, VT, VT> > (*(
A.spSeq),
indacc,
numacc, (
int32_t)
accnz,
optbuf.inds,
optbuf.nums,
sendcnt,
optbuf.dspls,
rowneighs);
201 if(
A.getlocalnnz() > 0 &&
accnz > 0)
213 SpParHelper::Print(
"BFS only (no semiring) function only work with optimization buffers\n");
223template <
typename IU,
typename VT>
230 std::vector<IU>().swap(
y.ind);
231 std::vector<VT>().swap(
y.num);
235 bool * isthere =
new bool[
ysize];
236 std::vector< std::pair<IU,VT> >
ts_pairs;
237 std::fill_n(isthere,
ysize,
false);
257 for(
int i=0; i<
nnzy; ++i)
266 int32_t inf = std::numeric_limits<int32_t>::min();
267 int32_t sup = std::numeric_limits<int32_t>::max();
283 y.ind.push_back(
static_cast<IU>(key));
300 if(
y.ind.back() !=
static_cast<IU>(key))
302 y.ind.push_back(
static_cast<IU>(key));
327template <
typename VT,
typename IT,
typename UDER>
368 rdispls[i+1] = rdispls[i] +
recvcnt[i];
385 SpParHelper::Print(
"BFS only (no semiring) function only work with optimization buffers\n");
396template <
typename VT,
typename IT,
typename UDER>
398 std::shared_ptr<CommGrid> cg =
A.getcommgrid();
402 int numcols = cg->GetGridCols();
435template <
typename VT,
typename IT>
441 std::pair<IT,IT>* recv_buff =
new std::pair<IT,IT>[
recv_words>>1];
446#pragma omp parallel for
449 parents.SetLocalElement(recv_buff[i].first, recv_buff[i].second);
457template <
typename VT,
typename IT,
typename UDER>
458void BottomUpStep(
SpParMat<IT,bool,UDER> &
A,
FullyDistSpVec<IT,VT> &
x,
BitMapFringe<int64_t,int64_t> &
bm_fringe,
FullyDistVec<IT,VT> &
parents,
BitMapCarousel<IT,VT> &
done,
SpDCCols<int,bool>::SpColIter*
starts)
460 std::shared_ptr<CommGrid> cg =
A.getcommgrid();
468 int diagneigh = cg->GetComplementRank();
469 MPI_Sendrecv(&
my_coluntil, 1,
MPIType<IT>(), diagneigh,
TROST, &
coluntil, 1,
MPIType<IT>(), diagneigh,
TROST,
World, &
status);
484 int numcols = cg->GetGridCols();
485 int mycol = cg->GetRankInProcRow();
499 int id = omp_get_thread_num();
562 done.RotateAlongRow();
double cblas_transvectime
double cblas_localspmvtime
double cblas_alltoalltime
double cblas_mergeconttime
std::shared_ptr< CommGrid > commGrid
Iterate over the nonzeros of the sparse column.
Iterate over (sparse) columns of the sparse matrix.
static void Print(const std::string &s)
SpDCCols< int, bool >::SpColIter * CalcSubStarts(SpParMat< IT, bool, UDER > &A, FullyDistSpVec< IT, VT > &x, BitMapCarousel< IT, VT > &done)
void BottomUpStep(SpParMat< IT, bool, UDER > &A, FullyDistSpVec< IT, VT > &x, BitMapFringe< int64_t, int64_t > &bm_fringe, FullyDistVec< IT, VT > &parents, BitMapCarousel< IT, VT > &done, SpDCCols< int, bool >::SpColIter *starts)
void MergeContributions(FullyDistSpVec< IU, VT > &y, int *&recvcnt, int *&rdispls, int32_t *&recvindbuf, VT *&recvnumbuf, int rowneighs)
void CheckSpMVCompliance(const MATRIX &A, const VECTOR &x)
void AllGatherVector(MPI_Comm &ColWorld, int trxlocnz, IU lenuntil, int32_t *&trxinds, NV *&trxnums, int32_t *&indacc, NV *&numacc, int &accnz, bool indexisvalue)
void UpdateParents(MPI_Comm &RowWorld, std::pair< IT, IT > *updates, int num_updates, FullyDistVec< IT, VT > &parents, int source, int dest, BitMapFringe< int64_t, int64_t > &bm_fringe)
void TransposeVector(MPI_Comm &World, const FullyDistSpVec< IU, NV > &x, int32_t &trxlocnz, IU &lenuntil, int32_t *&trxinds, NV *&trxnums, bool indexisvalue)
void LocalSpMV(const SpParMat< IT, bool, UDER > &A, int rowneighs, OptBuf< int32_t, VT > &optbuf, int32_t *&indacc, VT *&numacc, int *sendcnt, int accnz)
void dcsc_gespmv_threaded_setbuffers(const SpDCCols< IT, bool > &A, const int32_t *indx, const VT *numx, int32_t nnzx, int32_t *sendindbuf, VT *sendnumbuf, int *cnts, int *dspls, int p_c)