1#ifndef _MULTIWAY_MERGE_H_
2#define _MULTIWAY_MERGE_H_
43#pragma omp for schedule(static)
45 for (
int i=0; i<
size; i++)
56 for(
int i=0; i<(
ithread+1); i++)
62#pragma omp for schedule(static)
64 for (
int i=0; i<
size; i++)
84template <
typename RT,
typename IT,
typename NT>
91#pragma omp parallel for
107 template <
typename RT,
typename IT,
typename NT>
114 std::tuple<IT,IT,NT>* start =
spTuples->tuples;
120 std::tuple<IT,IT,NT>*
it = std::lower_bound (start, end,
search_tuple, comp);
130template<
class IT,
class NT>
135 std::vector<std::tuple<IT, IT, int>>
heap(
nlists);
138 for(
int i=0; i<
nlists; ++i)
184template<
class SR,
class IT,
class NT>
189 std::vector<std::tuple<IT, IT, int>>
heap(
nlists);
193 for(
int i=0; i<
nlists; ++i)
237 template<
class IT,
class NT>
260 for(
int i=0; i<
nlists; i++)
280 for(
int i=0; i<
nlists; i++)
320 template<
class SR,
class IT,
class NT>
345 for(
int i=0; i<
nlists; i++)
389 for (
size_t j=0;
j < index; ++
j)
411template<
class SR,
class IT,
class NT>
431 std::tuple<IT, IT, NT>*
mergeTups =
new std::tuple<IT, IT, NT>[
ArrSpTups[0]->getnnz()];
433#pragma omp parallel for
435 for(
int i=0; i<
ArrSpTups[0]->getnnz(); i++)
443 for(
int i=0; i<
nlists; ++i)
447 std::cerr <<
"Dimensions of SpTuples do not match on multiwayMerge()" << std::endl;
461 std::vector< std::vector<IT> >
colPtrs;
462 for(
int i=0; i<
nlists; i++)
471#pragma omp parallel for schedule(dynamic)
476 IT t =
static_cast<IT>(0);
496 std::ostringstream
outs;
497 outs <<
"Multiwaymerge: inputNnz/mergedNnz = " <<
ratio << std::endl;
507#pragma omp parallel for schedule(dynamic)
520 for(
int i=0; i<
nlists; i++)
536 template<
class SR,
class IT,
class NT>
556 std::tuple<IT, IT, NT>*
mergeTups =
static_cast<std::tuple<IT, IT, NT>*
>
557 (::operator
new (
sizeof(std::tuple<IT, IT, NT>[
ArrSpTups[0]->getnnz()])));
559#pragma omp parallel for
561 for(
int i=0; i<
ArrSpTups[0]->getnnz(); i++)
572 for(
int i=0; i<
nlists; ++i)
576 std::cerr <<
"Dimensions of SpTuples do not match on multiwayMerge()" << std::endl;
594#pragma omp parallel for
627#pragma omp parallel for schedule(dynamic)
644 std::tuple<IT, IT, NT> *
mergeBuf =
static_cast<std::tuple<IT, IT, NT>*
> (::operator
new (
sizeof(std::tuple<IT, IT, NT>[
mergedNnzAll])));
651#pragma omp parallel for schedule(dynamic)
674 for(
int i=0; i<
nlists; i++)
692 template<
class SR,
class IT,
class NT>
712 std::tuple<IT, IT, NT>*
mergeTups =
static_cast<std::tuple<IT, IT, NT>*
>
713 (::operator
new (
sizeof(std::tuple<IT, IT, NT>[
ArrSpTups[0]->getnnz()])));
715#pragma omp parallel for
717 for(
int i=0; i<
ArrSpTups[0]->getnnz(); i++)
728 for(
int i=0; i<
nlists; ++i)
732 std::cerr <<
"Dimensions of SpTuples do not match on MultiwayMergeHashSliding()" << std::endl;
763#pragma omp parallel for
765 for(
int s = 0; s <
nsplits; s++){
785 size_t*
flopsPerCol =
static_cast<size_t*
> (::operator
new (
sizeof(
size_t[
ndim])));
788#pragma omp parallel for
790 for(
IT c = 0; c <
ndim; c++){
808#pragma omp parallel for
810 for(
int s = 0; s <
nsplits; s++){
830 std::pair<IT, IT>**
rowIdsRange =
static_cast<std::pair<IT, IT>**
> (::operator
new (
sizeof(std::pair<IT, IT>*[
nsplits])));
831 for(
int s = 0; s <
nsplits; s++){
832 rowIdsRange[s] =
static_cast<std::pair<IT, IT>*
> (::operator
new (
sizeof(std::pair<IT, IT>[
nlists])));
840 size_t tid = omp_get_thread_num();
842#pragma omp for schedule(dynamic)
844 for(
int s = 0; s <
nsplits; s++){
889 hash = (hash+1) & (
htSize-1);
954 hash = (hash+1) & (
htSize-1);
982 std::pair<IT, IT>*
windows =
static_cast<std::pair<IT, IT>*
> (::operator
new (
sizeof(std::pair<IT, IT>[
prefixSumWindow[
ndim]])));
985#pragma omp parallel for schedule(dynamic)
987 for(
int s = 0; s <
nsplits; s++){
1013 std::tuple<IT, IT, NT> *
mergeBuf =
static_cast<std::tuple<IT, IT, NT>*
> (::operator
new (
sizeof(std::tuple<IT, IT, NT>[
totalNnz])));
1020 size_t tid = omp_get_thread_num();
1022#pragma omp for schedule(dynamic)
1024 for(
int s = 0; s <
nsplits; s++){
1067 hash = (hash+1) & (
htSize-1);
1082 for(
size_t j = 0;
j < index;
j++){
1102 for (
size_t w = 0; w <
nWindow; w++){
1135 hash = (hash+1) & (
htSize-1);
1150 for(
size_t j = 0;
j < index;
j++){
1191 for(
int i=0; i<
nlists; i++)
static void Print(const std::string &s)
void SerialMerge(const std::vector< SpTuples< IT, NT > * > &ArrSpTups, std::tuple< IT, IT, NT > *ntuples)
IT SerialMergeNNZ(const std::vector< SpTuples< IT, NT > * > &ArrSpTups)
void SerialMergeHash(const std::vector< SpTuples< IT, NT > * > &ArrSpTups, std::tuple< IT, IT, NT > *ntuples, IT *colnnz, IT maxcolnnz, IT startCol, IT endCol, bool sorted)
T * prefixSum(T *in, int size, int nthreads)
SpTuples< IT, NT > * MultiwayMerge(std::vector< SpTuples< IT, NT > * > &ArrSpTups, IT mdim=0, IT ndim=0, bool delarrs=false)
SpTuples< IT, NT > * MultiwayMergeHash(std::vector< SpTuples< IT, NT > * > &ArrSpTups, IT mdim=0, IT ndim=0, bool delarrs=false, bool sorted=true)
IT * SerialMergeNNZHash(const std::vector< SpTuples< IT, NT > * > &ArrSpTups, IT &totnnz, IT &maxnnzPerCol, IT startCol, IT endCol)
std::vector< RT > findColSplitters(SpTuples< IT, NT > *&spTuples, int nsplits)
SpTuples< IT, NT > * MultiwayMergeHashSliding(std::vector< SpTuples< IT, NT > * > &ArrSpTups, IT mdim=0, IT ndim=0, bool delarrs=false, bool sorted=true, IT maxHashTableSize=16384)
std::vector< RT > findColSplittersFinger(SpTuples< IT, NT > *&spTuples, int nsplits)