COMBINATORIAL_BLAS 1.6
FullyDistVec.cpp
/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.6 -------------------------------------------------*/
/* date: 6/15/2017 ---------------------------------------------*/
/* authors: Ariful Azad, Aydin Buluc --------------------------*/
/****************************************************************/
/*
 Copyright (c) 2010-2017, The Regents of the University of California

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */
#include "FullyDistVec.h"
#include "FullyDistSpVec.h"
#include "Operations.h"

namespace combblas {
template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec ()
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>()
{ }

template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec (IT globallen, NT initval)
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(globallen)
{
    arr.resize(MyLocLength(), initval);
}

template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec ( std::shared_ptr<CommGrid> grid )
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(grid)
{ }

template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec ( std::shared_ptr<CommGrid> grid, IT globallen, NT initval)
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(grid, globallen)
{
    arr.resize(MyLocLength(), initval);
}

template <class IT, class NT>
FullyDistVec<IT,NT>::FullyDistVec (const FullyDistSpVec<IT,NT> & rhs) // Conversion copy-constructor
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(rhs.commGrid, rhs.glen)
{
    *this = rhs;
}

template <class IT, class NT>
template <class ITRHS, class NTRHS>
FullyDistVec<IT, NT>::FullyDistVec (const FullyDistVec<ITRHS, NTRHS> & rhs) // type-converting constructor
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(rhs.commGrid, static_cast<IT>(rhs.glen))
{
    arr.resize(static_cast<IT>(rhs.arr.size()), NT());

    for(IT i=0; (unsigned)i < arr.size(); ++i)
    {
        arr[i] = static_cast<NT>(rhs.arr[static_cast<ITRHS>(i)]);
    }
}

template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec ( const std::vector<NT> & fillarr, std::shared_ptr<CommGrid> grid )
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(grid)
{
    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();

    IT * sizes = new IT[nprocs];
    IT nsize = fillarr.size();
    sizes[rank] = nsize;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), sizes, 1, MPIType<IT>(), World);
    glen = std::accumulate(sizes, sizes+nprocs, static_cast<IT>(0));

    IT lengthuntil = std::accumulate(sizes, sizes+rank, static_cast<IT>(0));

    // Although the vector is not reshuffled yet, its glen and commGrid are set,
    // so we can call the Owner/MyLocLength/LengthUntil functions (to infer the future distribution)

    // rebalance/redistribute
    int * sendcnt = new int[nprocs](); // no need to std::fill as this form of new[] with () value-initializes PODs
    for(IT i=0; i<nsize; ++i)
    {
        IT locind;
        int owner = Owner(i+lengthuntil, locind);
        ++sendcnt[owner];
    }
    int * recvcnt = new int[nprocs];
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the counts

    int * sdispls = new int[nprocs];
    int * rdispls = new int[nprocs];
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT totrecv = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    arr.resize(totrecv);

    // data is already in the right order in fillarr
    MPI_Alltoallv(fillarr.data(), sendcnt, sdispls, MPIType<NT>(), arr.data(), recvcnt, rdispls, MPIType<NT>(), World);
    DeleteAll(sendcnt, recvcnt, sdispls, rdispls);
    delete [] sizes;
}
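
// Illustrative usage sketch (not part of the original file): each rank
// contributes its own chunk, and the constructor above reshuffles the
// concatenated data into the library's block distribution.
//
//   std::shared_ptr<CommGrid> grid(new CommGrid(MPI_COMM_WORLD, 0, 0)); // 0,0: auto square grid
//   std::vector<double> mychunk = /* ... */; // lengths may differ across ranks
//   FullyDistVec<int64_t, double> dvec(mychunk, grid);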

template <class IT, class NT>
std::pair<IT, NT> FullyDistVec<IT,NT>::MinElement() const
{
    auto it = min_element(arr.begin(), arr.end());
    NT localMin = *it;
    NT globalMin;
    MPI_Allreduce( &localMin, &globalMin, 1, MPIType<NT>(), MPI_MIN, commGrid->GetWorld());

    IT localMinIdx = TotalLength(); // "infinity" sentinel for ranks that don't hold the minimum
    if(localMin == globalMin)
    {
        localMinIdx = distance(arr.begin(), it) + LengthUntil();
    }
    IT globalMinIdx;
    MPI_Allreduce( &localMinIdx, &globalMinIdx, 1, MPIType<IT>(), MPI_MIN, commGrid->GetWorld()); // MPI_MIN also breaks ties toward the smallest global index

    return std::make_pair(globalMinIdx, globalMin);
}
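
// MinElement above works in two collective steps: the first MPI_Allreduce
// finds the global minimum value; then only the ranks holding that value
// propose a real index (everyone else proposes the TotalLength() sentinel),
// and the second MPI_MIN reduction picks the smallest index attaining it.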

template <class IT, class NT>
template <typename _BinaryOperation>
NT FullyDistVec<IT,NT>::Reduce(_BinaryOperation __binary_op, NT identity) const
{
    // std::accumulate returns identity for empty sequences
    NT localsum = std::accumulate( arr.begin(), arr.end(), identity, __binary_op);

    NT totalsum = identity;
    MPI_Allreduce( &localsum, &totalsum, 1, MPIType<NT>(), MPIOp<_BinaryOperation, NT>::op(), commGrid->GetWorld());
    return totalsum;
}
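
// Illustrative usage sketch (not part of the original file): a global sum
// over all ranks via the Reduce above, assuming NT = double.
//
//   double total = dvec.Reduce(std::plus<double>(), 0.0);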

template <class IT, class NT>
template <typename OUT, typename _BinaryOperation, typename _UnaryOperation>
OUT FullyDistVec<IT,NT>::Reduce(_BinaryOperation __binary_op, OUT default_val, _UnaryOperation __unary_op) const
{
    // std::accumulate returns identity for empty sequences
    OUT localsum = default_val;

    if (arr.size() > 0)
    {
        typename std::vector< NT >::const_iterator iter = arr.begin();
        //localsum = __unary_op(*iter);
        //iter++;
        while (iter < arr.end())
        {
            localsum = __binary_op(localsum, __unary_op(*iter));
            iter++;
        }
    }

    OUT totalsum = default_val;
    MPI_Allreduce( &localsum, &totalsum, 1, MPIType<OUT>(), MPIOp<_BinaryOperation, OUT>::op(), commGrid->GetWorld());
    return totalsum;
}


// ABAB: Put concept check, NT should be integer for this to make sense
template<class IT, class NT>
void FullyDistVec<IT,NT>::SelectCandidates(double nver)
{
#ifdef DETERMINISTIC
    MTRand M(1);
#else
    MTRand M; // generate random numbers with Mersenne Twister
#endif

    IT length = TotalLength();
    std::vector<double> loccands(length);
    std::vector<NT> loccandints(length);
    MPI_Comm World = commGrid->GetWorld();
    int myrank = commGrid->GetRank();
    if(myrank == 0)
    {
        for(int i=0; i<length; ++i)
            loccands[i] = M.rand();
        std::transform(loccands.begin(), loccands.end(), loccands.begin(), std::bind2nd( std::multiplies<double>(), nver )); // note: std::bind2nd was removed in C++17; a lambda is the modern equivalent
        for(int i=0; i<length; ++i)
            loccandints[i] = static_cast<NT>(loccands[i]);
    }
    MPI_Bcast(loccandints.data(), length, MPIType<NT>(), 0, World); // broadcast the candidates from the root
    for(IT i=0; i<length; ++i)
        SetElement(i, loccandints[i]);
}

template <class IT, class NT>
template <class ITRHS, class NTRHS>
FullyDistVec<IT,NT> & FullyDistVec<IT,NT>::operator=(const FullyDistVec<ITRHS,NTRHS> & rhs)
{
    if(static_cast<const void*>(this) != static_cast<const void*>(&rhs))
    {
        //FullyDist<IT,NT>::operator= (rhs); // to update glen and commGrid
        glen = static_cast<IT>(rhs.glen);
        commGrid = rhs.commGrid;

        arr.resize(rhs.arr.size(), NT());
        for(IT i=0; (unsigned)i < arr.size(); ++i)
        {
            arr[i] = static_cast<NT>(rhs.arr[static_cast<ITRHS>(i)]);
        }
    }
    return *this;
}

template <class IT, class NT>
FullyDistVec<IT,NT> & FullyDistVec<IT,NT>::operator=(const FullyDistVec<IT,NT> & rhs)
{
    if(this != &rhs)
    {
        FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>::operator=(rhs); // to update glen and commGrid
        arr = rhs.arr;
    }
    return *this;
}

template <class IT, class NT>
FullyDistVec<IT,NT> & FullyDistVec<IT,NT>::operator=(const FullyDistSpVec<IT,NT> & rhs) // FullyDistSpVec->FullyDistVec conversion operator
{
    FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>::operator=(rhs); // to update glen and commGrid
    arr.resize(rhs.MyLocLength());
    std::fill(arr.begin(), arr.end(), NT());

    IT spvecsize = rhs.getlocnnz();
    for(IT i=0; i< spvecsize; ++i)
    {
        //if(rhs.ind[i] > arr.size())
        //    cout << "rhs.ind[i]: " << rhs.ind[i] << endl;
        arr[rhs.ind[i]] = rhs.num[i];
    }
    return *this;
}


template <class IT, class NT>
FullyDistVec<IT,NT> & FullyDistVec<IT,NT>::operator+=(const FullyDistSpVec<IT,NT> & rhs)
{
    IT spvecsize = rhs.getlocnnz();
    #ifdef _OPENMP
    #pragma omp parallel for
    #endif
    for(IT i=0; i< spvecsize; ++i)
    {
        if(arr[rhs.ind[i]] == NT()) // not set before
            arr[rhs.ind[i]] = rhs.num[i];
        else
            arr[rhs.ind[i]] += rhs.num[i];
    }
    return *this;
}
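
// The OpenMP loop above is race-free because the indices of a FullyDistSpVec
// are unique within a process, so each iteration writes a distinct slot of arr.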

template <class IT, class NT>
FullyDistVec<IT,NT> & FullyDistVec<IT,NT>::operator-=(const FullyDistSpVec<IT,NT> & rhs)
{
    IT spvecsize = rhs.getlocnnz();
    for(IT i=0; i< spvecsize; ++i)
    {
        arr[rhs.ind[i]] -= rhs.num[i];
    }
    return *this;
}

template <class IT, class NT>
template <typename _BinaryOperation>
void FullyDistVec<IT,NT>::EWise(const FullyDistVec<IT,NT> & rhs, _BinaryOperation __binary_op)
{
    std::transform ( arr.begin(), arr.end(), rhs.arr.begin(), arr.begin(), __binary_op );
}

template <class IT, class NT>
template <typename _BinaryOperation, typename OUT>
void FullyDistVec<IT,NT>::EWiseOut(const FullyDistVec<IT,NT> & rhs, _BinaryOperation __binary_op, FullyDistVec<IT,OUT> & result)
{
    std::transform ( arr.begin(), arr.end(), rhs.arr.begin(), result.arr.begin(), __binary_op );
}

template <class IT, class NT>
FullyDistVec<IT,NT> & FullyDistVec<IT,NT>::operator+=(const FullyDistVec<IT,NT> & rhs)
{
    if(this != &rhs)
    {
        if(!(*commGrid == *rhs.commGrid))
        {
            std::cout << "Grids are not comparable for elementwise addition" << std::endl;
            MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
        }
        else
        {
            EWise(rhs, std::plus<NT>());
        }
    }
    return *this;
}

template <class IT, class NT>
FullyDistVec<IT,NT> & FullyDistVec<IT,NT>::operator-=(const FullyDistVec<IT,NT> & rhs)
{
    if(this != &rhs)
    {
        if(!(*commGrid == *rhs.commGrid))
        {
            std::cout << "Grids are not comparable for elementwise subtraction" << std::endl;
            MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
        }
        else
        {
            EWise(rhs, std::minus<NT>());
        }
    }
    return *this;
}

template <class IT, class NT>
bool FullyDistVec<IT,NT>::operator==(const FullyDistVec<IT,NT> & rhs) const
{
    ErrorTolerantEqual<NT> epsilonequal(EPSILON);
    int local = 1;
    local = (int) std::equal(arr.begin(), arr.end(), rhs.arr.begin(), epsilonequal );
    int whole = 1;
    MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, commGrid->GetWorld());
    return static_cast<bool>(whole);
}

template <class IT, class NT>
template <typename _Predicate>
IT FullyDistVec<IT,NT>::Count(_Predicate pred) const
{
    IT local = count_if( arr.begin(), arr.end(), pred );
    IT whole = 0;
    MPI_Allreduce( &local, &whole, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
    return whole;
}
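
// Illustrative usage sketch (not part of the original file): count the
// negative entries globally, assuming IT = int64_t and NT = double.
//
//   int64_t nneg = dvec.Count([](double v) { return v < 0.0; });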


//! Return the indices where pred is true
template <class IT, class NT>
template <typename _Predicate>
FullyDistVec<IT,IT> FullyDistVec<IT,NT>::FindInds(_Predicate pred) const
{
    FullyDistVec<IT,IT> found(commGrid);
    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();

    IT sizelocal = LocArrSize();
    IT sizesofar = LengthUntil();
    for(IT i=0; i<sizelocal; ++i)
    {
        if(pred(arr[i]))
        {
            found.arr.push_back(i+sizesofar);
        }
    }
    IT * dist = new IT[nprocs];
    IT nsize = found.arr.size();
    dist[rank] = nsize;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
    IT lengthuntil = std::accumulate(dist, dist+rank, static_cast<IT>(0));
    found.glen = std::accumulate(dist, dist+nprocs, static_cast<IT>(0));

    // Although the found vector is not reshuffled yet, its glen and commGrid are set,
    // so we can call the Owner/MyLocLength/LengthUntil functions (to infer the future distribution)

    // rebalance/redistribute
    int * sendcnt = new int[nprocs];
    std::fill(sendcnt, sendcnt+nprocs, 0);
    for(IT i=0; i<nsize; ++i)
    {
        IT locind;
        int owner = found.Owner(i+lengthuntil, locind);
        ++sendcnt[owner];
    }
    int * recvcnt = new int[nprocs];
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the counts

    int * sdispls = new int[nprocs];
    int * rdispls = new int[nprocs];
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT totrecv = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    std::vector<IT> recvbuf(totrecv);

    // data is already in the right order in found.arr
    MPI_Alltoallv(&(found.arr[0]), sendcnt, sdispls, MPIType<IT>(), &(recvbuf[0]), recvcnt, rdispls, MPIType<IT>(), World);
    found.arr.swap(recvbuf);
    delete [] dist;
    DeleteAll(sendcnt, recvcnt, sdispls, rdispls);

    return found;
}


//! Return the elements for which pred is true.
//! Requires no communication because FullyDistSpVec (the return object)
//! is distributed based on length, not nonzero counts
template <class IT, class NT>
template <typename _Predicate>
FullyDistSpVec<IT,NT> FullyDistVec<IT,NT>::Find(_Predicate pred) const
{
    FullyDistSpVec<IT,NT> found(commGrid);
    size_t size = arr.size();
    for(size_t i=0; i<size; ++i)
    {
        if(pred(arr[i]))
        {
            found.ind.push_back( (IT) i);
            found.num.push_back(arr[i]);
        }
    }
    found.glen = glen;
    return found;
}


//! Requires no communication because FullyDistSpVec (the return object)
//! is distributed based on length, not nonzero counts
template <class IT, class NT>
FullyDistSpVec<IT,NT> FullyDistVec<IT,NT>::Find(NT val) const
{
    FullyDistSpVec<IT,NT> found(commGrid);
    size_t size = arr.size();
    for(size_t i=0; i<size; ++i)
    {
        if(arr[i]==val)
        {
            found.ind.push_back( (IT) i);
            found.num.push_back(val);
        }
    }
    found.glen = glen;
    return found;
}
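
// Note the asymmetry above: FindInds must rebalance its result because the
// number of matches per process is unpredictable, whereas Find can leave the
// matching entries where they already live, since a FullyDistSpVec is
// distributed by vector length rather than by nonzero count.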


template <class IT, class NT>
template <class HANDLER>
std::ifstream& FullyDistVec<IT,NT>::ReadDistribute (std::ifstream& infile, int master, HANDLER handler)
{
    FullyDistSpVec<IT,NT> tmpSpVec(commGrid);
    tmpSpVec.ReadDistribute(infile, master, handler);

    *this = tmpSpVec;
    return infile;
}

template <class IT, class NT>
template <class HANDLER>
void FullyDistVec<IT,NT>::SaveGathered(std::ofstream& outfile, int master, HANDLER handler, bool printProcSplits)
{
    FullyDistSpVec<IT,NT> tmpSpVec(*this);
    tmpSpVec.SaveGathered(outfile, master, handler, printProcSplits);
}

template <class IT, class NT>
void FullyDistVec<IT,NT>::SetElement (IT indx, NT numx)
{
    int rank = commGrid->GetRank();
    if (glen == 0)
    {
        if(rank == 0)
            std::cout << "FullyDistVec::SetElement can't be called on an empty vector." << std::endl;
        return;
    }
    IT locind;
    int owner = Owner(indx, locind);
    if(commGrid->GetRank() == owner)
    {
        if (locind > (LocArrSize() -1))
        {
            std::cout << "FullyDistVec::SetElement cannot expand array" << std::endl;
        }
        else if (locind < 0)
        {
            std::cout << "FullyDistVec::SetElement local index < 0" << std::endl;
        }
        else
        {
            arr[locind] = numx;
        }
    }
}

template <class IT, class NT>
NT FullyDistVec<IT,NT>::GetElement (IT indx) const
{
    NT ret;
    MPI_Comm World = commGrid->GetWorld();
    int rank = commGrid->GetRank();
    if (glen == 0)
    {
        if(rank == 0)
            std::cout << "FullyDistVec::GetElement can't be called on an empty vector." << std::endl;

        return NT();
    }
    IT locind;
    int owner = Owner(indx, locind);
    if(commGrid->GetRank() == owner)
    {
        if (locind > (LocArrSize() -1))
        {
            std::cout << "FullyDistVec::GetElement local index > size" << std::endl;
            ret = NT();
        }
        else if (locind < 0)
        {
            std::cout << "FullyDistVec::GetElement local index < 0" << std::endl;
            ret = NT();
        }
        else
        {
            ret = arr[locind];
        }
    }
    MPI_Bcast(&ret, 1, MPIType<NT>(), owner, World); // the owner broadcasts the value
    return ret;
}
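
// GetElement is therefore a collective operation: every process must call it,
// because the owner broadcasts the requested value to all ranks. SetElement,
// by contrast, involves no communication; non-owners simply do nothing.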

// Write to file using MPI-2
template <class IT, class NT>
void FullyDistVec<IT,NT>::DebugPrint()
{
    int nprocs, rank;
    MPI_Comm World = commGrid->GetWorld();
    MPI_Comm_size(World, &nprocs);
    MPI_Comm_rank(World, &rank);

    MPI_File thefile;
    char _fn[] = "temp_fullydistvec"; // AL: this avoids the problem that C++ string literals are const char* while C string literals are char*, leading to a const warning (technically an error, but compilers are tolerant)
    MPI_File_open(World, _fn, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);

    IT lengthuntil = LengthUntil();

    // The disp displacement argument specifies the position
    // (absolute offset in bytes from the beginning of the file)
    char native[] = "native"; // AL: same const-correctness workaround as above
    MPI_File_set_view(thefile, int64_t(lengthuntil * sizeof(NT)), MPIType<NT>(), MPIType<NT>(), native, MPI_INFO_NULL);

    IT count = LocArrSize();
    MPI_File_write(thefile, arr.data(), count, MPIType<NT>(), MPI_STATUS_IGNORE);
    MPI_File_close(&thefile);

    // Now let processor-0 read the file and print
    IT * counts = new IT[nprocs];
    MPI_Gather(&count, 1, MPIType<IT>(), counts, 1, MPIType<IT>(), 0, World); // gather at root=0
    if(rank == 0)
    {
        FILE * f = fopen("temp_fullydistvec", "r");
        if(!f)
        {
            std::cerr << "Problem reading binary input file\n";
            return;
        }
        IT maxd = *std::max_element(counts, counts+nprocs);
        NT * data = new NT[maxd];

        for(int i=0; i<nprocs; ++i)
        {
            // read counts[i] values and print them
            size_t result = fread(data, sizeof(NT), counts[i], f);
            if (result != (unsigned)counts[i]) { std::cout << "Error in fread, only " << result << " entries read" << std::endl; }

            std::cout << "Elements stored on proc " << i << ": {";
            for (int j = 0; j < counts[i]; j++)
            {
                std::cout << data[j] << ",";
            }
            std::cout << "}" << std::endl;
        }
        fclose(f); // close the temporary file (missing in the original, which leaked the handle)
        delete [] data;
        delete [] counts;
    }
}

template <class IT, class NT>
template <typename _UnaryOperation, typename IRRELEVANT_NT>
void FullyDistVec<IT,NT>::Apply(_UnaryOperation __unary_op, const FullyDistSpVec<IT,IRRELEVANT_NT> & mask)
{
    typename std::vector< IT >::const_iterator miter = mask.ind.begin();
    while (miter < mask.ind.end())
    {
        IT index = *miter++;
        arr[index] = __unary_op(arr[index]);
    }
}

template <class IT, class NT>
template <typename _BinaryOperation, typename _BinaryPredicate, class NT2>
void FullyDistVec<IT,NT>::EWiseApply(const FullyDistVec<IT,NT2> & other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, const bool useExtendedBinOp)
{
    if(*(commGrid) == *(other.commGrid))
    {
        if(glen != other.glen)
        {
            std::ostringstream outs;
            outs << "Vector dimensions don't match (" << glen << " vs " << other.glen << ") for FullyDistVec::EWiseApply\n";
            SpParHelper::Print(outs.str());
            MPI_Abort(MPI_COMM_WORLD, DIMMISMATCH);
        }
        else
        {
            typename std::vector< NT >::iterator thisIter = arr.begin();
            typename std::vector< NT2 >::const_iterator otherIter = other.arr.begin();
            while (thisIter < arr.end())
            {
                if (_do_op(*thisIter, *otherIter, false, false))
                    *thisIter = __binary_op(*thisIter, *otherIter, false, false);
                thisIter++;
                otherIter++;
            }
        }
    }
    else
    {
        std::ostringstream outs;
        outs << "Grids are not comparable for FullyDistVec<IT,NT>::EWiseApply" << std::endl;
        SpParHelper::Print(outs.str());
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }
}


// Note (Ariful): multithreaded version implemented only when applyNulls=false.
// TODO: employ multithreading when applyNulls=true
template <class IT, class NT>
template <typename _BinaryOperation, typename _BinaryPredicate, class NT2>
void FullyDistVec<IT,NT>::EWiseApply(const FullyDistSpVec<IT,NT2> & other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, bool applyNulls, NT2 nullValue, const bool useExtendedBinOp)
{
    if(*(commGrid) == *(other.commGrid))
    {
        if(glen != other.glen)
        {
            std::cerr << "Vector dimensions don't match (" << glen << " vs " << other.glen << ") for FullyDistVec::EWiseApply\n";
            MPI_Abort(MPI_COMM_WORLD, DIMMISMATCH);
        }
        else
        {
            typename std::vector< IT >::const_iterator otherInd = other.ind.begin();
            typename std::vector< NT2 >::const_iterator otherNum = other.num.begin();

            if (applyNulls) // scan the entire dense vector and apply sparse elements as they appear
            {
                for(IT i=0; (unsigned)i < arr.size(); ++i)
                {
                    if (otherInd == other.ind.end() || i < *otherInd)
                    {
                        if (_do_op(arr[i], nullValue, false, true))
                            arr[i] = __binary_op(arr[i], nullValue, false, true);
                    }
                    else
                    {
                        if (_do_op(arr[i], *otherNum, false, false))
                            arr[i] = __binary_op(arr[i], *otherNum, false, false);
                        otherInd++;
                        otherNum++;
                    }
                }
            }
            else // scan the sparse vector only
            {
                /*
                for(otherInd = other.ind.begin(); otherInd < other.ind.end(); otherInd++, otherNum++)
                {
                    if (_do_op(arr[*otherInd], *otherNum, false, false))
                        arr[*otherInd] = __binary_op(arr[*otherInd], *otherNum, false, false);
                }*/

                IT spsize = other.ind.size();
#ifdef _OPENMP
#pragma omp parallel for
#endif
                for(IT i=0; i< spsize; i++)
                {
                    if (_do_op(arr[other.ind[i]], other.num[i], false, false))
                        arr[other.ind[i]] = __binary_op(arr[other.ind[i]], other.num[i], false, false);
                }
            }
        }
    }
    else
    {
        std::cout << "Grids are not comparable for elementwise apply" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }
}


template <class IT, class NT>
FullyDistVec<IT,IT> FullyDistVec<IT,NT>::sort()
{
    MPI_Comm World = commGrid->GetWorld();
    FullyDistVec<IT,IT> temp(commGrid);
    IT nnz = LocArrSize();
    std::pair<NT,IT> * vecpair = new std::pair<NT,IT>[nnz];
    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();

    IT * dist = new IT[nprocs];
    dist[rank] = nnz;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
    IT sizeuntil = LengthUntil(); // size = length, for dense vectors
    for(IT i=0; i< nnz; ++i)
    {
        vecpair[i].first = arr[i]; // we'll sort wrt numerical values
        vecpair[i].second = i + sizeuntil;
    }
    SpParHelper::MemoryEfficientPSort(vecpair, nnz, dist, World);

    std::vector< IT > narr(nnz);
    for(IT i=0; i< nnz; ++i)
    {
        arr[i] = vecpair[i].first;   // sorted range (change the object itself)
        narr[i] = vecpair[i].second; // inverse permutation stored as numerical values
    }
    delete [] vecpair;
    delete [] dist;

    temp.glen = glen;
    temp.arr = narr;
    return temp;
}
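
// After the parallel sort above, *this holds its values in globally
// non-decreasing order, and the returned vector holds the original global
// index of each sorted entry, i.e. the permutation that undoes the sort.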


// Randomly permutes an already existing vector
template <class IT, class NT>
void FullyDistVec<IT,NT>::RandPerm()
{
#ifdef DETERMINISTIC
    uint64_t seed = 1383098845;
#else
    uint64_t seed = time(NULL);
#endif

    MTRand M(seed); // generate random numbers with Mersenne Twister
    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();
    IT size = LocArrSize();

#ifdef COMBBLAS_LEGACY
    std::pair<double,NT> * vecpair = new std::pair<double,NT>[size];
    IT * dist = new IT[nprocs];
    dist[rank] = size;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
    for(int i=0; i<size; ++i)
    {
        vecpair[i].first = M.rand();
        vecpair[i].second = arr[i];
    }
    // less< pair<T1,T2> > works correctly (sorts wrt first elements)
    SpParHelper::MemoryEfficientPSort(vecpair, size, dist, World);
    std::vector< NT > nnum(size);
    for(int i=0; i<size; ++i) nnum[i] = vecpair[i].second;
    DeleteAll(vecpair, dist);
    arr.swap(nnum);
#else
    std::vector< std::vector< NT > > data_send(nprocs);
    for(int i=0; i<size; ++i)
    {
        // send each entry to a random process
        uint32_t dest = M.randInt(nprocs-1);
        data_send[dest].push_back(arr[i]);
    }
    int * sendcnt = new int[nprocs];
    int * sdispls = new int[nprocs];
    for(int i=0; i<nprocs; ++i) sendcnt[i] = (int) data_send[i].size();

    int * rdispls = new int[nprocs];
    int * recvcnt = new int[nprocs];
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the request counts
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT totrecv = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    if(totrecv > std::numeric_limits<int>::max())
    {
        std::cout << "COMBBLAS_WARNING: total data to receive exceeds max int: " << totrecv << std::endl;
    }
    std::vector<NT>().swap(arr); // make space for temporaries

    NT * sendbuf = new NT[size];
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(data_send[i].begin(), data_send[i].end(), sendbuf+sdispls[i]);
        std::vector<NT>().swap(data_send[i]); // free memory
    }
    NT * recvbuf = new NT[totrecv];
    MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<NT>(), recvbuf, recvcnt, rdispls, MPIType<NT>(), World);
    //std::random_shuffle(recvbuf, recvbuf + totrecv);
    std::default_random_engine gen(seed);
    std::shuffle(recvbuf, recvbuf + totrecv, gen); // locally shuffle data

    int64_t * localcounts = new int64_t[nprocs];
    localcounts[rank] = totrecv;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<int64_t>(), localcounts, 1, MPIType<int64_t>(), World);
    int64_t glenuntil = std::accumulate(localcounts, localcounts+rank, static_cast<int64_t>(0));
    delete [] localcounts;

    std::vector< std::vector< IT > > locs_send(nprocs);
    for(IT i=0; i< totrecv; ++i) // determine new locations w/ prefix sums
    {
        IT remotelocind;
        int owner = Owner(glenuntil+i, remotelocind);
        locs_send[owner].push_back(remotelocind);
        data_send[owner].push_back(recvbuf[i]);
    }

    for(int i=0; i<nprocs; ++i) sendcnt[i] = (int) data_send[i].size(); // = locs_send[i].size()
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the counts of the second stage
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT newsize = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    if(newsize > std::numeric_limits<int>::max())
    {
        std::cout << "COMBBLAS_WARNING: total data to receive exceeds max int: " << newsize << std::endl;
    }
    // re-use the receive buffer as sendbuf of second stage
    IT totalsend = std::accumulate(sendcnt, sendcnt+nprocs, static_cast<IT>(0));
    if(totalsend != totrecv || newsize != size)
    {
        std::cout << "COMBBLAS_WARNING: sending different sized data than received: " << totalsend << "=" << totrecv << " , " << newsize << "=" << size << std::endl;
    }
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(data_send[i].begin(), data_send[i].end(), recvbuf+sdispls[i]);
        std::vector<NT>().swap(data_send[i]); // free memory
    }
    // re-use the send buffer as receive buffer of second stage
    MPI_Alltoallv(recvbuf, sendcnt, sdispls, MPIType<NT>(), sendbuf, recvcnt, rdispls, MPIType<NT>(), World);
    delete [] recvbuf;
    IT * newinds = new IT[totalsend];
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(locs_send[i].begin(), locs_send[i].end(), newinds+sdispls[i]);
        std::vector<IT>().swap(locs_send[i]); // free memory
    }
    IT * indsbuf = new IT[size];
    MPI_Alltoallv(newinds, sendcnt, sdispls, MPIType<IT>(), indsbuf, recvcnt, rdispls, MPIType<IT>(), World);
    DeleteAll(newinds, sendcnt, sdispls, rdispls, recvcnt);
    arr.resize(size);
    for(IT i=0; i<size; ++i)
    {
        arr[indsbuf[i]] = sendbuf[i];
    }
    DeleteAll(sendbuf, indsbuf); // free the staging buffers
#endif
}
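
// The non-legacy path above permutes in three rounds: (1) every entry is
// scattered to a uniformly random process, (2) each process locally shuffles
// what it received, and (3) the shuffled data is redistributed so that every
// rank ends up with exactly its original local length, with target slots
// computed from prefix sums of the intermediate counts.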

// ABAB: In its current form, unless LengthUntil returns NT,
// this won't work reliably for anything other than integers
template <class IT, class NT>
void FullyDistVec<IT,NT>::iota(IT globalsize, NT first)
{
    glen = globalsize;
    IT length = MyLocLength(); // only needs glen to determine the local length

    arr.resize(length);
    SpHelper::iota(arr.begin(), arr.end(), LengthUntil() + first); // global across processors
}
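
// Illustrative usage sketch (not part of the original file): iota is the
// usual way to build an identity permutation, e.g. as input to operator().
//
//   FullyDistVec<int64_t, int64_t> p(grid);
//   p.iota(n, 0); // p = [0, 1, ..., n-1], block-distributed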

template <class IT, class NT>
FullyDistVec<IT,NT> FullyDistVec<IT,NT>::operator() (const FullyDistVec<IT,IT> & ri) const
{
    if(!(*commGrid == *ri.commGrid))
    {
        std::cout << "Grids are not comparable for dense vector subsref" << std::endl;
        return FullyDistVec<IT,NT>();
    }

    MPI_Comm World = commGrid->GetWorld();
    FullyDistVec<IT,NT> Indexed(commGrid, ri.glen, NT()); // length(Indexed) = length(ri)
    int nprocs = commGrid->GetSize();
    std::vector< std::vector< IT > > data_req(nprocs);
    std::vector< std::vector< IT > > revr_map(nprocs); // to put the incoming data in the correct location

    IT riloclen = ri.LocArrSize();
    for(IT i=0; i < riloclen; ++i)
    {
        IT locind;
        int owner = Owner(ri.arr[i], locind); // numerical values in ri are 0-based
        data_req[owner].push_back(locind);
        revr_map[owner].push_back(i);
    }
    IT * sendbuf = new IT[riloclen];
    int * sendcnt = new int[nprocs];
    int * sdispls = new int[nprocs];
    for(int i=0; i<nprocs; ++i)
        sendcnt[i] = (int) data_req[i].size();

    int * rdispls = new int[nprocs];
    int * recvcnt = new int[nprocs];
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the request counts
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT totrecv = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
        std::vector<IT>().swap(data_req[i]);
    }

    IT * reversemap = new IT[riloclen];
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(revr_map[i].begin(), revr_map[i].end(), reversemap+sdispls[i]); // reversemap array is unique
        std::vector<IT>().swap(revr_map[i]);
    }

    IT * recvbuf = new IT[totrecv];
    MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>(), World); // request data
    delete [] sendbuf;

    // We will return the requested data;
    // our response is exactly as big as the request.
    // As we are indexing a dense vector, all elements exist,
    // so the displacement boundaries are the same as rdispls.
    NT * databack = new NT[totrecv];
    for(int i=0; i<nprocs; ++i)
    {
        for(int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j) // fetch the numerical values
        {
            databack[j] = arr[recvbuf[j]];
        }
    }
    delete [] recvbuf;
    NT * databuf = new NT[riloclen];

    // the response counts are the same as the request counts
    MPI_Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>(), World); // send data
    DeleteAll(rdispls, recvcnt, databack);

    // Now create the output from databuf
    // Indexed.arr was already allocated in the constructor
    for(int i=0; i<nprocs; ++i)
    {
        for(int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
        {
            Indexed.arr[reversemap[j]] = databuf[j];
        }
    }
    DeleteAll(sdispls, sendcnt, databuf, reversemap);
    return Indexed;
}
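
// operator() above is a classic request/response exchange: each rank asks the
// owners for the entries named in ri (first MPI_Alltoallv), the owners look
// the values up locally, and a second MPI_Alltoallv returns them, with
// reversemap scattering the answers back into request order.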


template <class IT, class NT>
void FullyDistVec<IT,NT>::PrintInfo(std::string vectorname) const
{
    IT totl = TotalLength();
    if (commGrid->GetRank() == 0)
        std::cout << "As a whole, " << vectorname << " has length " << totl << std::endl;
}


template <class IT, class NT>
void FullyDistVec<IT,NT>::Set(const FullyDistSpVec<IT,NT> & other)
{
    if(*(commGrid) == *(other.commGrid))
    {
        if(glen != other.glen)
        {
            std::cerr << "Vector dimensions don't match (" << glen << " vs " << other.glen << ") for FullyDistVec::Set\n";
            MPI_Abort(MPI_COMM_WORLD, DIMMISMATCH);
        }
        else
        {
            IT spvecsize = other.getlocnnz();
#ifdef _OPENMP
#pragma omp parallel for
#endif
            for(IT i=0; i< spvecsize; ++i)
            {
                arr[other.ind[i]] = other.num[i];
            }
        }
    }
    else
    {
        std::cout << "Grids are not comparable for Set" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }
}


// General purpose set operation on a dense vector by a sparse vector
template <class IT, class NT>
template <class NT1, typename _BinaryOperationIdx, typename _BinaryOperationVal>
void FullyDistVec<IT,NT>::GSet (const FullyDistSpVec<IT,NT1> & spVec, _BinaryOperationIdx __binopIdx, _BinaryOperationVal __binopVal, MPI_Win win)
{
    if(*(commGrid) != *(spVec.commGrid))
    {
        std::cout << "Grids are not comparable for GSet" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }

    IT spVecSize = spVec.getlocnnz();
    if(spVecSize==0) return;

    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int myrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    std::vector< std::vector< NT > > datsent(nprocs);
    std::vector< std::vector< IT > > indsent(nprocs);
    IT lengthUntil = spVec.LengthUntil();

    for(IT k=0; k < spVecSize; ++k)
    {
        IT locind;
        // get the global index of the dense vector from the value; most often a select operator.
        // If the first operand is selected, then invert; otherwise, EwiseApply.
        IT globind = __binopIdx(spVec.num[k], spVec.ind[k] + lengthUntil);
        int owner = Owner(globind, locind); // get the local index
        NT val = __binopVal(spVec.num[k], spVec.ind[k] + lengthUntil);
        if(globind < glen) // prevent indices beyond the size of the composed vector
        {
            datsent[owner].push_back(val);
            indsent[owner].push_back(locind); // local indices, so no correction is needed at the recipient
        }
    }

    for(size_t j = 0; j < datsent[myrank].size(); ++j) // directly set local entries
    {
        arr[indsent[myrank][j]] = datsent[myrank][j];
    }

    //MPI_Win win;
    //MPI_Win_create(&arr[0], LocArrSize() * sizeof(NT), sizeof(NT), MPI_INFO_NULL, World, &win);
    //MPI_Win_fence(0, win);
    for(int i=0; i<nprocs; ++i)
    {
        if(i!=myrank)
        {
            MPI_Win_lock(MPI_LOCK_SHARED, i, MPI_MODE_NOCHECK, win);
            for(size_t j = 0; j < datsent[i].size(); ++j)
            {
                MPI_Put(&datsent[i][j], 1, MPIType<NT>(), i, indsent[i][j], 1, MPIType<NT>(), win);
            }
            MPI_Win_unlock(i, win);
        }
    }
    //MPI_Win_fence(0, win);
    //MPI_Win_free(&win);
}


// General purpose get operation on a dense vector by a sparse vector:
// get the elements of the dense vector indexed by the values of the sparse vector.
// invert-and-get might not work in the presence of repeated values
template <class IT, class NT>
template <class NT1, typename _BinaryOperationIdx>
FullyDistSpVec<IT,NT> FullyDistVec<IT,NT>::GGet (const FullyDistSpVec<IT,NT1> & spVec, _BinaryOperationIdx __binopIdx, NT nullValue)
{
    if(*(commGrid) != *(spVec.commGrid))
    {
        std::cout << "Grids are not comparable for GGet" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }

    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int myrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    std::vector< std::vector< IT > > spIdx(nprocs); // positions within spVec (IT, since they are used as indices)
    std::vector< std::vector< IT > > indsent(nprocs);
    IT lengthUntil = spVec.LengthUntil();
    IT spVecSize = spVec.getlocnnz();

    FullyDistSpVec<IT,NT> res(spVec.commGrid, spVec.TotalLength());
    res.ind.resize(spVecSize);
    res.num.resize(spVecSize);

    for(IT k=0; k < spVecSize; ++k)
    {
        IT locind;
        // get the global index of the dense vector from the value; most often a select operator.
        // If the first operand is selected, then invert; otherwise, EwiseApply.
        IT globind = __binopIdx(spVec.num[k], spVec.ind[k] + lengthUntil);
        int owner = Owner(globind, locind); // get the local index
        //NT val = __binopVal(spVec.num[k], spVec.ind[k] + lengthUntil);
        if(globind < glen) // prevent indices beyond the size of the composed vector
        {
            spIdx[owner].push_back(k); // position within spVec
            indsent[owner].push_back(locind); // local indices, so no correction is needed at the recipient
        }
        else
            res.num[k] = nullValue;
        res.ind[k] = spVec.ind[k];
    }

    for(size_t j = 0; j < indsent[myrank].size(); ++j) // directly get local entries
    {
        res.num[spIdx[myrank][j]] = arr[indsent[myrank][j]];
    }

    MPI_Win win;
    MPI_Win_create(&arr[0], LocArrSize() * sizeof(NT), sizeof(NT), MPI_INFO_NULL, World, &win);
    for(int i=0; i<nprocs; ++i)
    {
        if(i!=myrank)
        {
            MPI_Win_lock(MPI_LOCK_SHARED, i, MPI_MODE_NOCHECK, win);
            for(size_t j = 0; j < indsent[i].size(); ++j)
            {
                MPI_Get(&res.num[spIdx[i][j]], 1, MPIType<NT>(), i, indsent[i][j], 1, MPIType<NT>(), win);
            }
            MPI_Win_unlock(i, win);
        }
    }
    MPI_Win_free(&win);

    return res;
}
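
// Unlike GSet, GGet creates (and frees) its own window over arr: values owned
// by other ranks are fetched with MPI_Get under shared locks, while entries
// whose computed global index falls outside the vector are filled with
// nullValue on the requesting side.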

} // namespace combblas