COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
psort_splitters.h
Go to the documentation of this file.
1/*
2Copyright (c) 2009, David Cheng, Viral B. Shah.
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
23#ifndef PSORT_SPLITTERS_H
24#define PSORT_SPLITTERS_H
25
26#include "psort_util.h"
27
28
29namespace vpsort {
30 using namespace std;
31
32 template<typename SplitType>
33 class Split {
34 public:
35 template<typename _RandomAccessIter, typename _Compare, typename _Distance>
36 void split (_RandomAccessIter first,
37 _RandomAccessIter last,
38 _Distance *dist,
39 _Compare comp,
40 vector< vector<_Distance> > &right_ends,
41 MPI_Datatype &MPI_valueType, MPI_Datatype &MPI_distanceType,
42 MPI_Comm comm) {
43
44 SplitType *s = static_cast<SplitType *>(this);
45 s->real_split(first, last, dist, comp, right_ends,
46 MPI_valueType, MPI_distanceType, comm);
47 }
48
49 char *description () {
50 SplitType *s = static_cast<SplitType *>(this);
51 return s->real_description ();
52 }
53 };
54
55 class MedianSplit : public Split<MedianSplit> {
56 public:
58 string s ("Median splitter");
59 return const_cast<char*>(s.c_str());
60 }
61
62 template<typename _RandomAccessIter, typename _Compare, typename _Distance>
63 void real_split (_RandomAccessIter first,
64 _RandomAccessIter last,
65 _Distance *dist,
66 _Compare comp,
67 vector< vector<_Distance> > &right_ends,
68 MPI_Datatype &MPI_valueType,
69 MPI_Datatype &MPI_distanceType,
70 MPI_Comm comm) {
71
72 typedef typename iterator_traits<_RandomAccessIter>::value_type _ValueType;
73
74 int nproc, rank;
75 MPI_Comm_size (comm, &nproc);
76 MPI_Comm_rank (comm, &rank);
77
78 int n_real = nproc;
79 for (int i = 0; i < nproc; ++i)
80 if (dist[i] == 0) { n_real = i; break; }
81
82 copy (dist, dist + nproc, right_ends[nproc].begin());
83
84 // union of [0, right_end[i+1]) on each processor produces dist[i] total values
85 // AL: conversion to remove dependence on C99 feature.
86 // original: _Distance targets[nproc-1];
87 vector<_Distance> targets_v(nproc-1);
88 _Distance *targets = &targets_v.at(0);
89 partial_sum (dist, dist + (nproc - 1), targets);
90
91 // keep a list of ranges, trying to "activate" them at each branch
92 vector< pair<_RandomAccessIter, _RandomAccessIter> > d_ranges(nproc - 1);
93 vector< pair<_Distance *, _Distance *> > t_ranges(nproc - 1);
94 d_ranges[0] = pair<_RandomAccessIter, _RandomAccessIter>(first, last);
95 t_ranges[0] = pair<_Distance *, _Distance *>(targets, targets + (nproc - 1));
96
97 // invariant: subdist[i][rank] == d_ranges[i].second - d_ranges[i].first
98 // amount of data each proc still has in the search
99 vector< vector<_Distance> > subdist(nproc - 1, vector<_Distance>(nproc));
100 copy (dist, dist + nproc, subdist[0].begin());
101
102 // for each processor, d_ranges - first
103 vector< vector<_Distance> > outleft(nproc - 1, vector<_Distance>(nproc, 0));
104
105#ifdef PSORTDEBUG
106 double t_begin=0, t_query=0, t_bsearch=0, t_gather=0, t_finish=0;
107 double t_allquery=0, t_allbsearch=0, t_allgather=0, t_allfinish=0;
108#endif
109 for (int n_act = 1; n_act > 0; ) {
110
111 for (int k=0; k<n_act; ++k) {
112 assert(subdist[k][rank] == d_ranges[k].second - d_ranges[k].first);
113 }
114
115#ifdef PSORTDEBUG
116 MPI_Barrier (MPI_COMM_WORLD);
117 t_begin = MPI_Wtime();
118#endif
119
120 //------- generate n_act guesses
121 // for the allgather, make a flat array of nproc chunks, each with n_act elts
122 _ValueType * mymedians = new _ValueType[n_act];
123 _ValueType * medians = new _ValueType[nproc*n_act];
124 for (int k = 0; k < n_act; ++k) {
125 _ValueType *ptr = d_ranges[k].first;
126 _Distance index = subdist[k][rank] / 2;
127 mymedians[k] = ptr[index];
128 }
129 MPI_Allgather (mymedians, n_act, MPI_valueType, medians, n_act, MPI_valueType, comm);
130 delete [] mymedians;
131
132 // compute the weighted median of medians
133 vector<_ValueType> queries(n_act);
134
135 for (int k = 0; k < n_act; ++k)
136 {
137 // AL: conversion to remove dependence on C99 feature.
138 // original: _Distance ms_perm[n_real];
139 vector<_Distance> ms_perm_v(n_real);
140 _Distance *ms_perm = &ms_perm_v.at(0);
141 for (int i = 0; i < n_real; ++i) ms_perm[i] = i * n_act + k;
142 sort (ms_perm, ms_perm + n_real,
143 PermCompare< _ValueType, _Compare> (medians, comp));
144
145 _Distance mid = accumulate( subdist[k].begin(),
146 subdist[k].end(),
147 0) / 2;
148 _Distance query_ind = -1;
149 for (int i = 0; i < n_real; ++i)
150 {
151 if (subdist[k][ms_perm[i] / n_act] == 0) continue;
152
153 mid -= subdist[k][ms_perm[i] / n_act];
154 if (mid <= 0)
155 {
156 query_ind = ms_perm[i];
157 break;
158 }
159 }
160
161 assert(query_ind >= 0);
162 queries[k] = medians[query_ind];
163 }
164 delete [] medians;
165
166#ifdef PSORTDEBUG
167 MPI_Barrier (MPI_COMM_WORLD);
168 t_query = MPI_Wtime() - t_begin;
169#endif
170
171 //------- find min and max ranks of the guesses
172 // AL: conversion to remove dependence on C99 feature.
173 // original: _Distance ind_local[2 * n_act];
174 vector<_Distance> ind_local_v(2 * n_act);
175 _Distance *ind_local = &ind_local_v.at(0);
176 for (int k = 0; k < n_act; ++k) {
177 pair<_RandomAccessIter, _RandomAccessIter>
178 ind_local_p = equal_range (d_ranges[k].first,
179 d_ranges[k].second,
180 queries[k], comp);
181
182 ind_local[2 * k] = ind_local_p.first - first;
183 ind_local[2 * k + 1] = ind_local_p.second - first;
184 }
185
186#ifdef PSORTDEBUG
187 MPI_Barrier (MPI_COMM_WORLD);
188 t_bsearch = MPI_Wtime() - t_begin - t_query;
189#endif
190
191 // AL: conversion to remove dependence on C99 feature.
192 // original: _Distance ind_all[2 * n_act * nproc];
193 vector<_Distance> ind_all_v(2 * n_act * nproc);
194 _Distance *ind_all = &ind_all_v.at(0);
195 MPI_Allgather (ind_local, 2 * n_act, MPI_distanceType,
196 ind_all, 2 * n_act, MPI_distanceType, comm);
197 // sum to get the global range of indices
198 vector<pair<_Distance, _Distance> > ind_global(n_act);
199 for (int k = 0; k < n_act; ++k) {
200 ind_global[k] = make_pair(0, 0);
201 for (int i = 0; i < nproc; ++i) {
202 ind_global[k].first += ind_all[2 * (i * n_act + k)];
203 ind_global[k].second += ind_all[2 * (i * n_act + k) + 1];
204 }
205 }
206
207#ifdef PSORTDEBUG
208 MPI_Barrier (MPI_COMM_WORLD);
209 t_gather = MPI_Wtime() - t_begin - t_query - t_bsearch;
210#endif
211
212 // state to pass on to next iteration
213 vector< pair<_RandomAccessIter, _RandomAccessIter> > d_ranges_x(nproc - 1);
214 vector< pair<_Distance *, _Distance *> > t_ranges_x(nproc - 1);
215 vector< vector<_Distance> > subdist_x(nproc - 1, vector<_Distance>(nproc));
216 vector< vector<_Distance> > outleft_x(nproc - 1, vector<_Distance>(nproc, 0));
217 int n_act_x = 0;
218
219 for (int k = 0; k < n_act; ++k) {
220 _Distance *split_low = lower_bound (t_ranges[k].first,
221 t_ranges[k].second,
222 ind_global[k].first);
223 _Distance *split_high = upper_bound (t_ranges[k].first,
224 t_ranges[k].second,
225 ind_global[k].second);
226
227 // iterate over targets we hit
228 for (_Distance *s = split_low; s != split_high; ++s) {
229 assert (*s > 0);
230 // a bit sloppy: if more than one target in range, excess won't zero out
231 _Distance excess = *s - ind_global[k].first;
232 // low procs to high take excess for stability
233 for (int i = 0; i < nproc; ++i) {
234 _Distance amount = min (ind_all[2 * (i * n_act + k)] + excess,
235 ind_all[2 * (i * n_act + k) + 1]);
236 right_ends[(s - targets) + 1][i] = amount;
237 excess -= amount - ind_all[2 * (i * n_act + k)];
238 }
239 }
240
241 if ((split_low - t_ranges[k].first) > 0) {
242 t_ranges_x[n_act_x] = make_pair (t_ranges[k].first, split_low);
243 // lop off local_ind_low..end
244 d_ranges_x[n_act_x] = make_pair (d_ranges[k].first,
245 first + ind_local[2 * k]);
246 for (int i = 0; i < nproc; ++i) {
247 subdist_x[n_act_x][i] = ind_all[2 * (i * n_act + k)] - outleft[k][i];
248 outleft_x[n_act_x][i] = outleft[k][i];
249 }
250 ++n_act_x;
251 }
252
253 if ((t_ranges[k].second - split_high) > 0) {
254 t_ranges_x[n_act_x] = make_pair (split_high, t_ranges[k].second);
255 // lop off begin..local_ind_high
256 d_ranges_x[n_act_x] = make_pair (first + ind_local[2 * k + 1],
257 d_ranges[k].second);
258 for (int i = 0; i < nproc; ++i) {
259 subdist_x[n_act_x][i] = outleft[k][i]
260 + subdist[k][i] - ind_all[2 * (i * n_act + k) + 1];
261 outleft_x[n_act_x][i] = ind_all[2 * (i * n_act + k) + 1];
262 }
263 ++n_act_x;
264 }
265 }
266
267 t_ranges = t_ranges_x;
268 d_ranges = d_ranges_x;
269 subdist = subdist_x;
270 outleft = outleft_x;
271 n_act = n_act_x;
272
273#ifdef PSORTDEBUG
274 MPI_Barrier (MPI_COMM_WORLD);
275 t_finish = MPI_Wtime() - t_begin - t_query - t_bsearch - t_gather;
276 t_allquery += t_query;
277 t_allbsearch += t_bsearch;
278 t_allgather += t_gather;
279 t_allfinish += t_finish;
280#endif
281 }
282
283#ifdef PSORTDEBUG
284 if (rank == 0)
285 std::cout << "t_query = " << t_allquery
286 << ", t_bsearch = " << t_allbsearch
287 << ", t_gather = " << t_allgather
288 << ", t_finish = " << t_allfinish << std::endl;
289#endif
290 }
291
292 private:
293 template <typename T, typename _Compare>
294 class PermCompare {
295 private:
296 T *weights;
297 _Compare comp;
298 public:
299 PermCompare(T *w, _Compare c) : weights(w), comp(c) {}
300 bool operator()(int a, int b) {
301 return comp(weights[a], weights[b]);
302 }
303 };
304
305 };
306
307
308 class SampleSplit : public Split<SampleSplit> {
309 public:
311 string s ("Sample splitter");
312 return const_cast<char*>(s.c_str());
313 }
314
315 template<typename _RandomAccessIter, typename _Compare, typename _Distance>
316 void real_split (_RandomAccessIter first,
317 _RandomAccessIter last,
318 _Distance *dist,
319 _Compare comp,
320 vector< vector<_Distance> > &right_ends,
321 MPI_Datatype &MPI_valueType,
322 MPI_Datatype &MPI_distanceType,
323 MPI_Comm comm) {
324
325 int nproc, rank;
326 MPI_Comm_size (comm, &nproc);
327 MPI_Comm_rank (comm, &rank);
328
329 // union of [0, right_end) on each processor produces split_ind total values
330 _Distance targets[nproc-1];
331 partial_sum (dist, dist + (nproc - 1), targets);
332
333 fill (right_ends[0].begin(), right_ends[0].end(), 0);
334 for (int i = 0; i < nproc-1; ++i) {
335 sample_split_iter (first, last, dist, targets[i], comp,
336 right_ends[i + 1],
337 MPI_valueType, MPI_distanceType, comm);
338 }
339 copy (dist, dist + nproc, right_ends[nproc].begin());
340 }
341
342
343 private:
344 // return an integer uniformly distributed from [0, max)
345 inline static int random_number(int max) {
346#ifdef _MSC_VER
347 return (int) (max * (double(rand()) / RAND_MAX));
348#else
349 return (int) (max * drand48());
350#endif
351 }
352
353 template<typename _RandomAccessIter, typename _Compare, typename _Distance>
354 static void sample_split_iter (_RandomAccessIter first,
355 _RandomAccessIter last,
356 _Distance *dist,
357 _Distance target,
358 _Compare comp,
359 vector<_Distance> &split_inds,
360 MPI_Datatype &MPI_valueType,
361 MPI_Datatype &MPI_distanceType,
362 MPI_Comm comm) {
363
364 typedef typename iterator_traits<_RandomAccessIter>::value_type _ValueType;
365
366 int nproc, rank;
367 MPI_Comm_size (comm, &nproc);
368 MPI_Comm_rank (comm, &rank);
369
370 // invariant: subdist[rank] == last - start
371 _Distance subdist[nproc]; // amount of data each proc still has in the search
372 _Distance outleft[nproc]; // keep track of start - first
373 copy (dist, dist + nproc, subdist);
374 fill (outleft, outleft + nproc, 0);
375
376 _RandomAccessIter start = first;
377 _ValueType query;
378 _Distance sampler;
379
380 while (true) {
381 assert(subdist[rank] == last - start);
382
383 // generate a guess
384 // root decides who gets to guess
385 if (!rank) {
386 _Distance distcum[nproc];
387 partial_sum (subdist, subdist + nproc, distcum);
388 // ensure sampler is a rank with data; lower is that first rank
389 _Distance lower = lower_bound(distcum, distcum + nproc, 1) - distcum;
390 sampler = lower_bound (distcum + lower, distcum + nproc,
391 random_number (distcum[nproc - 1]))
392 - distcum;
393 }
394 MPI_Bcast (&sampler, 1, MPI_distanceType, 0, comm);
395
396 // take the guess
397 if (rank == sampler) {
398 query = *(start + random_number (subdist[rank]));
399 }
400 MPI_Bcast (&query, 1, MPI_valueType, sampler, comm);
401
402 // find min and max ranks of the guess
403 pair<_RandomAccessIter, _RandomAccessIter> ind_range_p =
404 equal_range (start, last, query, comp);
405
406 // get the absolute local index of the query
407 _Distance ind_range[2] = {
408 ind_range_p.first - first, ind_range_p.second - first
409 };
410
411 _Distance ind_all[2 * nproc];
412 MPI_Allgather (ind_range, 2, MPI_distanceType,
413 ind_all, 2 , MPI_distanceType, comm);
414
415 // scan to get the global range of indices of the query
416 _Distance g_ind_min = 0, g_ind_max = 0;
417 for (int i = 0; i < nproc; ++i) {
418 g_ind_min += ind_all[2 * i];
419 g_ind_max += ind_all[2 * i + 1];
420 }
421
422 // if target in range, low procs to high take excess for stability
423 if (g_ind_min <= target && target <= g_ind_max) {
424 _Distance excess = target - g_ind_min;
425 for (int i = 0; i < nproc; ++i) {
426 split_inds[i] = min (ind_all[2 * i] + excess, ind_all[2 * i + 1]);
427 excess -= split_inds[i] - ind_all[2 * i];
428 }
429 return;
430 }
431
432 // set up for next iteration of binary search
433 if (target < g_ind_min) {
434 last = first + ind_all[2 * rank];
435 assert(outleft[rank] == start - first);
436 for (int i = 0; i < nproc; ++i) {
437 subdist[i] = ind_all[2 * i] - outleft[i];
438 }
439 } else {
440 start = first + ind_all[2 * rank + 1];
441 for (int i = 0; i < nproc; ++i) {
442 subdist[i] = outleft[i] + subdist[i] - ind_all[2 * i + 1];
443 outleft[i] = ind_all[2 * i + 1];
444 }
445 }
446 }
447 }
448 };
449
450} /* namespace vpsort */
451
452#endif /* PSORT_SPLITTERS_H */
453
void real_split(_RandomAccessIter first, _RandomAccessIter last, _Distance *dist, _Compare comp, vector< vector< _Distance > > &right_ends, MPI_Datatype &MPI_valueType, MPI_Datatype &MPI_distanceType, MPI_Comm comm)
void real_split(_RandomAccessIter first, _RandomAccessIter last, _Distance *dist, _Compare comp, vector< vector< _Distance > > &right_ends, MPI_Datatype &MPI_valueType, MPI_Datatype &MPI_distanceType, MPI_Comm comm)
void split(_RandomAccessIter first, _RandomAccessIter last, _Distance *dist, _Compare comp, vector< vector< _Distance > > &right_ends, MPI_Datatype &MPI_valueType, MPI_Datatype &MPI_distanceType, MPI_Comm comm)
char * description()
int rank
Definition psort.h:28