182 _Compare comp,
long *dist,
187 typedef typename iterator_traits<_RandomAccessIter>::value_type _ValueType;
188 typedef typename iterator_traits<_RandomAccessIter>::difference_type _Distance;
191 MPI_Comm_size(comm, &nproc);
192 MPI_Comm_rank(comm, &
rank);
194 MPI_Datatype MPI_valueType, MPI_distanceType;
195 MPI_Type_contiguous (
sizeof(_ValueType), MPI_CHAR, &MPI_valueType);
196 MPI_Type_commit (&MPI_valueType);
197 MPI_Type_contiguous (
sizeof(_Distance), MPI_CHAR, &MPI_distanceType);
198 MPI_Type_commit (&MPI_distanceType);
200 _Distance n_loc = last - first;
202 progress (
rank, 0,
"Sample splitters", comm);
206 _ValueType *s_splitters =
new _ValueType[n_samp];
207 sample_splitters(first, n_loc, s_splitters, n_samp);
210 long n_parts = nproc * k - 1;
211 _ValueType *p_splitters =
new _ValueType[n_parts];
213 MPI_Gather(s_splitters, n_samp, MPI_valueType,
214 NULL, 0, MPI_valueType, 0, comm);
216 _ValueType *gath_splitters =
new _ValueType[nproc * n_samp];
217 MPI_Gather(s_splitters, n_samp, MPI_valueType,
218 gath_splitters, n_samp, MPI_valueType,
222 sort(gath_splitters, gath_splitters + (nproc * n_samp), comp);
225 double stride = (double) (nproc * n_samp) / (nproc * k);
226 for (
int i=0; i<n_parts; ++i) {
227 p_splitters[i] = gath_splitters[(int) (stride * (
double) (i + 1))];
229 delete [] gath_splitters;
232 MPI_Bcast(p_splitters, n_parts, MPI_valueType, 0, comm);
234 delete [] s_splitters;
236 progress (
rank, 1,
"Partition", comm);
241 _Distance *part_ind =
new _Distance[n_loc];
242 long *part_lsize =
new long[n_parts + 1];
243 for (
long i=0; i<=n_parts; ++i) part_lsize[i] = 0;
245 for (
long i=0; i<n_loc; ++i) {
247 part_ind[i] = lower_bound(p_splitters, p_splitters + n_parts, *(first + i))
249 ++part_lsize[part_ind[i]];
252 delete [] p_splitters;
254 long *boundaries =
new long[nproc - 1];
256 MPI_Reduce(part_lsize, NULL, n_parts + 1, MPI_LONG, MPI_SUM, 0, comm);
259 long *part_gsize =
new long[n_parts + 1];
260 MPI_Reduce(part_lsize, part_gsize, n_parts + 1, MPI_LONG, MPI_SUM, 0, comm);
261 set_splitters (part_gsize, n_parts + 1, nproc, dist, boundaries);
262 delete [] part_gsize;
264 MPI_Bcast(boundaries, nproc - 1, MPI_LONG, 0, comm);
268 long *part_proc =
new long[n_parts + 1];
269 int out_counts[nproc];
270 for (
int i=0; i<nproc; ++i) out_counts[i] = 0;
272 for (
long i=0; i<=n_parts; ++i) {
273 if (cur < nproc - 1 && i > boundaries[cur]) {
277 out_counts[cur] += part_lsize[i];
280 delete [] boundaries;
281 delete [] part_lsize;
284 _ValueType **s_bufs =
new _ValueType*[nproc];
285 for (
int i=0; i<nproc; ++i) {
287 s_bufs[i] =
new _ValueType[out_counts[i]];
289 for (
int i=0; i<n_loc; ++i) {
290 int dest = part_proc[part_ind[i]];
291 s_bufs[dest][curs[dest]++] = *(first + i);
297 progress (
rank, 2,
"Alltoall", comm);
300 int in_counts[nproc];
301 MPI_Alltoall(out_counts, 1, MPI_INT, in_counts, 1, MPI_INT, comm);
303 int offsets[nproc + 1];
305 for (
int i=1; i<=nproc; ++i) {
306 offsets[i] = offsets[i-1] + in_counts[i-1];
308 int n_loct = offsets[nproc];
310 MPI_Request send_reqs[nproc];
311 MPI_Request recv_reqs[nproc];
312 _ValueType *trans_data =
new _ValueType[n_loct];
314 for (
int i=0; i<nproc; ++i) {
315 MPI_Irecv(&trans_data[offsets[i]], in_counts[i],
318 for (
int i=1; i<=nproc; ++i) {
319 int dest = (
rank + i) % nproc;
320 MPI_Isend(s_bufs[dest], out_counts[dest], MPI_valueType,
324 MPI_Waitall(nproc, send_reqs, MPI_STATUS_IGNORE);
325 for (
int i=0; i < nproc; ++i) {
330 MPI_Waitall (nproc, recv_reqs, MPI_STATUS_IGNORE);
332 progress (
rank, 3,
"Sequential sort", comm);
334 mysort.
seqsort (trans_data, trans_data + n_loct, comp);
336 progress (
rank, 4,
"Adjust boundaries", comm);
339 int trans_dist[nproc], dist_out[nproc];
340 MPI_Allgather(&n_loct, 1, MPI_INT, trans_dist, 1, MPI_INT, comm);
341 for (
int i=0; i<nproc; ++i) dist_out[i] = dist[i];
342 redist(trans_dist, trans_data, dist_out, first,
rank, nproc, MPI_valueType, comm);
343 delete [] trans_data;
345 progress (
rank, 5,
"Finish", comm);
347 MPI_Type_free (&MPI_valueType);