64 _RandomAccessIter last,
67 vector< vector<_Distance> > &right_ends,
68 MPI_Datatype &MPI_valueType,
69 MPI_Datatype &MPI_distanceType,
72 typedef typename iterator_traits<_RandomAccessIter>::value_type _ValueType;
75 MPI_Comm_size (comm, &nproc);
76 MPI_Comm_rank (comm, &
rank);
79 for (
int i = 0; i < nproc; ++i)
80 if (dist[i] == 0) { n_real = i;
break; }
82 copy (dist, dist + nproc, right_ends[nproc].begin());
87 vector<_Distance> targets_v(nproc-1);
88 _Distance *targets = &targets_v.at(0);
89 partial_sum (dist, dist + (nproc - 1), targets);
92 vector< pair<_RandomAccessIter, _RandomAccessIter> > d_ranges(nproc - 1);
93 vector< pair<_Distance *, _Distance *> > t_ranges(nproc - 1);
94 d_ranges[0] = pair<_RandomAccessIter, _RandomAccessIter>(first, last);
95 t_ranges[0] = pair<_Distance *, _Distance *>(targets, targets + (nproc - 1));
99 vector< vector<_Distance> > subdist(nproc - 1, vector<_Distance>(nproc));
100 copy (dist, dist + nproc, subdist[0].begin());
103 vector< vector<_Distance> > outleft(nproc - 1, vector<_Distance>(nproc, 0));
106 double t_begin=0, t_query=0, t_bsearch=0, t_gather=0, t_finish=0;
107 double t_allquery=0, t_allbsearch=0, t_allgather=0, t_allfinish=0;
109 for (
int n_act = 1; n_act > 0; ) {
111 for (
int k=0; k<n_act; ++k) {
112 assert(subdist[k][
rank] == d_ranges[k].second - d_ranges[k].first);
116 MPI_Barrier (MPI_COMM_WORLD);
117 t_begin = MPI_Wtime();
122 _ValueType * mymedians =
new _ValueType[n_act];
123 _ValueType * medians =
new _ValueType[nproc*n_act];
124 for (
int k = 0; k < n_act; ++k) {
125 _ValueType *ptr = d_ranges[k].first;
126 _Distance index = subdist[k][
rank] / 2;
127 mymedians[k] = ptr[index];
129 MPI_Allgather (mymedians, n_act, MPI_valueType, medians, n_act, MPI_valueType, comm);
133 vector<_ValueType> queries(n_act);
135 for (
int k = 0; k < n_act; ++k)
139 vector<_Distance> ms_perm_v(n_real);
140 _Distance *ms_perm = &ms_perm_v.at(0);
141 for (
int i = 0; i < n_real; ++i) ms_perm[i] = i * n_act + k;
142 sort (ms_perm, ms_perm + n_real,
143 PermCompare< _ValueType, _Compare> (medians, comp));
145 _Distance mid = accumulate( subdist[k].begin(),
148 _Distance query_ind = -1;
149 for (
int i = 0; i < n_real; ++i)
151 if (subdist[k][ms_perm[i] / n_act] == 0)
continue;
153 mid -= subdist[k][ms_perm[i] / n_act];
156 query_ind = ms_perm[i];
161 assert(query_ind >= 0);
162 queries[k] = medians[query_ind];
167 MPI_Barrier (MPI_COMM_WORLD);
168 t_query = MPI_Wtime() - t_begin;
174 vector<_Distance> ind_local_v(2 * n_act);
175 _Distance *ind_local = &ind_local_v.at(0);
176 for (
int k = 0; k < n_act; ++k) {
177 pair<_RandomAccessIter, _RandomAccessIter>
178 ind_local_p = equal_range (d_ranges[k].first,
182 ind_local[2 * k] = ind_local_p.first - first;
183 ind_local[2 * k + 1] = ind_local_p.second - first;
187 MPI_Barrier (MPI_COMM_WORLD);
188 t_bsearch = MPI_Wtime() - t_begin - t_query;
193 vector<_Distance> ind_all_v(2 * n_act * nproc);
194 _Distance *ind_all = &ind_all_v.at(0);
195 MPI_Allgather (ind_local, 2 * n_act, MPI_distanceType,
196 ind_all, 2 * n_act, MPI_distanceType, comm);
198 vector<pair<_Distance, _Distance> > ind_global(n_act);
199 for (
int k = 0; k < n_act; ++k) {
200 ind_global[k] = make_pair(0, 0);
201 for (
int i = 0; i < nproc; ++i) {
202 ind_global[k].first += ind_all[2 * (i * n_act + k)];
203 ind_global[k].second += ind_all[2 * (i * n_act + k) + 1];
208 MPI_Barrier (MPI_COMM_WORLD);
209 t_gather = MPI_Wtime() - t_begin - t_query - t_bsearch;
213 vector< pair<_RandomAccessIter, _RandomAccessIter> > d_ranges_x(nproc - 1);
214 vector< pair<_Distance *, _Distance *> > t_ranges_x(nproc - 1);
215 vector< vector<_Distance> > subdist_x(nproc - 1, vector<_Distance>(nproc));
216 vector< vector<_Distance> > outleft_x(nproc - 1, vector<_Distance>(nproc, 0));
219 for (
int k = 0; k < n_act; ++k) {
220 _Distance *split_low = lower_bound (t_ranges[k].first,
222 ind_global[k].first);
223 _Distance *split_high = upper_bound (t_ranges[k].first,
225 ind_global[k].second);
228 for (_Distance *s = split_low; s != split_high; ++s) {
231 _Distance excess = *s - ind_global[k].first;
233 for (
int i = 0; i < nproc; ++i) {
234 _Distance amount = min (ind_all[2 * (i * n_act + k)] + excess,
235 ind_all[2 * (i * n_act + k) + 1]);
236 right_ends[(s - targets) + 1][i] = amount;
237 excess -= amount - ind_all[2 * (i * n_act + k)];
241 if ((split_low - t_ranges[k].first) > 0) {
242 t_ranges_x[n_act_x] = make_pair (t_ranges[k].first, split_low);
244 d_ranges_x[n_act_x] = make_pair (d_ranges[k].first,
245 first + ind_local[2 * k]);
246 for (
int i = 0; i < nproc; ++i) {
247 subdist_x[n_act_x][i] = ind_all[2 * (i * n_act + k)] - outleft[k][i];
248 outleft_x[n_act_x][i] = outleft[k][i];
253 if ((t_ranges[k].second - split_high) > 0) {
254 t_ranges_x[n_act_x] = make_pair (split_high, t_ranges[k].second);
256 d_ranges_x[n_act_x] = make_pair (first + ind_local[2 * k + 1],
258 for (
int i = 0; i < nproc; ++i) {
259 subdist_x[n_act_x][i] = outleft[k][i]
260 + subdist[k][i] - ind_all[2 * (i * n_act + k) + 1];
261 outleft_x[n_act_x][i] = ind_all[2 * (i * n_act + k) + 1];
267 t_ranges = t_ranges_x;
268 d_ranges = d_ranges_x;
274 MPI_Barrier (MPI_COMM_WORLD);
275 t_finish = MPI_Wtime() - t_begin - t_query - t_bsearch - t_gather;
276 t_allquery += t_query;
277 t_allbsearch += t_bsearch;
278 t_allgather += t_gather;
279 t_allfinish += t_finish;
285 std::cout <<
"t_query = " << t_allquery
286 <<
", t_bsearch = " << t_allbsearch
287 <<
", t_gather = " << t_allgather
288 <<
", t_finish = " << t_allfinish << std::endl;