COMBINATORIAL_BLAS  1.6
psort_splitters.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2009, David Cheng, Viral B. Shah.
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
21 */
22 
23 #ifndef PSORT_SPLITTERS_H
24 #define PSORT_SPLITTERS_H
25 
26 #include "psort_util.h"
27 
28 
29 namespace vpsort {
30  using namespace std;
31 
32  template<typename SplitType>
33  class Split {
34  public:
35  template<typename _RandomAccessIter, typename _Compare, typename _Distance>
36  void split (_RandomAccessIter first,
37  _RandomAccessIter last,
38  _Distance *dist,
39  _Compare comp,
40  vector< vector<_Distance> > &right_ends,
41  MPI_Datatype &MPI_valueType, MPI_Datatype &MPI_distanceType,
42  MPI_Comm comm) {
43 
44  SplitType *s = static_cast<SplitType *>(this);
45  s->real_split(first, last, dist, comp, right_ends,
46  MPI_valueType, MPI_distanceType, comm);
47  }
48 
49  char *description () {
50  SplitType *s = static_cast<SplitType *>(this);
51  return s->real_description ();
52  }
53  };
54 
55  class MedianSplit : public Split<MedianSplit> {
56  public:
57  char *real_description () {
58  string s ("Median splitter");
59  return const_cast<char*>(s.c_str());
60  }
61 
62  template<typename _RandomAccessIter, typename _Compare, typename _Distance>
63  void real_split (_RandomAccessIter first,
64  _RandomAccessIter last,
65  _Distance *dist,
66  _Compare comp,
67  vector< vector<_Distance> > &right_ends,
68  MPI_Datatype &MPI_valueType,
69  MPI_Datatype &MPI_distanceType,
70  MPI_Comm comm) {
71 
72  typedef typename iterator_traits<_RandomAccessIter>::value_type _ValueType;
73 
74  int nproc, rank;
75  MPI_Comm_size (comm, &nproc);
76  MPI_Comm_rank (comm, &rank);
77 
78  int n_real = nproc;
79  for (int i = 0; i < nproc; ++i)
80  if (dist[i] == 0) { n_real = i; break; }
81 
82  copy (dist, dist + nproc, right_ends[nproc].begin());
83 
84  // union of [0, right_end[i+1]) on each processor produces dist[i] total values
85  // AL: conversion to remove dependence on C99 feature.
86  // original: _Distance targets[nproc-1];
87  vector<_Distance> targets_v(nproc-1);
88  _Distance *targets = &targets_v.at(0);
89  partial_sum (dist, dist + (nproc - 1), targets);
90 
91  // keep a list of ranges, trying to "activate" them at each branch
92  vector< pair<_RandomAccessIter, _RandomAccessIter> > d_ranges(nproc - 1);
93  vector< pair<_Distance *, _Distance *> > t_ranges(nproc - 1);
94  d_ranges[0] = pair<_RandomAccessIter, _RandomAccessIter>(first, last);
95  t_ranges[0] = pair<_Distance *, _Distance *>(targets, targets + (nproc - 1));
96 
97  // invariant: subdist[i][rank] == d_ranges[i].second - d_ranges[i].first
98  // amount of data each proc still has in the search
99  vector< vector<_Distance> > subdist(nproc - 1, vector<_Distance>(nproc));
100  copy (dist, dist + nproc, subdist[0].begin());
101 
102  // for each processor, d_ranges - first
103  vector< vector<_Distance> > outleft(nproc - 1, vector<_Distance>(nproc, 0));
104 
105 #ifdef PSORTDEBUG
106  double t_begin=0, t_query=0, t_bsearch=0, t_gather=0, t_finish=0;
107  double t_allquery=0, t_allbsearch=0, t_allgather=0, t_allfinish=0;
108 #endif
109  for (int n_act = 1; n_act > 0; ) {
110 
111  for (int k=0; k<n_act; ++k) {
112  assert(subdist[k][rank] == d_ranges[k].second - d_ranges[k].first);
113  }
114 
115 #ifdef PSORTDEBUG
116  MPI_Barrier (MPI_COMM_WORLD);
117  t_begin = MPI_Wtime();
118 #endif
119 
120  //------- generate n_act guesses
121  // for the allgather, make a flat array of nproc chunks, each with n_act elts
122  _ValueType * mymedians = new _ValueType[n_act];
123  _ValueType * medians = new _ValueType[nproc*n_act];
124  for (int k = 0; k < n_act; ++k) {
125  _ValueType *ptr = d_ranges[k].first;
126  _Distance index = subdist[k][rank] / 2;
127  mymedians[k] = ptr[index];
128  }
129  MPI_Allgather (mymedians, n_act, MPI_valueType, medians, n_act, MPI_valueType, comm);
130  delete [] mymedians;
131 
132  // compute the weighted median of medians
133  vector<_ValueType> queries(n_act);
134 
135  for (int k = 0; k < n_act; ++k)
136  {
137  // AL: conversion to remove dependence on C99 feature.
138  // original: _Distance ms_perm[n_real];
139  vector<_Distance> ms_perm_v(n_real);
140  _Distance *ms_perm = &ms_perm_v.at(0);
141  for (int i = 0; i < n_real; ++i) ms_perm[i] = i * n_act + k;
142  sort (ms_perm, ms_perm + n_real,
143  PermCompare< _ValueType, _Compare> (medians, comp));
144 
145  _Distance mid = accumulate( subdist[k].begin(),
146  subdist[k].end(),
147  0) / 2;
148  _Distance query_ind = -1;
149  for (int i = 0; i < n_real; ++i)
150  {
151  if (subdist[k][ms_perm[i] / n_act] == 0) continue;
152 
153  mid -= subdist[k][ms_perm[i] / n_act];
154  if (mid <= 0)
155  {
156  query_ind = ms_perm[i];
157  break;
158  }
159  }
160 
161  assert(query_ind >= 0);
162  queries[k] = medians[query_ind];
163  }
164  delete [] medians;
165 
166 #ifdef PSORTDEBUG
167  MPI_Barrier (MPI_COMM_WORLD);
168  t_query = MPI_Wtime() - t_begin;
169 #endif
170 
171  //------- find min and max ranks of the guesses
172  // AL: conversion to remove dependence on C99 feature.
173  // original: _Distance ind_local[2 * n_act];
174  vector<_Distance> ind_local_v(2 * n_act);
175  _Distance *ind_local = &ind_local_v.at(0);
176  for (int k = 0; k < n_act; ++k) {
177  pair<_RandomAccessIter, _RandomAccessIter>
178  ind_local_p = equal_range (d_ranges[k].first,
179  d_ranges[k].second,
180  queries[k], comp);
181 
182  ind_local[2 * k] = ind_local_p.first - first;
183  ind_local[2 * k + 1] = ind_local_p.second - first;
184  }
185 
186 #ifdef PSORTDEBUG
187  MPI_Barrier (MPI_COMM_WORLD);
188  t_bsearch = MPI_Wtime() - t_begin - t_query;
189 #endif
190 
191  // AL: conversion to remove dependence on C99 feature.
192  // original: _Distance ind_all[2 * n_act * nproc];
193  vector<_Distance> ind_all_v(2 * n_act * nproc);
194  _Distance *ind_all = &ind_all_v.at(0);
195  MPI_Allgather (ind_local, 2 * n_act, MPI_distanceType,
196  ind_all, 2 * n_act, MPI_distanceType, comm);
197  // sum to get the global range of indices
198  vector<pair<_Distance, _Distance> > ind_global(n_act);
199  for (int k = 0; k < n_act; ++k) {
200  ind_global[k] = make_pair(0, 0);
201  for (int i = 0; i < nproc; ++i) {
202  ind_global[k].first += ind_all[2 * (i * n_act + k)];
203  ind_global[k].second += ind_all[2 * (i * n_act + k) + 1];
204  }
205  }
206 
207 #ifdef PSORTDEBUG
208  MPI_Barrier (MPI_COMM_WORLD);
209  t_gather = MPI_Wtime() - t_begin - t_query - t_bsearch;
210 #endif
211 
212  // state to pass on to next iteration
213  vector< pair<_RandomAccessIter, _RandomAccessIter> > d_ranges_x(nproc - 1);
214  vector< pair<_Distance *, _Distance *> > t_ranges_x(nproc - 1);
215  vector< vector<_Distance> > subdist_x(nproc - 1, vector<_Distance>(nproc));
216  vector< vector<_Distance> > outleft_x(nproc - 1, vector<_Distance>(nproc, 0));
217  int n_act_x = 0;
218 
219  for (int k = 0; k < n_act; ++k) {
220  _Distance *split_low = lower_bound (t_ranges[k].first,
221  t_ranges[k].second,
222  ind_global[k].first);
223  _Distance *split_high = upper_bound (t_ranges[k].first,
224  t_ranges[k].second,
225  ind_global[k].second);
226 
227  // iterate over targets we hit
228  for (_Distance *s = split_low; s != split_high; ++s) {
229  assert (*s > 0);
230  // a bit sloppy: if more than one target in range, excess won't zero out
231  _Distance excess = *s - ind_global[k].first;
232  // low procs to high take excess for stability
233  for (int i = 0; i < nproc; ++i) {
234  _Distance amount = min (ind_all[2 * (i * n_act + k)] + excess,
235  ind_all[2 * (i * n_act + k) + 1]);
236  right_ends[(s - targets) + 1][i] = amount;
237  excess -= amount - ind_all[2 * (i * n_act + k)];
238  }
239  }
240 
241  if ((split_low - t_ranges[k].first) > 0) {
242  t_ranges_x[n_act_x] = make_pair (t_ranges[k].first, split_low);
243  // lop off local_ind_low..end
244  d_ranges_x[n_act_x] = make_pair (d_ranges[k].first,
245  first + ind_local[2 * k]);
246  for (int i = 0; i < nproc; ++i) {
247  subdist_x[n_act_x][i] = ind_all[2 * (i * n_act + k)] - outleft[k][i];
248  outleft_x[n_act_x][i] = outleft[k][i];
249  }
250  ++n_act_x;
251  }
252 
253  if ((t_ranges[k].second - split_high) > 0) {
254  t_ranges_x[n_act_x] = make_pair (split_high, t_ranges[k].second);
255  // lop off begin..local_ind_high
256  d_ranges_x[n_act_x] = make_pair (first + ind_local[2 * k + 1],
257  d_ranges[k].second);
258  for (int i = 0; i < nproc; ++i) {
259  subdist_x[n_act_x][i] = outleft[k][i]
260  + subdist[k][i] - ind_all[2 * (i * n_act + k) + 1];
261  outleft_x[n_act_x][i] = ind_all[2 * (i * n_act + k) + 1];
262  }
263  ++n_act_x;
264  }
265  }
266 
267  t_ranges = t_ranges_x;
268  d_ranges = d_ranges_x;
269  subdist = subdist_x;
270  outleft = outleft_x;
271  n_act = n_act_x;
272 
273 #ifdef PSORTDEBUG
274  MPI_Barrier (MPI_COMM_WORLD);
275  t_finish = MPI_Wtime() - t_begin - t_query - t_bsearch - t_gather;
276  t_allquery += t_query;
277  t_allbsearch += t_bsearch;
278  t_allgather += t_gather;
279  t_allfinish += t_finish;
280 #endif
281  }
282 
283 #ifdef PSORTDEBUG
284  if (rank == 0)
285  std::cout << "t_query = " << t_allquery
286  << ", t_bsearch = " << t_allbsearch
287  << ", t_gather = " << t_allgather
288  << ", t_finish = " << t_allfinish << std::endl;
289 #endif
290  }
291 
292  private:
293  template <typename T, typename _Compare>
294  class PermCompare {
295  private:
296  T *weights;
297  _Compare comp;
298  public:
299  PermCompare(T *w, _Compare c) : weights(w), comp(c) {}
300  bool operator()(int a, int b) {
301  return comp(weights[a], weights[b]);
302  }
303  };
304 
305  };
306 
307 
308  class SampleSplit : public Split<SampleSplit> {
309  public:
310  char *real_description () {
311  string s ("Sample splitter");
312  return const_cast<char*>(s.c_str());
313  }
314 
315  template<typename _RandomAccessIter, typename _Compare, typename _Distance>
316  void real_split (_RandomAccessIter first,
317  _RandomAccessIter last,
318  _Distance *dist,
319  _Compare comp,
320  vector< vector<_Distance> > &right_ends,
321  MPI_Datatype &MPI_valueType,
322  MPI_Datatype &MPI_distanceType,
323  MPI_Comm comm) {
324 
325  int nproc, rank;
326  MPI_Comm_size (comm, &nproc);
327  MPI_Comm_rank (comm, &rank);
328 
329  // union of [0, right_end) on each processor produces split_ind total values
330  _Distance targets[nproc-1];
331  partial_sum (dist, dist + (nproc - 1), targets);
332 
333  fill (right_ends[0].begin(), right_ends[0].end(), 0);
334  for (int i = 0; i < nproc-1; ++i) {
335  sample_split_iter (first, last, dist, targets[i], comp,
336  right_ends[i + 1],
337  MPI_valueType, MPI_distanceType, comm);
338  }
339  copy (dist, dist + nproc, right_ends[nproc].begin());
340  }
341 
342 
343  private:
344  // return an integer uniformly distributed from [0, max)
345  inline static int random_number(int max) {
346 #ifdef _MSC_VER
347  return (int) (max * (double(rand()) / RAND_MAX));
348 #else
349  return (int) (max * drand48());
350 #endif
351  }
352 
353  template<typename _RandomAccessIter, typename _Compare, typename _Distance>
354  static void sample_split_iter (_RandomAccessIter first,
355  _RandomAccessIter last,
356  _Distance *dist,
357  _Distance target,
358  _Compare comp,
359  vector<_Distance> &split_inds,
360  MPI_Datatype &MPI_valueType,
361  MPI_Datatype &MPI_distanceType,
362  MPI_Comm comm) {
363 
364  typedef typename iterator_traits<_RandomAccessIter>::value_type _ValueType;
365 
366  int nproc, rank;
367  MPI_Comm_size (comm, &nproc);
368  MPI_Comm_rank (comm, &rank);
369 
370  // invariant: subdist[rank] == last - start
371  _Distance subdist[nproc]; // amount of data each proc still has in the search
372  _Distance outleft[nproc]; // keep track of start - first
373  copy (dist, dist + nproc, subdist);
374  fill (outleft, outleft + nproc, 0);
375 
376  _RandomAccessIter start = first;
377  _ValueType query;
378  _Distance sampler;
379 
380  while (true) {
381  assert(subdist[rank] == last - start);
382 
383  // generate a guess
384  // root decides who gets to guess
385  if (!rank) {
386  _Distance distcum[nproc];
387  partial_sum (subdist, subdist + nproc, distcum);
388  // ensure sampler is a rank with data; lower is that first rank
389  _Distance lower = lower_bound(distcum, distcum + nproc, 1) - distcum;
390  sampler = lower_bound (distcum + lower, distcum + nproc,
391  random_number (distcum[nproc - 1]))
392  - distcum;
393  }
394  MPI_Bcast (&sampler, 1, MPI_distanceType, 0, comm);
395 
396  // take the guess
397  if (rank == sampler) {
398  query = *(start + random_number (subdist[rank]));
399  }
400  MPI_Bcast (&query, 1, MPI_valueType, sampler, comm);
401 
402  // find min and max ranks of the guess
403  pair<_RandomAccessIter, _RandomAccessIter> ind_range_p =
404  equal_range (start, last, query, comp);
405 
406  // get the absolute local index of the query
407  _Distance ind_range[2] = {
408  ind_range_p.first - first, ind_range_p.second - first
409  };
410 
411  _Distance ind_all[2 * nproc];
412  MPI_Allgather (ind_range, 2, MPI_distanceType,
413  ind_all, 2 , MPI_distanceType, comm);
414 
415  // scan to get the global range of indices of the query
416  _Distance g_ind_min = 0, g_ind_max = 0;
417  for (int i = 0; i < nproc; ++i) {
418  g_ind_min += ind_all[2 * i];
419  g_ind_max += ind_all[2 * i + 1];
420  }
421 
422  // if target in range, low procs to high take excess for stability
423  if (g_ind_min <= target && target <= g_ind_max) {
424  _Distance excess = target - g_ind_min;
425  for (int i = 0; i < nproc; ++i) {
426  split_inds[i] = min (ind_all[2 * i] + excess, ind_all[2 * i + 1]);
427  excess -= split_inds[i] - ind_all[2 * i];
428  }
429  return;
430  }
431 
432  // set up for next iteration of binary search
433  if (target < g_ind_min) {
434  last = first + ind_all[2 * rank];
435  assert(outleft[rank] == start - first);
436  for (int i = 0; i < nproc; ++i) {
437  subdist[i] = ind_all[2 * i] - outleft[i];
438  }
439  } else {
440  start = first + ind_all[2 * rank + 1];
441  for (int i = 0; i < nproc; ++i) {
442  subdist[i] = outleft[i] + subdist[i] - ind_all[2 * i + 1];
443  outleft[i] = ind_all[2 * i + 1];
444  }
445  }
446  }
447  }
448  };
449 
450 } /* namespace vpsort */
451 
452 #endif /* PSORT_SPLITTERS_H */
453 
Definition: psort.h:28
char * description()
void real_split(_RandomAccessIter first, _RandomAccessIter last, _Distance *dist, _Compare comp, vector< vector< _Distance > > &right_ends, MPI_Datatype &MPI_valueType, MPI_Datatype &MPI_distanceType, MPI_Comm comm)
void split(_RandomAccessIter first, _RandomAccessIter last, _Distance *dist, _Compare comp, vector< vector< _Distance > > &right_ends, MPI_Datatype &MPI_valueType, MPI_Datatype &MPI_distanceType, MPI_Comm comm)
int rank
void real_split(_RandomAccessIter first, _RandomAccessIter last, _Distance *dist, _Compare comp, vector< vector< _Distance > > &right_ends, MPI_Datatype &MPI_valueType, MPI_Datatype &MPI_distanceType, MPI_Comm comm)