Boost.Sort
integer_sort.hpp
Go to the documentation of this file.
1 // Details for templated Spreadsort-based integer_sort.
2 
3 // Copyright Steven J. Ross 2001 - 2014.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 
8 // See http://www.boost.org/libs/sort for library home page.
9 
10 /*
11 Some improvements suggested by:
12 Phil Endecott and Frank Gennari
13 */
14 
15 #ifndef BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
16 #define BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
17 #include <algorithm>
18 #include <vector>
19 #include <limits>
20 #include <functional>
21 #include <boost/static_assert.hpp>
22 #include <boost/serialization/static_warning.hpp>
23 #include <boost/utility/enable_if.hpp>
26 #include <boost/cstdint.hpp>
27 
28 //! \cond DETAIL
29 
30 namespace boost {
31 namespace sort {
32  namespace detail {
33  // Return true if the list is sorted. Otherwise, find the minimum and
34  // maximum using <.
35  template <class RandomAccessIter>
36  inline bool
37  is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
38  RandomAccessIter & max, RandomAccessIter & min)
39  {
40  min = max = current;
41  //This assumes we have more than 1 element based on prior checks.
42  while (!(*(current + 1) < *current)) {
43  //If everything is in sorted order, return
44  if (++current == last - 1)
45  return true;
46  }
47 
48  //The maximum is the last sorted element
49  max = current;
50  //Start from the first unsorted element
51  while (++current < last) {
52  if (*max < *current)
53  max = current;
54  else if (*current < *min)
55  min = current;
56  }
57  return false;
58  }
59 
60  // Return true if the list is sorted. Otherwise, find the minimum and
61  // maximum.
62  // Use a user-defined comparison operator
63  template <class RandomAccessIter, class Compare>
64  inline bool
65  is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
66  RandomAccessIter & max, RandomAccessIter & min, Compare comp)
67  {
68  min = max = current;
69  while (!comp(*(current + 1), *current)) {
70  //If everything is in sorted order, return
71  if (++current == last - 1)
72  return true;
73  }
74 
75  //The maximum is the last sorted element
76  max = current;
77  while (++current < last) {
78  if (comp(*max, *current))
79  max = current;
80  else if (comp(*current, *min))
81  min = current;
82  }
83  return false;
84  }
85 
86  //Gets a non-negative right bit shift to operate as a logarithmic divisor
87  template<unsigned log_mean_bin_size>
88  inline int
89  get_log_divisor(size_t count, int log_range)
90  {
91  int log_divisor;
92  //If we can finish in one iteration without exceeding either
93  //(2 to the max_finishing_splits) or n bins, do so
94  if ((log_divisor = log_range - rough_log_2_size(count)) <= 0 &&
95  log_range <= max_finishing_splits)
96  log_divisor = 0;
97  else {
98  //otherwise divide the data into an optimized number of pieces
99  log_divisor += log_mean_bin_size;
100  //Cannot exceed max_splits or cache misses slow down bin lookups
101  if ((log_range - log_divisor) > max_splits)
102  log_divisor = log_range - max_splits;
103  }
104  return log_divisor;
105  }
106 
107  //Implementation for recursive integer sorting
108  template <class RandomAccessIter, class Div_type, class Size_type>
109  inline void
110  spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
111  std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
112  , size_t *bin_sizes)
113  {
114  //This step is roughly 10% of runtime, but it helps avoid worst-case
115  //behavior and improve behavior with real data
116  //If you know the maximum and minimum ahead of time, you can pass those
117  //values in and skip this step for the first iteration
118  RandomAccessIter max, min;
119  if (is_sorted_or_find_extremes(first, last, max, min))
120  return;
121  RandomAccessIter * target_bin;
122  unsigned log_divisor = get_log_divisor<int_log_mean_bin_size>(
123  last - first, rough_log_2_size(Size_type((*max >> 0) - (*min >> 0))));
124  Div_type div_min = *min >> log_divisor;
125  Div_type div_max = *max >> log_divisor;
126  unsigned bin_count = unsigned(div_max - div_min) + 1;
127  unsigned cache_end;
128  RandomAccessIter * bins =
129  size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
130 
131  //Calculating the size of each bin; this takes roughly 10% of runtime
132  for (RandomAccessIter current = first; current != last;)
133  bin_sizes[size_t((*(current++) >> log_divisor) - div_min)]++;
134  //Assign the bin positions
135  bins[0] = first;
136  for (unsigned u = 0; u < bin_count - 1; u++)
137  bins[u + 1] = bins[u] + bin_sizes[u];
138 
139  RandomAccessIter nextbinstart = first;
140  //Swap into place
141  //This dominates runtime, mostly in the swap and bin lookups
142  for (unsigned u = 0; u < bin_count - 1; ++u) {
143  RandomAccessIter * local_bin = bins + u;
144  nextbinstart += bin_sizes[u];
145  //Iterating over each element in this bin
146  for (RandomAccessIter current = *local_bin; current < nextbinstart;
147  ++current) {
148  //Swapping elements in current into place until the correct
149  //element has been swapped in
150  for (target_bin = (bins + ((*current >> log_divisor) - div_min));
151  target_bin != local_bin;
152  target_bin = bins + ((*current >> log_divisor) - div_min)) {
153  //3-way swap; this is about 1% faster than a 2-way swap
154  //The main advantage is less copies are involved per item
155  //put in the correct place
156  typename std::iterator_traits<RandomAccessIter>::value_type tmp;
157  RandomAccessIter b = (*target_bin)++;
158  RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
159  if (b_bin != local_bin) {
160  RandomAccessIter c = (*b_bin)++;
161  tmp = *c;
162  *c = *b;
163  }
164  else
165  tmp = *b;
166  *b = *current;
167  *current = tmp;
168  }
169  }
170  *local_bin = nextbinstart;
171  }
172  bins[bin_count - 1] = last;
173 
174  //If we've bucketsorted, the array is sorted and we should skip recursion
175  if (!log_divisor)
176  return;
177  //log_divisor is the remaining range; calculating the comparison threshold
178  size_t max_count =
180  int_log_finishing_count>(log_divisor);
181 
182  //Recursing
183  RandomAccessIter lastPos = first;
184  for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
185  ++u) {
186  Size_type count = bin_cache[u] - lastPos;
187  //don't sort unless there are at least two items to Compare
188  if (count < 2)
189  continue;
190  //using std::sort if its worst-case is better
191  if (count < max_count)
192  std::sort(lastPos, bin_cache[u]);
193  else
194  spreadsort_rec<RandomAccessIter, Div_type, Size_type>(lastPos,
195  bin_cache[u],
196  bin_cache,
197  cache_end,
198  bin_sizes);
199  }
200  }
201 
202  //Generic bitshift-based 3-way swapping code
203  template <class RandomAccessIter, class Div_type, class Right_shift>
204  inline void inner_swap_loop(RandomAccessIter * bins,
205  const RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
206  , const unsigned log_divisor, const Div_type div_min)
207  {
208  RandomAccessIter * local_bin = bins + ii;
209  for (RandomAccessIter current = *local_bin; current < next_bin_start;
210  ++current) {
211  for (RandomAccessIter * target_bin =
212  (bins + (rshift(*current, log_divisor) - div_min));
213  target_bin != local_bin;
214  target_bin = bins + (rshift(*current, log_divisor) - div_min)) {
215  typename std::iterator_traits<RandomAccessIter>::value_type tmp;
216  RandomAccessIter b = (*target_bin)++;
217  RandomAccessIter * b_bin =
218  bins + (rshift(*b, log_divisor) - div_min);
219  //Three-way swap; if the item to be swapped doesn't belong
220  //in the current bin, swap it to where it belongs
221  if (b_bin != local_bin) {
222  RandomAccessIter c = (*b_bin)++;
223  tmp = *c;
224  *c = *b;
225  }
226  //Note: we could increment current once the swap is done in this case
227  //but that seems to impair performance
228  else
229  tmp = *b;
230  *b = *current;
231  *current = tmp;
232  }
233  }
234  *local_bin = next_bin_start;
235  }
236 
237  //Standard swapping wrapper for ascending values
238  template <class RandomAccessIter, class Div_type, class Right_shift>
239  inline void swap_loop(RandomAccessIter * bins,
240  RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
241  , const size_t *bin_sizes
242  , const unsigned log_divisor, const Div_type div_min)
243  {
244  next_bin_start += bin_sizes[ii];
245  inner_swap_loop<RandomAccessIter, Div_type, Right_shift>(bins,
246  next_bin_start, ii, rshift, log_divisor, div_min);
247  }
248 
249  //Functor implementation for recursive sorting
250  template <class RandomAccessIter, class Div_type, class Right_shift,
251  class Compare, class Size_type, unsigned log_mean_bin_size,
252  unsigned log_min_split_count, unsigned log_finishing_count>
253  inline void
254  spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
255  std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
256  , size_t *bin_sizes, Right_shift rshift, Compare comp)
257  {
258  RandomAccessIter max, min;
259  if (is_sorted_or_find_extremes(first, last, max, min, comp))
260  return;
261  unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
262  rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
263  Div_type div_min = rshift(*min, log_divisor);
264  Div_type div_max = rshift(*max, log_divisor);
265  unsigned bin_count = unsigned(div_max - div_min) + 1;
266  unsigned cache_end;
267  RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
268  cache_end, bin_count);
269 
270  //Calculating the size of each bin
271  for (RandomAccessIter current = first; current != last;)
272  bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
273  bins[0] = first;
274  for (unsigned u = 0; u < bin_count - 1; u++)
275  bins[u + 1] = bins[u] + bin_sizes[u];
276 
277  //Swap into place
278  RandomAccessIter next_bin_start = first;
279  for (unsigned u = 0; u < bin_count - 1; ++u)
280  swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, next_bin_start,
281  u, rshift, bin_sizes, log_divisor, div_min);
282  bins[bin_count - 1] = last;
283 
284  //If we've bucketsorted, the array is sorted
285  if (!log_divisor)
286  return;
287 
288  //Recursing
289  size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
290  log_finishing_count>(log_divisor);
291  RandomAccessIter lastPos = first;
292  for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
293  ++u) {
294  size_t count = bin_cache[u] - lastPos;
295  if (count < 2)
296  continue;
297  if (count < max_count)
298  std::sort(lastPos, bin_cache[u], comp);
299  else
300  spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
301  Size_type, log_mean_bin_size, log_min_split_count, log_finishing_count>
302  (lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift, comp);
303  }
304  }
305 
306  //Functor implementation for recursive sorting with only Shift overridden
307  template <class RandomAccessIter, class Div_type, class Right_shift,
308  class Size_type, unsigned log_mean_bin_size,
309  unsigned log_min_split_count, unsigned log_finishing_count>
310  inline void
311  spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
312  std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
313  , size_t *bin_sizes, Right_shift rshift)
314  {
315  RandomAccessIter max, min;
316  if (is_sorted_or_find_extremes(first, last, max, min))
317  return;
318  unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
319  rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
320  Div_type div_min = rshift(*min, log_divisor);
321  Div_type div_max = rshift(*max, log_divisor);
322  unsigned bin_count = unsigned(div_max - div_min) + 1;
323  unsigned cache_end;
324  RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
325  cache_end, bin_count);
326 
327  //Calculating the size of each bin
328  for (RandomAccessIter current = first; current != last;)
329  bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
330  bins[0] = first;
331  for (unsigned u = 0; u < bin_count - 1; u++)
332  bins[u + 1] = bins[u] + bin_sizes[u];
333 
334  //Swap into place
335  RandomAccessIter nextbinstart = first;
336  for (unsigned ii = 0; ii < bin_count - 1; ++ii)
337  swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, nextbinstart,
338  ii, rshift, bin_sizes, log_divisor, div_min);
339  bins[bin_count - 1] = last;
340 
341  //If we've bucketsorted, the array is sorted
342  if (!log_divisor)
343  return;
344 
345  //Recursing
346  size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
347  log_finishing_count>(log_divisor);
348  RandomAccessIter lastPos = first;
349  for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
350  ++u) {
351  size_t count = bin_cache[u] - lastPos;
352  if (count < 2)
353  continue;
354  if (count < max_count)
355  std::sort(lastPos, bin_cache[u]);
356  else
357  spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Size_type,
358  log_mean_bin_size, log_min_split_count, log_finishing_count>(lastPos,
359  bin_cache[u], bin_cache, cache_end, bin_sizes, rshift);
360  }
361  }
362 
363  //Holds the bin vector and makes the initial recursive call
364  template <class RandomAccessIter, class Div_type>
365  //Only use spreadsort if the integer can fit in a size_t
366  inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
367  void >::type
368  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
369  {
370  size_t bin_sizes[1 << max_finishing_splits];
371  std::vector<RandomAccessIter> bin_cache;
372  spreadsort_rec<RandomAccessIter, Div_type, size_t>(first, last,
373  bin_cache, 0, bin_sizes);
374  }
375 
376  //Holds the bin vector and makes the initial recursive call
377  template <class RandomAccessIter, class Div_type>
378  //Only use spreadsort if the integer can fit in a uintmax_t
379  inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
380  && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
381  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
382  {
383  size_t bin_sizes[1 << max_finishing_splits];
384  std::vector<RandomAccessIter> bin_cache;
385  spreadsort_rec<RandomAccessIter, Div_type, boost::uintmax_t>(first,
386  last, bin_cache, 0, bin_sizes);
387  }
388 
389  template <class RandomAccessIter, class Div_type>
390  inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
391  || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
392  //defaulting to std::sort when integer_sort won't work
393  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
394  {
395  //Warning that we're using std::sort, even though integer_sort was called
396  BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
397  std::sort(first, last);
398  }
399 
400 
401  //Same for the full functor version
402  template <class RandomAccessIter, class Div_type, class Right_shift,
403  class Compare>
404  //Only use spreadsort if the integer can fit in a size_t
405  inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
406  void >::type
407  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
408  Right_shift shift, Compare comp)
409  {
410  size_t bin_sizes[1 << max_finishing_splits];
411  std::vector<RandomAccessIter> bin_cache;
412  spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
415  (first, last, bin_cache, 0, bin_sizes, shift, comp);
416  }
417 
418  template <class RandomAccessIter, class Div_type, class Right_shift,
419  class Compare>
420  //Only use spreadsort if the integer can fit in a uintmax_t
421  inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
422  && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
423  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
424  Right_shift shift, Compare comp)
425  {
426  size_t bin_sizes[1 << max_finishing_splits];
427  std::vector<RandomAccessIter> bin_cache;
428  spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
429  boost::uintmax_t, int_log_mean_bin_size,
431  (first, last, bin_cache, 0, bin_sizes, shift, comp);
432  }
433 
434  template <class RandomAccessIter, class Div_type, class Right_shift,
435  class Compare>
436  inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
437  || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
438  //defaulting to std::sort when integer_sort won't work
439  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
440  Right_shift shift, Compare comp)
441  {
442  //Warning that we're using std::sort, even though integer_sort was called
443  BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
444  std::sort(first, last, comp);
445  }
446 
447 
448  //Same for the right shift version
449  template <class RandomAccessIter, class Div_type, class Right_shift>
450  //Only use spreadsort if the integer can fit in a size_t
451  inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
452  void >::type
453  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
454  Right_shift shift)
455  {
456  size_t bin_sizes[1 << max_finishing_splits];
457  std::vector<RandomAccessIter> bin_cache;
458  spreadsort_rec<RandomAccessIter, Div_type, Right_shift, size_t,
461  (first, last, bin_cache, 0, bin_sizes, shift);
462  }
463 
464  template <class RandomAccessIter, class Div_type, class Right_shift>
465  //Only use spreadsort if the integer can fit in a uintmax_t
466  inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
467  && sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
468  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
469  Right_shift shift)
470  {
471  size_t bin_sizes[1 << max_finishing_splits];
472  std::vector<RandomAccessIter> bin_cache;
473  spreadsort_rec<RandomAccessIter, Div_type, Right_shift,
474  boost::uintmax_t, int_log_mean_bin_size,
476  (first, last, bin_cache, 0, bin_sizes, shift);
477  }
478 
479  template <class RandomAccessIter, class Div_type, class Right_shift>
480  inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
481  || sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
482  //defaulting to std::sort when integer_sort won't work
483  integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
484  Right_shift shift)
485  {
486  //Warning that we're using std::sort, even though integer_sort was called
487  BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
488  std::sort(first, last);
489  }
490  }
491 }
492 }
493 //! \endcond
494 
495 #endif
496 
void integer_sort(RandomAccessIter first, RandomAccessIter last)
Integer sort algorithm using random access iterators. (All variants fall back to std::sort if the dat...
Definition: integer_sort.hpp:78
Definition: constants.hpp:11
Definition: constants.hpp:22
size_t get_min_count(unsigned log_range)
Definition: spreadsort_common.hpp:51
Definition: constants.hpp:19
unsigned rough_log_2_size(const T &input)
Definition: spreadsort_common.hpp:34
bool is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last, Div_type &max, Div_type &min, Right_shift rshift)
Definition: float_sort.hpp:54
RandomAccessIter * size_bins(size_t *bin_sizes, std::vector< RandomAccessIter > &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
Definition: spreadsort_common.hpp:106
Definition: constants.hpp:24