Boost.Sort
spreadsort_common.hpp
Go to the documentation of this file.
1 // Contains get_min_count, the core optimization of the spreadsort algorithm.
2 // Also has other helper functions commonly useful across variants.
3 
4 // Copyright Steven J. Ross 2001 - 2014.
5 // Distributed under the Boost Software License, Version 1.0.
6 // (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
8 
9 // See http://www.boost.org/libs/sort for library home page.
10 
11 /*
12 Some improvements suggested by:
13 Phil Endecott and Frank Gennari
14 */
15 
16 #ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
17 #define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
18 #include <algorithm>
19 #include <vector>
20 #include <cstring>
21 #include <limits>
22 #include <functional>
23 #include <boost/static_assert.hpp>
24 #include <boost/serialization/static_warning.hpp>
26 #include <boost/cstdint.hpp>
27 
28 namespace boost {
29 namespace sort {
30  namespace detail {
31  //This only works on unsigned data types
32  template <typename T>
33  inline unsigned
34  rough_log_2_size(const T& input)
35  {
36  unsigned result = 0;
37  //The && is necessary on some compilers to avoid infinite loops
38  //it doesn't significantly impair performance
39  while ((input >> result) && (result < (8*sizeof(T)))) ++result;
40  return result;
41  }
42 
43  //Gets the minimum size to call spreadsort on to control worst-case runtime.
44  //This is called for a set of bins, instead of bin-by-bin, to minimize
45  //runtime overhead.
46  //This could be replaced by a lookup table of sizeof(Div_type)*8 but this
47  //function is more general.
48  template<unsigned log_mean_bin_size,
49  unsigned log_min_split_count, unsigned log_finishing_count>
50  inline size_t
51  get_min_count(unsigned log_range)
52  {
53  const size_t typed_one = 1;
54  const unsigned min_size = log_mean_bin_size + log_min_split_count;
55  //Assuring that constants have valid settings
56  BOOST_STATIC_ASSERT(log_min_split_count <= max_splits &&
57  log_min_split_count > 0);
58  BOOST_STATIC_ASSERT(max_splits > 1 &&
59  max_splits < (8 * sizeof(unsigned)));
60  BOOST_STATIC_ASSERT(max_finishing_splits >= max_splits &&
61  max_finishing_splits < (8 * sizeof(unsigned)));
62  BOOST_STATIC_ASSERT(log_mean_bin_size >= 0);
63  BOOST_STATIC_ASSERT(log_finishing_count >= 0);
64  //if we can complete in one iteration, do so
65  //This first check allows the compiler to optimize never-executed code out
66  if (log_finishing_count < min_size) {
67  if (log_range <= min_size && log_range <= max_splits) {
68  //Return no smaller than a certain minimum limit
69  if (log_range <= log_finishing_count)
70  return typed_one << log_finishing_count;
71  return typed_one << log_range;
72  }
73  }
74  const unsigned base_iterations = max_splits - log_min_split_count;
75  //sum of n to n + x = ((x + 1) * (n + (n + x)))/2 + log_mean_bin_size
76  const unsigned base_range =
77  ((base_iterations + 1) * (max_splits + log_min_split_count))/2
78  + log_mean_bin_size;
79  //Calculating the required number of iterations, and returning
80  //1 << (iteration_count + min_size)
81  if (log_range < base_range) {
82  unsigned result = log_min_split_count;
83  for (unsigned offset = min_size; offset < log_range;
84  offset += ++result);
85  //Preventing overflow; this situation shouldn't occur
86  if ((result + log_mean_bin_size) >= (8 * sizeof(size_t)))
87  return typed_one << ((8 * sizeof(size_t)) - 1);
88  return typed_one << (result + log_mean_bin_size);
89  }
90  //A quick division can calculate the worst-case runtime for larger ranges
91  unsigned remainder = log_range - base_range;
92  //the max_splits - 1 is used to calculate the ceiling of the division
93  unsigned bit_length = ((((max_splits - 1) + remainder)/max_splits)
94  + base_iterations + min_size);
95  //Preventing overflow; this situation shouldn't occur
96  if (bit_length >= (8 * sizeof(size_t)))
97  return typed_one << ((8 * sizeof(size_t)) - 1);
98  //n(log_range)/max_splits + C, optimizing worst-case performance
99  return typed_one << bit_length;
100  }
101 
102  // Resizes the bin cache and bin sizes, and initializes each bin size to 0.
103  // This generates the memory overhead to use in radix sorting.
104  template <class RandomAccessIter>
105  inline RandomAccessIter *
106  size_bins(size_t *bin_sizes, std::vector<RandomAccessIter>
107  &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
108  {
109  // Clear the bin sizes
110  for (size_t u = 0; u < bin_count; u++)
111  bin_sizes[u] = 0;
112  //Make sure there is space for the bins
113  cache_end = cache_offset + bin_count;
114  if (cache_end > bin_cache.size())
115  bin_cache.resize(cache_end);
116  return &(bin_cache[cache_offset]);
117  }
118  }
119 }
120 }
121 
122 #endif
Definition: constants.hpp:11
Definition: constants.hpp:22
size_t get_min_count(unsigned log_range)
Definition: spreadsort_common.hpp:51
Definition: constants.hpp:19
unsigned rough_log_2_size(const T &input)
Definition: spreadsort_common.hpp:34
RandomAccessIter * size_bins(size_t *bin_sizes, std::vector< RandomAccessIter > &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
Definition: spreadsort_common.hpp:106