spreadsort_common.hpp 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. // Contains get_min_count, the core optimization of the spreadsort algorithm.
  2. // Also has other helper functions commonly useful across variants.
  3. // Copyright Steven J. Ross 2001 - 2014.
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // (See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. // See http://www.boost.org/libs/sort for library home page.
  8. /*
  9. Some improvements suggested by:
  10. Phil Endecott and Frank Gennari
  11. */
  12. #ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
  13. #define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
  14. #include <algorithm>
  15. #include <vector>
  16. #include <cstring>
  17. #include <limits>
  18. #include <functional>
  19. #include <boost/static_assert.hpp>
  20. #include <boost/serialization/static_warning.hpp>
  21. #include <boost/sort/pdqsort/pdqsort.hpp>
  22. #include <boost/sort/spreadsort/detail/constants.hpp>
  23. #include <boost/cstdint.hpp>
  24. namespace boost {
  25. namespace sort {
  26. namespace spreadsort {
  27. namespace detail {
  28. //This only works on unsigned data types
  29. template <typename T>
  30. inline unsigned
  31. rough_log_2_size(const T& input)
  32. {
  33. unsigned result = 0;
  34. //The && is necessary on some compilers to avoid infinite loops
  35. //it doesn't significantly impair performance
  36. while ((result < (8*sizeof(T))) && (input >> result)) ++result;
  37. return result;
  38. }
  39. //Gets the minimum size to call spreadsort on to control worst-case runtime.
  40. //This is called for a set of bins, instead of bin-by-bin, to minimize
  41. //runtime overhead.
  42. //This could be replaced by a lookup table of sizeof(Div_type)*8 but this
  43. //function is more general.
  44. template<unsigned log_mean_bin_size,
  45. unsigned log_min_split_count, unsigned log_finishing_count>
  46. inline size_t
  47. get_min_count(unsigned log_range)
  48. {
  49. const size_t typed_one = 1;
  50. const unsigned min_size = log_mean_bin_size + log_min_split_count;
  51. //Assuring that constants have valid settings
  52. BOOST_STATIC_ASSERT(log_min_split_count <= max_splits &&
  53. log_min_split_count > 0);
  54. BOOST_STATIC_ASSERT(max_splits > 1 &&
  55. max_splits < (8 * sizeof(unsigned)));
  56. BOOST_STATIC_ASSERT(max_finishing_splits >= max_splits &&
  57. max_finishing_splits < (8 * sizeof(unsigned)));
  58. BOOST_STATIC_ASSERT(log_mean_bin_size >= 0);
  59. BOOST_STATIC_ASSERT(log_finishing_count >= 0);
  60. //if we can complete in one iteration, do so
  61. //This first check allows the compiler to optimize never-executed code out
  62. if (log_finishing_count < min_size) {
  63. if (log_range <= min_size && log_range <= max_splits) {
  64. //Return no smaller than a certain minimum limit
  65. if (log_range <= log_finishing_count)
  66. return typed_one << log_finishing_count;
  67. return typed_one << log_range;
  68. }
  69. }
  70. const unsigned base_iterations = max_splits - log_min_split_count;
  71. //sum of n to n + x = ((x + 1) * (n + (n + x)))/2 + log_mean_bin_size
  72. const unsigned base_range =
  73. ((base_iterations + 1) * (max_splits + log_min_split_count))/2
  74. + log_mean_bin_size;
  75. //Calculating the required number of iterations, and returning
  76. //1 << (iteration_count + min_size)
  77. if (log_range < base_range) {
  78. unsigned result = log_min_split_count;
  79. for (unsigned offset = min_size; offset < log_range;
  80. offset += ++result);
  81. //Preventing overflow; this situation shouldn't occur
  82. if ((result + log_mean_bin_size) >= (8 * sizeof(size_t)))
  83. return typed_one << ((8 * sizeof(size_t)) - 1);
  84. return typed_one << (result + log_mean_bin_size);
  85. }
  86. //A quick division can calculate the worst-case runtime for larger ranges
  87. unsigned remainder = log_range - base_range;
  88. //the max_splits - 1 is used to calculate the ceiling of the division
  89. unsigned bit_length = ((((max_splits - 1) + remainder)/max_splits)
  90. + base_iterations + min_size);
  91. //Preventing overflow; this situation shouldn't occur
  92. if (bit_length >= (8 * sizeof(size_t)))
  93. return typed_one << ((8 * sizeof(size_t)) - 1);
  94. //n(log_range)/max_splits + C, optimizing worst-case performance
  95. return typed_one << bit_length;
  96. }
  97. // Resizes the bin cache and bin sizes, and initializes each bin size to 0.
  98. // This generates the memory overhead to use in radix sorting.
  99. template <class RandomAccessIter>
  100. inline RandomAccessIter *
  101. size_bins(size_t *bin_sizes, std::vector<RandomAccessIter>
  102. &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
  103. {
  104. // Clear the bin sizes
  105. for (size_t u = 0; u < bin_count; u++)
  106. bin_sizes[u] = 0;
  107. //Make sure there is space for the bins
  108. cache_end = cache_offset + bin_count;
  109. if (cache_end > bin_cache.size())
  110. bin_cache.resize(cache_end);
  111. return &(bin_cache[cache_offset]);
  112. }
  113. }
  114. }
  115. }
  116. }
  117. #endif