integer_sort.hpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. //Templated Spreadsort-based implementation of integer_sort
  2. // Copyright Steven J. Ross 2001 - 2014.
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // (See accompanying file LICENSE_1_0.txt or copy at
  5. // http://www.boost.org/LICENSE_1_0.txt)
  6. // See http://www.boost.org/libs/sort/ for library home page.
  7. /*
  8. Some improvements suggested by:
  9. Phil Endecott and Frank Gennari
  10. Doxygen comments by Paul A. Bristow Jan 2015
  11. */
  12. #ifndef BOOST_INTEGER_SORT_HPP
  13. #define BOOST_INTEGER_SORT_HPP
  14. #include <algorithm>
  15. #include <vector>
  16. #include <cstring>
  17. #include <limits>
  18. #include <boost/static_assert.hpp>
  19. #include <boost/sort/spreadsort/detail/constants.hpp>
  20. #include <boost/sort/spreadsort/detail/integer_sort.hpp>
  21. #include <boost/range/begin.hpp>
  22. #include <boost/range/end.hpp>
  23. namespace boost {
  24. namespace sort {
  25. namespace spreadsort {
  26. //Top-level sorting call for integers.
  27. /*! \brief Integer sort algorithm using random access iterators.
  28. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  29. \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
  30. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  31. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  32. so @c integer_sort is asymptotically faster
  33. than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
  34. so its worst-case with default settings for 32-bit integers is
  35. <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
  36. Some performance plots of runtime vs. n and log(range) are provided:\n
  37. <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
  38. \n
  39. <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
  40. \param[in] first Iterator pointer to first element.
  41. \param[in] last Iterator pointing to one beyond the end of data.
  42. \pre [@c first, @c last) is a valid range.
  43. \pre @c RandomAccessIter @c value_type is mutable.
  44. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  45. \pre @c RandomAccessIter @c value_type supports the @c operator>>,
  46. which returns an integer-type right-shifted a specified number of bits.
  47. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  48. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  49. the right shift, subtraction of right-shifted elements, functors, or any operations on iterators throw.
  50. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  51. \warning Invalid arguments cause undefined behaviour.
  52. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  53. enabling faster generic-programming.
  54. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  55. \remark * N is @c last - @c first,
  56. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  57. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  58. */
  59. template <class RandomAccessIter>
  60. inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
  61. {
  62. // Don't sort if it's too small to optimize.
  63. if (last - first < detail::min_sort_size)
  64. boost::sort::pdqsort(first, last);
  65. else
  66. detail::integer_sort(first, last, *first >> 0);
  67. }
  68. /*! \brief Integer sort algorithm using range.
  69. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  70. \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
  71. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  72. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  73. so @c integer_sort is asymptotically faster
  74. than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
  75. so its worst-case with default settings for 32-bit integers is
  76. <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
  77. Some performance plots of runtime vs. n and log(range) are provided:\n
  78. <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
  79. \n
  80. <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
  81. \param[in] range Range [first, last) for sorting.
  82. \pre [@c first, @c last) is a valid range.
  83. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  84. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  85. the right shift, subtraction of right-shifted elements, functors, or any operations on iterators throw.
  86. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  87. \warning Invalid arguments cause undefined behaviour.
  88. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  89. enabling faster generic-programming.
  90. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  91. \remark * N is @c last - @c first,
  92. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  93. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  94. */
  95. template <class Range>
  96. inline void integer_sort(Range& range)
  97. {
  98. integer_sort(boost::begin(range), boost::end(range));
  99. }
  100. /*! \brief Integer sort algorithm using random access iterators with both right-shift and user-defined comparison operator.
  101. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  102. \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
  103. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  104. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  105. so @c integer_sort is asymptotically faster
  106. than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
  107. so its worst-case with default settings for 32-bit integers is
  108. <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
  109. Some performance plots of runtime vs. n and log(range) are provided:\n
  110. <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
  111. \n
  112. <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
  113. \param[in] first Iterator pointer to first element.
  114. \param[in] last Iterator pointing to one beyond the end of data.
  115. \param[in] shift Functor that returns the result of shifting the value_type right a specified number of bits.
  116. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  117. \pre [@c first, @c last) is a valid range.
  118. \pre @c RandomAccessIter @c value_type is mutable.
  119. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  120. \return @c void.
  121. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  122. the right shift, subtraction of right-shifted elements, functors,
  123. or any operations on iterators throw.
  124. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  125. \warning Invalid arguments cause undefined behaviour.
  126. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  127. enabling faster generic-programming.
  128. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  129. \remark * N is @c last - @c first,
  130. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  131. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  132. */
  133. template <class RandomAccessIter, class Right_shift, class Compare>
  134. inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
  135. Right_shift shift, Compare comp) {
  136. if (last - first < detail::min_sort_size)
  137. boost::sort::pdqsort(first, last, comp);
  138. else
  139. detail::integer_sort(first, last, shift(*first, 0), shift, comp);
  140. }
  141. /*! \brief Integer sort algorithm using range with both right-shift and user-defined comparison operator.
  142. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  143. \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
  144. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  145. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  146. so @c integer_sort is asymptotically faster
  147. than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
  148. so its worst-case with default settings for 32-bit integers is
  149. <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
  150. Some performance plots of runtime vs. n and log(range) are provided:\n
  151. <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
  152. \n
  153. <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
  154. \param[in] range Range [first, last) for sorting.
  155. \param[in] shift Functor that returns the result of shifting the value_type right a specified number of bits.
  156. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  157. \pre [@c first, @c last) is a valid range.
  158. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  159. \return @c void.
  160. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  161. the right shift, subtraction of right-shifted elements, functors,
  162. or any operations on iterators throw.
  163. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  164. \warning Invalid arguments cause undefined behaviour.
  165. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  166. enabling faster generic-programming.
  167. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  168. \remark * N is @c last - @c first,
  169. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  170. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  171. */
  172. template <class Range, class Right_shift, class Compare>
  173. inline void integer_sort(Range& range, Right_shift shift, Compare comp)
  174. {
  175. integer_sort(boost::begin(range), boost::end(range), shift, comp);
  176. }
  177. /*! \brief Integer sort algorithm using random access iterators with just right-shift functor.
  178. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  179. \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
  180. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  181. \par Performance:
  182. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  183. so @c integer_sort is asymptotically faster
  184. than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
  185. so its worst-case with default settings for 32-bit integers is
  186. <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
  187. Some performance plots of runtime vs. n and log(range) are provided:\n
  188. * <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>\n
  189. * <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
  190. \param[in] first Iterator pointer to first element.
  191. \param[in] last Iterator pointing to one beyond the end of data.
  192. \param[in] shift A functor that returns the result of shifting the value_type right a specified number of bits.
  193. \pre [@c first, @c last) is a valid range.
  194. \pre @c RandomAccessIter @c value_type is mutable.
  195. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  196. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  197. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  198. the right shift, subtraction of right-shifted elements, functors,
  199. or any operations on iterators throw.
  200. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  201. \warning Invalid arguments cause undefined behaviour.
  202. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  203. enabling faster generic-programming.
  204. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  205. \remark * N is @c last - @c first,
  206. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  207. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  208. */
  209. template <class RandomAccessIter, class Right_shift>
  210. inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
  211. Right_shift shift) {
  212. if (last - first < detail::min_sort_size)
  213. boost::sort::pdqsort(first, last);
  214. else
  215. detail::integer_sort(first, last, shift(*first, 0), shift);
  216. }
  217. /*! \brief Integer sort algorithm using range with just right-shift functor.
  218. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  219. \details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
  220. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  221. \par Performance:
  222. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  223. so @c integer_sort is asymptotically faster
  224. than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
  225. so its worst-case with default settings for 32-bit integers is
  226. <em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
  227. Some performance plots of runtime vs. n and log(range) are provided:\n
  228. * <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>\n
  229. * <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
  230. \param[in] range Range [first, last) for sorting.
  231. \param[in] shift A functor that returns the result of shifting the value_type right a specified number of bits.
  232. \pre [@c first, @c last) is a valid range.
  233. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  234. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  235. the right shift, subtraction of right-shifted elements, functors,
  236. or any operations on iterators throw.
  237. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  238. \warning Invalid arguments cause undefined behaviour.
  239. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  240. enabling faster generic-programming.
  241. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  242. \remark * N is @c last - @c first,
  243. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  244. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  245. */
  246. template <class Range, class Right_shift>
  247. inline void integer_sort(Range& range, Right_shift shift)
  248. {
  249. integer_sort(boost::begin(range), boost::end(range), shift);
  250. }
  251. }
  252. }
  253. }
  254. #endif