string_sort.hpp 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741
  1. //Templated hybrid string_sort
  2. // Copyright Steven J. Ross 2001 - 2009.
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // (See accompanying file LICENSE_1_0.txt or copy at
  5. // http://www.boost.org/LICENSE_1_0.txt)
  6. // See http://www.boost.org/libs/sort/ for library home page.
  7. /*
  8. Some improvements suggested by:
  9. Phil Endecott and Frank Gennari
  10. */
  11. #ifndef BOOST_STRING_SORT_HPP
  12. #define BOOST_STRING_SORT_HPP
  13. #include <algorithm>
  14. #include <vector>
  15. #include <cstring>
  16. #include <limits>
  17. #include <boost/static_assert.hpp>
  18. #include <boost/sort/spreadsort/detail/constants.hpp>
  19. #include <boost/sort/spreadsort/detail/string_sort.hpp>
  20. #include <boost/range/begin.hpp>
  21. #include <boost/range/end.hpp>
  22. namespace boost {
  23. namespace sort {
  24. namespace spreadsort {
  25. /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n
  26. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  27. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  28. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  29. \par
  30. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  31. so @c string_sort is asymptotically faster
  32. than pure comparison-based algorithms. \n\n
  33. Some performance plots of runtime vs. n and log(range) are provided:\n
  34. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  35. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  36. \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
  37. \tparam Unsigned_char_type Unsigned character type used for string.
  38. \param[in] first Iterator pointer to first element.
  39. \param[in] last Iterator pointing to one beyond the end of data.
  40. \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
  41. \pre [@c first, @c last) is a valid range.
  42. \pre @c RandomAccessIter @c value_type is mutable.
  43. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  44. \pre @c RandomAccessIter @c value_type supports the @c operator>>,
  45. which returns an integer-type right-shifted a specified number of bits.
  46. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  47. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  48. the right shift, subtraction of right-shifted elements, functors,
  49. or any operations on iterators throw.
  50. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  51. \warning Invalid arguments cause undefined behaviour.
  52. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  53. enabling faster generic-programming.
  54. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  55. \remark * N is @c last - @c first,
  56. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  57. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  58. */
  59. template <class RandomAccessIter, class Unsigned_char_type>
  60. inline void string_sort(RandomAccessIter first, RandomAccessIter last,
  61. Unsigned_char_type unused)
  62. {
  63. //Don't sort if it's too small to optimize
  64. if (last - first < detail::min_sort_size)
  65. boost::sort::pdqsort(first, last);
  66. else
  67. detail::string_sort(first, last, unused);
  68. }
  69. /*! \brief String sort algorithm using range, allowing character-type overloads.\n
  70. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  71. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  72. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  73. \par
  74. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  75. so @c string_sort is asymptotically faster
  76. than pure comparison-based algorithms. \n\n
  77. Some performance plots of runtime vs. n and log(range) are provided:\n
  78. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  79. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  80. \tparam Unsigned_char_type Unsigned character type used for string.
  81. \param[in] range Range [first, last) for sorting.
  82. \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
  83. \pre [@c first, @c last) is a valid range.
  84. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  85. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  86. the right shift, subtraction of right-shifted elements, functors,
  87. or any operations on iterators throw.
  88. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  89. \warning Invalid arguments cause undefined behaviour.
  90. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  91. enabling faster generic-programming.
  92. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  93. \remark * N is @c last - @c first,
  94. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  95. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  96. */
  97. template <class Range, class Unsigned_char_type>
  98. inline void string_sort(Range& range, Unsigned_char_type unused)
  99. {
  100. string_sort(boost::begin(range), boost::end(range), unused);
  101. }
  102. /*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char.
  103. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  104. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  105. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  106. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  107. so @c string_sort is asymptotically faster
  108. than pure comparison-based algorithms. \n\n
  109. Some performance plots of runtime vs. n and log(range) are provided:\n
  110. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
  111. \n
  112. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  113. \param[in] first Iterator pointer to first element.
  114. \param[in] last Iterator pointing to one beyond the end of data.
  115. \pre [@c first, @c last) is a valid range.
  116. \pre @c RandomAccessIter @c value_type is mutable.
  117. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  118. \pre @c RandomAccessIter @c value_type supports the @c operator>>,
  119. which returns an integer-type right-shifted a specified number of bits.
  120. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  121. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  122. the right shift, subtraction of right-shifted elements, functors,
  123. or any operations on iterators throw.
  124. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  125. \warning Invalid arguments cause undefined behaviour.
  126. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  127. enabling faster generic-programming.
  128. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  129. \remark * N is @c last - @c first,
  130. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  131. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  132. */
  133. template <class RandomAccessIter>
  134. inline void string_sort(RandomAccessIter first, RandomAccessIter last)
  135. {
  136. unsigned char unused = '\0';
  137. string_sort(first, last, unused);
  138. }
  139. /*! \brief String sort algorithm using range, wraps using default of unsigned char.
  140. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  141. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  142. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  143. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  144. so @c string_sort is asymptotically faster
  145. than pure comparison-based algorithms. \n\n
  146. Some performance plots of runtime vs. n and log(range) are provided:\n
  147. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
  148. \n
  149. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  150. \param[in] range Range [first, last) for sorting.
  151. \pre [@c first, @c last) is a valid range.
  152. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  153. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  154. the right shift, subtraction of right-shifted elements, functors,
  155. or any operations on iterators throw.
  156. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  157. \warning Invalid arguments cause undefined behaviour.
  158. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  159. enabling faster generic-programming.
  160. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  161. \remark * N is @c last - @c first,
  162. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  163. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  164. */
  165. template <class Range>
  166. inline void string_sort(Range& range)
  167. {
  168. string_sort(boost::begin(range), boost::end(range));
  169. }
  170. /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.
  171. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < detail::min_sort_size).
  172. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  173. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  174. \par
  175. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  176. so @c string_sort is asymptotically faster
  177. than pure comparison-based algorithms. \n\n
  178. Some performance plots of runtime vs. n and log(range) are provided:\n
  179. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  180. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  181. \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
  182. \tparam Comp Functor type to use for comparison.
  183. \tparam Unsigned_char_type Unsigned character type used for string.
  184. \param[in] first Iterator pointer to first element.
  185. \param[in] last Iterator pointing to one beyond the end of data.
  186. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  187. \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
  188. \pre [@c first, @c last) is a valid range.
  189. \pre @c RandomAccessIter @c value_type is mutable.
  190. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  191. \pre @c RandomAccessIter @c value_type supports the @c operator>>,
  192. which returns an integer-type right-shifted a specified number of bits.
  193. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  194. \return @c void.
  195. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  196. the right shift, subtraction of right-shifted elements, functors,
  197. or any operations on iterators throw.
  198. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  199. \warning Invalid arguments cause undefined behaviour.
  200. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  201. enabling faster generic-programming.
  202. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  203. \remark * N is @c last - @c first,
  204. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  205. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  206. */
  207. template <class RandomAccessIter, class Compare, class Unsigned_char_type>
  208. inline void reverse_string_sort(RandomAccessIter first,
  209. RandomAccessIter last, Compare comp, Unsigned_char_type unused)
  210. {
  211. //Don't sort if it's too small to optimize.
  212. if (last - first < detail::min_sort_size)
  213. boost::sort::pdqsort(first, last, comp);
  214. else
  215. detail::reverse_string_sort(first, last, unused);
  216. }
  217. /*! \brief String sort algorithm using range, allowing character-type overloads.
  218. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < detail::min_sort_size).
  219. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  220. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  221. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  222. so @c string_sort is asymptotically faster
  223. than pure comparison-based algorithms. \n\n
  224. Some performance plots of runtime vs. n and log(range) are provided:\n
  225. <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
  226. \n
  227. <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
  228. \tparam Comp Functor type to use for comparison.
  229. \tparam Unsigned_char_type Unsigned character type used for string.
  230. \param[in] range Range [first, last) for sorting.
  231. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  232. \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
  233. \pre [@c first, @c last) is a valid range.
  234. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  235. \return @c void.
  236. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  237. the right shift, subtraction of right-shifted elements, functors,
  238. or any operations on iterators throw.
  239. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  240. \warning Invalid arguments cause undefined behaviour.
  241. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  242. enabling faster generic-programming.
  243. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  244. \remark * N is @c last - @c first,
  245. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  246. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  247. */
  248. template <class Range, class Compare, class Unsigned_char_type>
  249. inline void reverse_string_sort(Range& range, Compare comp, Unsigned_char_type unused)
  250. {
  251. reverse_string_sort(boost::begin(range), boost::end(range), comp, unused);
  252. }
  253. /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
  254. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  255. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  256. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  257. \par
  258. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  259. so @c string_sort is asymptotically faster
  260. than pure comparison-based algorithms.\n\n
  261. Some performance plots of runtime vs. n and log(range) are provided:\n
  262. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  263. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  264. \param[in] first Iterator pointer to first element.
  265. \param[in] last Iterator pointing to one beyond the end of data.
  266. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  267. \pre [@c first, @c last) is a valid range.
  268. \pre @c RandomAccessIter @c value_type is mutable.
  269. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  270. \pre @c RandomAccessIter @c value_type supports the @c operator>>,
  271. which returns an integer-type right-shifted a specified number of bits.
  272. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  273. \return @c void.
  274. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  275. the right shift, subtraction of right-shifted elements, functors,
  276. or any operations on iterators throw.
  277. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  278. \warning Invalid arguments cause undefined behaviour.
  279. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  280. enabling faster generic-programming.
  281. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  282. \remark * N is @c last - @c first,
  283. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  284. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  285. */
  286. template <class RandomAccessIter, class Compare>
  287. inline void reverse_string_sort(RandomAccessIter first,
  288. RandomAccessIter last, Compare comp)
  289. {
  290. unsigned char unused = '\0';
  291. reverse_string_sort(first, last, comp, unused);
  292. }
  293. /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
  294. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  295. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  296. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  297. \par
  298. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  299. so @c string_sort is asymptotically faster
  300. than pure comparison-based algorithms. \n\n
  301. Some performance plots of runtime vs. n and log(range) are provided:\n
  302. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  303. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  304. \param[in] range Range [first, last) for sorting.
  305. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  306. \pre [@c first, @c last) is a valid range.
  307. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  308. \return @c void.
  309. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  310. the right shift, subtraction of right-shifted elements, functors,
  311. or any operations on iterators throw.
  312. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  313. \warning Invalid arguments cause undefined behaviour.
  314. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  315. enabling faster generic-programming.
  316. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  317. \remark * N is @c last - @c first,
  318. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  319. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  320. */
  321. template <class Range, class Compare>
  322. inline void reverse_string_sort(Range& range, Compare comp)
  323. {
  324. reverse_string_sort(boost::begin(range), boost::end(range), comp);
  325. }
  326. /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
  327. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  328. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  329. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  330. \par
  331. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  332. so @c string_sort is asymptotically faster
  333. than pure comparison-based algorithms. \n\n
  334. Some performance plots of runtime vs. n and log(range) are provided:\n
  335. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  336. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  337. \param[in] first Iterator pointer to first element.
  338. \param[in] last Iterator pointing to one beyond the end of data.
  339. \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
  340. \param[in] length Functor to get the length of the string in characters.
  341. \pre [@c first, @c last) is a valid range.
  342. \pre @c RandomAccessIter @c value_type is mutable.
  343. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  344. \pre @c RandomAccessIter @c value_type supports the @c operator>>,
  345. which returns an integer-type right-shifted a specified number of bits.
  346. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  347. \return @c void.
  348. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  349. the right shift, subtraction of right-shifted elements, functors,
  350. or any operations on iterators throw.
  351. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  352. \warning Invalid arguments cause undefined behaviour.
  353. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  354. enabling faster generic-programming.
  355. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  356. \remark * N is @c last - @c first,
  357. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  358. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  359. */
  360. template <class RandomAccessIter, class Get_char, class Get_length>
  361. inline void string_sort(RandomAccessIter first, RandomAccessIter last,
  362. Get_char get_character, Get_length length)
  363. {
  364. //Don't sort if it's too small to optimize
  365. if (last - first < detail::min_sort_size)
  366. boost::sort::pdqsort(first, last);
  367. else {
  368. //skipping past empties, which allows us to get the character type
  369. //.empty() is not used so as not to require a user declaration of it
  370. while (!length(*first)) {
  371. if (++first == last)
  372. return;
  373. }
  374. detail::string_sort(first, last, get_character, length, get_character((*first), 0));
  375. }
  376. }
  377. /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
  378. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  379. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  380. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  381. \par
  382. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  383. so @c string_sort is asymptotically faster
  384. than pure comparison-based algorithms. \n\n
  385. Some performance plots of runtime vs. n and log(range) are provided:\n
  386. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  387. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  388. \param[in] range Range [first, last) for sorting.
  389. \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
  390. \param[in] length Functor to get the length of the string in characters.
  391. \pre [@c first, @c last) is a valid range.
  392. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  393. \return @c void.
  394. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  395. the right shift, subtraction of right-shifted elements, functors,
  396. or any operations on iterators throw.
  397. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  398. \warning Invalid arguments cause undefined behaviour.
  399. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  400. enabling faster generic-programming.
  401. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  402. \remark * N is @c last - @c first,
  403. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  404. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  405. */
  406. template <class Range, class Get_char, class Get_length>
  407. inline void string_sort(Range& range, Get_char get_character, Get_length length)
  408. {
  409. string_sort(boost::begin(range), boost::end(range), get_character, length);
  410. }
  411. /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
  412. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  413. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  414. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  415. \par
  416. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  417. so @c string_sort is asymptotically faster
  418. than pure comparison-based algorithms. \n\n
  419. Some performance plots of runtime vs. n and log(range) are provided:\n
  420. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  421. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  422. \param[in] first Iterator pointer to first element.
  423. \param[in] last Iterator pointing to one beyond the end of data.
  424. \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
  425. \param[in] length Functor to get the length of the string in characters.
  426. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  427. \pre [@c first, @c last) is a valid range.
  428. \pre @c RandomAccessIter @c value_type is mutable.
  429. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  430. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  431. \return @c void.
  432. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  433. the right shift, subtraction of right-shifted elements, functors,
  434. or any operations on iterators throw.
  435. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  436. \warning Invalid arguments cause undefined behaviour.
  437. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  438. enabling faster generic-programming.
  439. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  440. \remark * N is @c last - @c first,
  441. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  442. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  443. */
  444. template <class RandomAccessIter, class Get_char, class Get_length,
  445. class Compare>
  446. inline void string_sort(RandomAccessIter first, RandomAccessIter last,
  447. Get_char get_character, Get_length length, Compare comp)
  448. {
  449. //Don't sort if it's too small to optimize
  450. if (last - first < detail::min_sort_size)
  451. boost::sort::pdqsort(first, last, comp);
  452. else {
  453. //skipping past empties, which allows us to get the character type
  454. //.empty() is not used so as not to require a user declaration of it
  455. while (!length(*first)) {
  456. if (++first == last)
  457. return;
  458. }
  459. detail::string_sort(first, last, get_character, length, comp,
  460. get_character((*first), 0));
  461. }
  462. }
  463. /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
  464. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  465. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  466. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  467. \par
  468. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  469. so @c string_sort is asymptotically faster
  470. than pure comparison-based algorithms. \n\n
  471. Some performance plots of runtime vs. n and log(range) are provided:\n
  472. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  473. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  474. \param[in] range Range [first, last) for sorting.
  475. \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
  476. \param[in] length Functor to get the length of the string in characters.
  477. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  478. \pre [@c first, @c last) is a valid range.
  479. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  480. \return @c void.
  481. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  482. the right shift, subtraction of right-shifted elements, functors,
  483. or any operations on iterators throw.
  484. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  485. \warning Invalid arguments cause undefined behaviour.
  486. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  487. enabling faster generic-programming.
  488. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  489. \remark * N is @c last - @c first,
  490. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  491. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  492. */
  493. template <class Range, class Get_char, class Get_length, class Compare>
  494. inline void string_sort(Range& range,
  495. Get_char get_character, Get_length length, Compare comp)
  496. {
  497. string_sort(boost::begin(range), boost::end(range), get_character, length, comp);
  498. }
  499. /*! \brief Reverse String sort algorithm using random access iterators.
  500. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  501. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  502. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  503. \par
  504. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  505. so @c string_sort is asymptotically faster
  506. than pure comparison-based algorithms. \n\n
  507. Some performance plots of runtime vs. n and log(range) are provided:\n
  508. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  509. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  510. \param[in] first Iterator pointer to first element.
  511. \param[in] last Iterator pointing to one beyond the end of data.
  512. \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
  513. \param[in] length Functor to get the length of the string in characters.
  514. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  515. \pre [@c first, @c last) is a valid range.
  516. \pre @c RandomAccessIter @c value_type is mutable.
  517. \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
  518. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  519. \return @c void.
  520. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  521. the right shift, subtraction of right-shifted elements, functors,
  522. or any operations on iterators throw.
  523. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  524. \warning Invalid arguments cause undefined behaviour.
  525. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  526. enabling faster generic-programming.
  527. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  528. \remark * N is @c last - @c first,
  529. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  530. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  531. */
  532. template <class RandomAccessIter, class Get_char, class Get_length,
  533. class Compare>
  534. inline void reverse_string_sort(RandomAccessIter first,
  535. RandomAccessIter last, Get_char get_character, Get_length length, Compare comp)
  536. {
  537. //Don't sort if it's too small to optimize
  538. if (last - first < detail::min_sort_size)
  539. boost::sort::pdqsort(first, last, comp);
  540. else {
  541. //skipping past empties, which allows us to get the character type
  542. //.empty() is not used so as not to require a user declaration of it
  543. while (!length(*(--last))) {
  544. //If there is just one non-empty at the beginning, this is sorted
  545. if (first == last)
  546. return;
  547. }
  548. //making last just after the end of the non-empty part of the array
  549. detail::reverse_string_sort(first, last + 1, get_character, length, comp,
  550. get_character((*last), 0));
  551. }
  552. }
  553. /*! \brief Reverse String sort algorithm using range.
  554. (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
  555. \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
  556. which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
  557. \par
  558. Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
  559. so @c string_sort is asymptotically faster
  560. than pure comparison-based algorithms. \n\n
  561. Some performance plots of runtime vs. n and log(range) are provided:\n
  562. <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
  563. <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
  564. \param[in] range Range [first, last) for sorting.
  565. \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
  566. \param[in] length Functor to get the length of the string in characters.
  567. \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
  568. \pre [@c first, @c last) is a valid range.
  569. \post The elements in the range [@c first, @c last) are sorted in ascending order.
  570. \return @c void.
  571. \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
  572. the right shift, subtraction of right-shifted elements, functors,
  573. or any operations on iterators throw.
  574. \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
  575. \warning Invalid arguments cause undefined behaviour.
  576. \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
  577. enabling faster generic-programming.
  578. \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
  579. \remark * N is @c last - @c first,
  580. \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
  581. \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
  582. */
  583. template <class Range, class Get_char, class Get_length,
  584. class Compare>
  585. inline void reverse_string_sort(Range& range, Get_char get_character, Get_length length, Compare comp)
  586. {
  587. reverse_string_sort(boost::begin(range), boost::end(range), get_character, length, comp);
  588. }
  589. }
  590. }
  591. }
  592. #endif