123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741 |
- //Templated hybrid string_sort
- // Copyright Steven J. Ross 2001 - 2009.
- // Distributed under the Boost Software License, Version 1.0.
- // (See accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt)
- // See http://www.boost.org/libs/sort/ for library home page.
- /*
- Some improvements suggested by:
- Phil Endecott and Frank Gennari
- */
- #ifndef BOOST_STRING_SORT_HPP
- #define BOOST_STRING_SORT_HPP
- #include <algorithm>
- #include <vector>
- #include <cstring>
- #include <limits>
- #include <boost/static_assert.hpp>
- #include <boost/sort/spreadsort/detail/constants.hpp>
- #include <boost/sort/spreadsort/detail/string_sort.hpp>
- #include <boost/range/begin.hpp>
- #include <boost/range/end.hpp>
- namespace boost {
- namespace sort {
- namespace spreadsort {
- /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
- \tparam Unsigned_char_type Unsigned character type used for string.
- \param[in] first Iterator pointer to first element.
- \param[in] last Iterator pointing to one beyond the end of data.
- \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
- \pre [@c first, @c last) is a valid range.
- \pre @c RandomAccessIter @c value_type is mutable.
- \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
- \pre @c RandomAccessIter @c value_type supports the @c operator>>,
- which returns an integer-type right-shifted a specified number of bits.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class RandomAccessIter, class Unsigned_char_type>
- inline void string_sort(RandomAccessIter first, RandomAccessIter last,
- Unsigned_char_type unused)
- {
- //Don't sort if it's too small to optimize
- if (last - first < detail::min_sort_size)
- boost::sort::pdqsort(first, last);
- else
- detail::string_sort(first, last, unused);
- }
- /*! \brief String sort algorithm using range, allowing character-type overloads.\n
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \tparam Unsigned_char_type Unsigned character type used for string.
- \param[in] range Range [first, last) for sorting.
- \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
- \pre [@c first, @c last) is a valid range.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class Range, class Unsigned_char_type>
- inline void string_sort(Range& range, Unsigned_char_type unused)
- {
- string_sort(boost::begin(range), boost::end(range), unused);
- }
- /*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
- \n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] first Iterator pointer to first element.
- \param[in] last Iterator pointing to one beyond the end of data.
- \pre [@c first, @c last) is a valid range.
- \pre @c RandomAccessIter @c value_type is mutable.
- \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
- \pre @c RandomAccessIter @c value_type supports the @c operator>>,
- which returns an integer-type right-shifted a specified number of bits.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class RandomAccessIter>
- inline void string_sort(RandomAccessIter first, RandomAccessIter last)
- {
- unsigned char unused = '\0';
- string_sort(first, last, unused);
- }
- /*! \brief String sort algorithm using range, wraps using default of unsigned char.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
- \n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] range Range [first, last) for sorting.
- \pre [@c first, @c last) is a valid range.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class Range>
- inline void string_sort(Range& range)
- {
- string_sort(boost::begin(range), boost::end(range));
- }
- /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
- \tparam Comp Functor type to use for comparison.
- \tparam Unsigned_char_type Unsigned character type used for string.
- \param[in] first Iterator pointer to first element.
- \param[in] last Iterator pointing to one beyond the end of data.
- \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
- \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
- \pre [@c first, @c last) is a valid range.
- \pre @c RandomAccessIter @c value_type is mutable.
- \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
- \pre @c RandomAccessIter @c value_type supports the @c operator>>,
- which returns an integer-type right-shifted a specified number of bits.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class RandomAccessIter, class Compare, class Unsigned_char_type>
- inline void reverse_string_sort(RandomAccessIter first,
- RandomAccessIter last, Compare comp, Unsigned_char_type unused)
- {
- //Don't sort if it's too small to optimize.
- if (last - first < detail::min_sort_size)
- boost::sort::pdqsort(first, last, comp);
- else
- detail::reverse_string_sort(first, last, unused);
- }
- /*! \brief String sort algorithm using range, allowing character-type overloads.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
- \n
- <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
- \tparam Comp Functor type to use for comparison.
- \tparam Unsigned_char_type Unsigned character type used for string.
- \param[in] range Range [first, last) for sorting.
- \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
- \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
- \pre [@c first, @c last) is a valid range.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class Range, class Compare, class Unsigned_char_type>
- inline void reverse_string_sort(Range& range, Compare comp, Unsigned_char_type unused)
- {
- reverse_string_sort(boost::begin(range), boost::end(range), comp, unused);
- }
- /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms.\n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] first Iterator pointer to first element.
- \param[in] last Iterator pointing to one beyond the end of data.
- \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
- \pre [@c first, @c last) is a valid range.
- \pre @c RandomAccessIter @c value_type is mutable.
- \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
- \pre @c RandomAccessIter @c value_type supports the @c operator>>,
- which returns an integer-type right-shifted a specified number of bits.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class RandomAccessIter, class Compare>
- inline void reverse_string_sort(RandomAccessIter first,
- RandomAccessIter last, Compare comp)
- {
- unsigned char unused = '\0';
- reverse_string_sort(first, last, comp, unused);
- }
- /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] range Range [first, last) for sorting.
- \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
- \pre [@c first, @c last) is a valid range.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class Range, class Compare>
- inline void reverse_string_sort(Range& range, Compare comp)
- {
- reverse_string_sort(boost::begin(range), boost::end(range), comp);
- }
- /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] first Iterator pointer to first element.
- \param[in] last Iterator pointing to one beyond the end of data.
- \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
- \param[in] length Functor to get the length of the string in characters.
- \pre [@c first, @c last) is a valid range.
- \pre @c RandomAccessIter @c value_type is mutable.
- \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
- \pre @c RandomAccessIter @c value_type supports the @c operator>>,
- which returns an integer-type right-shifted a specified number of bits.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class RandomAccessIter, class Get_char, class Get_length>
- inline void string_sort(RandomAccessIter first, RandomAccessIter last,
- Get_char get_character, Get_length length)
- {
- //Don't sort if it's too small to optimize
- if (last - first < detail::min_sort_size)
- boost::sort::pdqsort(first, last);
- else {
- //skipping past empties, which allows us to get the character type
- //.empty() is not used so as not to require a user declaration of it
- while (!length(*first)) {
- if (++first == last)
- return;
- }
- detail::string_sort(first, last, get_character, length, get_character((*first), 0));
- }
- }
- /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] range Range [first, last) for sorting.
- \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
- \param[in] length Functor to get the length of the string in characters.
- \pre [@c first, @c last) is a valid range.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class Range, class Get_char, class Get_length>
- inline void string_sort(Range& range, Get_char get_character, Get_length length)
- {
- string_sort(boost::begin(range), boost::end(range), get_character, length);
- }
- /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] first Iterator pointer to first element.
- \param[in] last Iterator pointing to one beyond the end of data.
- \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
- \param[in] length Functor to get the length of the string in characters.
- \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
- \pre [@c first, @c last) is a valid range.
- \pre @c RandomAccessIter @c value_type is mutable.
- \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class RandomAccessIter, class Get_char, class Get_length,
- class Compare>
- inline void string_sort(RandomAccessIter first, RandomAccessIter last,
- Get_char get_character, Get_length length, Compare comp)
- {
- //Don't sort if it's too small to optimize
- if (last - first < detail::min_sort_size)
- boost::sort::pdqsort(first, last, comp);
- else {
- //skipping past empties, which allows us to get the character type
- //.empty() is not used so as not to require a user declaration of it
- while (!length(*first)) {
- if (++first == last)
- return;
- }
- detail::string_sort(first, last, get_character, length, comp,
- get_character((*first), 0));
- }
- }
- /*! \brief String sort algorithm using range, wraps using default of @c unsigned char.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] range Range [first, last) for sorting.
- \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
- \param[in] length Functor to get the length of the string in characters.
- \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
- \pre [@c first, @c last) is a valid range.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class Range, class Get_char, class Get_length, class Compare>
- inline void string_sort(Range& range,
- Get_char get_character, Get_length length, Compare comp)
- {
- string_sort(boost::begin(range), boost::end(range), get_character, length, comp);
- }
- /*! \brief Reverse String sort algorithm using random access iterators.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] first Iterator pointer to first element.
- \param[in] last Iterator pointing to one beyond the end of data.
- \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
- \param[in] length Functor to get the length of the string in characters.
- \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
- \pre [@c first, @c last) is a valid range.
- \pre @c RandomAccessIter @c value_type is mutable.
- \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class RandomAccessIter, class Get_char, class Get_length,
- class Compare>
- inline void reverse_string_sort(RandomAccessIter first,
- RandomAccessIter last, Get_char get_character, Get_length length, Compare comp)
- {
- //Don't sort if it's too small to optimize
- if (last - first < detail::min_sort_size)
- boost::sort::pdqsort(first, last, comp);
- else {
- //skipping past empties, which allows us to get the character type
- //.empty() is not used so as not to require a user declaration of it
- while (!length(*(--last))) {
- //If there is just one non-empty at the beginning, this is sorted
- if (first == last)
- return;
- }
- //making last just after the end of the non-empty part of the array
- detail::reverse_string_sort(first, last + 1, get_character, length, comp,
- get_character((*last), 0));
- }
- }
- /*! \brief Reverse String sort algorithm using range.
- (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size).
- \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
- which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
- \par
- Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
- so @c string_sort is asymptotically faster
- than pure comparison-based algorithms. \n\n
- Some performance plots of runtime vs. n and log(range) are provided:\n
- <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
- <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
- \param[in] range Range [first, last) for sorting.
- \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
- \param[in] length Functor to get the length of the string in characters.
- \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
- \pre [@c first, @c last) is a valid range.
- \post The elements in the range [@c first, @c last) are sorted in ascending order.
- \return @c void.
- \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
- the right shift, subtraction of right-shifted elements, functors,
- or any operations on iterators throw.
- \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
- \warning Invalid arguments cause undefined behaviour.
- \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
- enabling faster generic-programming.
- \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
- \remark * N is @c last - @c first,
- \remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
- \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
- */
- template <class Range, class Get_char, class Get_length,
- class Compare>
- inline void reverse_string_sort(Range& range, Get_char get_character, Get_length length, Compare comp)
- {
- reverse_string_sort(boost::begin(range), boost::end(range), get_character, length, comp);
- }
- }
- }
- }
- #endif
|