collator.hpp 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. #ifndef BOOST_LOCALE_COLLATOR_HPP_INCLUDED
  9. #define BOOST_LOCALE_COLLATOR_HPP_INCLUDED
  10. #include <boost/locale/config.hpp>
  11. #ifdef BOOST_MSVC
  12. # pragma warning(push)
  13. # pragma warning(disable : 4275 4251 4231 4660)
  14. #endif
  15. #include <locale>
  16. namespace boost {
  17. namespace locale {
  18. class info;
  19. ///
  20. /// \defgroup collation Collation
  21. ///
  22. /// This module introduces collation related classes
  23. ///
  24. /// @{
  25. ///
  26. /// \brief a base class that includes collation level flags
  27. ///
  28. class collator_base {
  29. public:
  30. ///
  31. /// Unicode collation level types
  32. ///
  33. typedef enum {
  34. primary = 0, ///< 1st collation level: base letters
  35. secondary = 1, ///< 2nd collation level: letters and accents
  36. tertiary = 2, ///< 3rd collation level: letters, accents and case
  37. quaternary = 3, ///< 4th collation level: letters, accents, case and punctuation
  38. identical = 4 ///< identical collation level: include code-point comparison
  39. } level_type;
  40. };
  41. ///
  42. /// \brief Collation facet.
  43. ///
  44. /// It reimplements standard C++ std::collate,
  45. /// allowing usage of std::locale for direct string comparison
  46. ///
  47. template<typename CharType>
  48. class collator :
  49. public std::collate<CharType>,
  50. public collator_base
  51. {
  52. public:
  53. ///
  54. /// Type of the underlying character
  55. ///
  56. typedef CharType char_type;
  57. ///
  58. /// Type of string used with this facet
  59. ///
  60. typedef std::basic_string<CharType> string_type;
  61. ///
  62. /// Compare two strings in rage [b1,e1), [b2,e2) according using a collation level \a level. Calls do_compare
  63. ///
  64. /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if
  65. /// they considered equal.
  66. ///
  67. int compare(level_type level,
  68. char_type const *b1,char_type const *e1,
  69. char_type const *b2,char_type const *e2) const
  70. {
  71. return do_compare(level,b1,e1,b2,e2);
  72. }
  73. ///
  74. /// Create a binary string that can be compared to other in order to get collation order. The string is created
  75. /// for text in range [b,e). It is useful for collation of multiple strings for text.
  76. ///
  77. /// The transformation follows these rules:
  78. /// \code
  79. /// compare(level,b1,e1,b2,e2) == sign( transform(level,b1,e1).compare(transform(level,b2,e2)) );
  80. /// \endcode
  81. ///
  82. /// Calls do_transform
  83. ///
  84. string_type transform(level_type level,char_type const *b,char_type const *e) const
  85. {
  86. return do_transform(level,b,e);
  87. }
  88. ///
  89. /// Calculate a hash of a text in range [b,e). The value can be used for collation sensitive string comparison.
  90. ///
  91. /// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2)
  92. ///
  93. /// Calls do_hash
  94. ///
  95. long hash(level_type level,char_type const *b,char_type const *e) const
  96. {
  97. return do_hash(level,b,e);
  98. }
  99. ///
  100. /// Compare two strings \a l and \a r using collation level \a level
  101. ///
  102. /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if
  103. /// they considered equal.
  104. ///
  105. ///
  106. int compare(level_type level,string_type const &l,string_type const &r) const
  107. {
  108. return do_compare(level,l.data(),l.data()+l.size(),r.data(),r.data()+r.size());
  109. }
  110. ///
  111. /// Calculate a hash that can be used for collation sensitive string comparison of a string \a s
  112. ///
  113. /// If compare(level,s1,s2) == 0 then hash(level,s1) == hash(level,s2)
  114. ///
  115. long hash(level_type level,string_type const &s) const
  116. {
  117. return do_hash(level,s.data(),s.data()+s.size());
  118. }
  119. ///
  120. /// Create a binary string from string \a s, that can be compared to other, useful for collation of multiple
  121. /// strings.
  122. ///
  123. /// The transformation follows these rules:
  124. /// \code
  125. /// compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) );
  126. /// \endcode
  127. ///
  128. string_type transform(level_type level,string_type const &s) const
  129. {
  130. return do_transform(level,s.data(),s.data()+s.size());
  131. }
  132. protected:
  133. ///
  134. /// constructor of the collator object
  135. ///
  136. collator(size_t refs = 0) : std::collate<CharType>(refs)
  137. {
  138. }
  139. virtual ~collator()
  140. {
  141. }
  142. ///
  143. /// This function is used to override default collation function that does not take in account collation level.
  144. /// Uses primary level
  145. ///
  146. virtual int do_compare( char_type const *b1,char_type const *e1,
  147. char_type const *b2,char_type const *e2) const
  148. {
  149. return do_compare(identical,b1,e1,b2,e2);
  150. }
  151. ///
  152. /// This function is used to override default collation function that does not take in account collation level.
  153. /// Uses primary level
  154. ///
  155. virtual string_type do_transform(char_type const *b,char_type const *e) const
  156. {
  157. return do_transform(identical,b,e);
  158. }
  159. ///
  160. /// This function is used to override default collation function that does not take in account collation level.
  161. /// Uses primary level
  162. ///
  163. virtual long do_hash(char_type const *b,char_type const *e) const
  164. {
  165. return do_hash(identical,b,e);
  166. }
  167. ///
  168. /// Actual function that performs comparison between the strings. For details see compare member function. Can be overridden.
  169. ///
  170. virtual int do_compare( level_type level,
  171. char_type const *b1,char_type const *e1,
  172. char_type const *b2,char_type const *e2) const = 0;
  173. ///
  174. /// Actual function that performs transformation. For details see transform member function. Can be overridden.
  175. ///
  176. virtual string_type do_transform(level_type level,char_type const *b,char_type const *e) const = 0;
  177. ///
  178. /// Actual function that calculates hash. For details see hash member function. Can be overridden.
  179. ///
  180. virtual long do_hash(level_type level,char_type const *b,char_type const *e) const = 0;
  181. };
  182. ///
  183. /// \brief This class can be used in STL algorithms and containers for comparison of strings
  184. /// with a level other than primary
  185. ///
  186. /// For example:
  187. ///
  188. /// \code
  189. /// std::map<std::string,std::string,comparator<char,collator_base::secondary> > data;
  190. /// \endcode
  191. ///
  192. /// Would create a map the keys of which are sorted using secondary collation level
  193. ///
  194. template<typename CharType,collator_base::level_type default_level = collator_base::identical>
  195. struct comparator
  196. {
  197. public:
  198. ///
  199. /// Create a comparator class for locale \a l and with collation leval \a level
  200. ///
  201. /// \note throws std::bad_cast if l does not have \ref collator facet installed
  202. ///
  203. comparator(std::locale const &l=std::locale(),collator_base::level_type level=default_level) :
  204. locale_(l),
  205. level_(level)
  206. {
  207. }
  208. ///
  209. /// Compare two strings -- equivalent to return left < right according to collation rules
  210. ///
  211. bool operator()(std::basic_string<CharType> const &left,std::basic_string<CharType> const &right) const
  212. {
  213. return std::use_facet<collator<CharType> >(locale_).compare(level_,left,right) < 0;
  214. }
  215. private:
  216. std::locale locale_;
  217. collator_base::level_type level_;
  218. };
  219. ///
  220. ///@}
  221. ///
  222. } // locale
  223. } // boost
  224. #ifdef BOOST_MSVC
  225. #pragma warning(pop)
  226. #endif
  227. #endif
  228. ///
  229. /// \example collate.cpp
  230. /// Example of using collation functions
  231. ///
  232. // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4