types.hpp 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. #ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
  9. #define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
  10. #include <boost/locale/config.hpp>
  11. #include <boost/cstdint.hpp>
  12. #include <boost/assert.hpp>
  13. #ifdef BOOST_MSVC
  14. # pragma warning(push)
  15. # pragma warning(disable : 4275 4251 4231 4660)
  16. #endif
  17. namespace boost {
  18. namespace locale {
  19. ///
  /// \brief This namespace contains all operations required for boundary analysis of text
  21. ///
  22. namespace boundary {
  23. ///
  24. /// \defgroup boundary Boundary Analysis
  25. ///
  /// This module contains all operations required for boundary analysis of text: character, word, line and sentence boundaries
  27. ///
  28. /// @{
  29. ///
  ///
  /// This type describes the possible boundary analysis alternatives.
  ///
  enum boundary_type {
      character = 0, ///< Analyse the text for character boundaries
      word      = 1, ///< Analyse the text for word boundaries
      sentence  = 2, ///< Analyse the text for sentence boundaries
      line      = 3  ///< Analyse the text for positions suitable for line breaks
  };
  ///
  /// \brief Flags used with boundary analysis -- the type of the word, line or sentence boundary found.
  ///
  /// The value is a bit-mask: each bit range stands for one of the rules that can select a
  /// boundary, and several ranges may be combined to accept more than one rule.
  ///
  typedef uint32_t rule_type;
  45. ///
  46. /// \anchor bl_boundary_word_rules
  47. /// \name Flags that describe a type of word selected
  48. /// @{
  49. static const rule_type
  50. word_none = 0x0000F, ///< Not a word, like white space or punctuation mark
  51. word_number = 0x000F0, ///< Word that appear to be a number
  52. word_letter = 0x00F00, ///< Word that contains letters, excluding kana and ideographic characters
  53. word_kana = 0x0F000, ///< Word that contains kana characters
  54. word_ideo = 0xF0000, ///< Word that contains ideographic characters
  55. word_any = 0xFFFF0, ///< Any word including numbers, 0 is special flag, equivalent to 15
  56. word_letters = 0xFFF00, ///< Any word, excluding numbers but including letters, kana and ideograms.
  57. word_kana_ideo = 0xFF000, ///< Word that includes kana or ideographic characters
  58. word_mask = 0xFFFFF; ///< Full word mask - select all possible variants
  59. /// @}
  60. ///
  61. /// \anchor bl_boundary_line_rules
  62. /// \name Flags that describe a type of line break
  63. /// @{
  64. static const rule_type
  65. line_soft = 0x0F, ///< Soft line break: optional but not required
  66. line_hard = 0xF0, ///< Hard line break: like break is required (as per CR/LF)
  67. line_any = 0xFF, ///< Soft or Hard line break
  68. line_mask = 0xFF; ///< Select all types of line breaks
  69. /// @}
  70. ///
  71. /// \anchor bl_boundary_sentence_rules
  72. /// \name Flags that describe a type of sentence break
  73. ///
  74. /// @{
  75. static const rule_type
  76. sentence_term = 0x0F, ///< \brief The sentence was terminated with a sentence terminator
  77. /// like ".", "!" possible followed by hard separator like CR, LF, PS
  78. sentence_sep = 0xF0, ///< \brief The sentence does not contain terminator like ".", "!" but ended with hard separator
  79. /// like CR, LF, PS or end of input.
  80. sentence_any = 0xFF, ///< Either first or second sentence break type;.
  81. sentence_mask = 0xFF; ///< Select all sentence breaking points
  82. ///@}
  83. ///
  84. /// \name Flags that describe a type of character break.
  85. ///
  86. /// At this point break iterator does not distinguish different
  87. /// kinds of characters so it is used for consistency.
  88. ///@{
  89. static const rule_type
  90. character_any = 0xF, ///< Not in use, just for consistency
  91. character_mask = 0xF; ///< Select all character breaking points
  92. ///@}
  93. ///
  94. /// This function returns the mask that covers all variants for specific boundary type
  95. ///
  96. inline rule_type boundary_rule(boundary_type t)
  97. {
  98. switch(t) {
  99. case character: return character_mask;
  100. case word: return word_mask;
  101. case sentence: return sentence_mask;
  102. case line: return line_mask;
  103. default: return 0;
  104. }
  105. }
  106. ///
  107. ///@}
  108. ///
  109. } // boundary
  110. } // locale
  111. } // boost
  112. #ifdef BOOST_MSVC
  113. #pragma warning(pop)
  114. #endif
  115. #endif
  116. // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4