123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
- //
- // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
- //
- // Distributed under the Boost Software License, Version 1.0. (See
- // accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt)
- //
- #ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
- #define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
- #include <boost/locale/config.hpp>
- #include <boost/cstdint.hpp>
- #include <boost/assert.hpp>
- #ifdef BOOST_MSVC
- # pragma warning(push)
- # pragma warning(disable : 4275 4251 4231 4660)
- #endif
namespace boost {
namespace locale {

    ///
    /// \brief This namespace contains all operations required for boundary analysis of text
    ///
    namespace boundary {
        ///
        /// \defgroup boundary Boundary Analysis
        ///
        /// This module contains all operations required for boundary analysis of text:
        /// character, word, line and sentence boundaries
        ///
        /// @{
        ///

        ///
        /// This type describes the possible boundary analysis alternatives.
        ///
        enum boundary_type {
            character,  ///< Analyse the text for character boundaries
            word,       ///< Analyse the text for word boundaries
            sentence,   ///< Analyse the text for sentence boundaries
            line        ///< Analyse the text for positions suitable for line breaks
        };

        ///
        /// \brief Flags used with boundary analysis -- the type of the word, line or sentence boundary found.
        ///
        /// It is a bit-mask that represents various combinations of rules used to select this specific boundary.
        ///
        typedef uint32_t rule_type;

        ///
        /// \anchor bl_boundary_word_rules
        /// \name Flags that describe a type of word selected
        /// @{
        static const rule_type word_none      = 0x0000F; ///< Not a word, like white space or punctuation mark
        static const rule_type word_number    = 0x000F0; ///< Word that appears to be a number
        static const rule_type word_letter    = 0x00F00; ///< Word that contains letters, excluding kana and ideographic characters
        static const rule_type word_kana      = 0x0F000; ///< Word that contains kana characters
        static const rule_type word_ideo      = 0xF0000; ///< Word that contains ideographic characters
        // The composite masks below are built from the primitive flags so that the
        // relationship between them is visible; the resulting values are noted per line.
        static const rule_type word_any       = word_number | word_letter | word_kana | word_ideo;
                                                         ///< Any word including numbers (0xFFFF0),
                                                         ///< 0 is special flag, equivalent to 15
        static const rule_type word_letters   = word_letter | word_kana | word_ideo;
                                                         ///< Any word, excluding numbers but including
                                                         ///< letters, kana and ideograms (0xFFF00)
        static const rule_type word_kana_ideo = word_kana | word_ideo;
                                                         ///< Word that includes kana or ideographic characters (0xFF000)
        static const rule_type word_mask      = word_none | word_any;
                                                         ///< Full word mask - select all possible variants (0xFFFFF)
        /// @}

        ///
        /// \anchor bl_boundary_line_rules
        /// \name Flags that describe a type of line break
        /// @{
        static const rule_type line_soft = 0x0F;                  ///< Soft line break: optional but not required
        static const rule_type line_hard = 0xF0;                  ///< Hard line break: line break is required (as per CR/LF)
        static const rule_type line_any  = line_soft | line_hard; ///< Soft or Hard line break (0xFF)
        static const rule_type line_mask = line_any;              ///< Select all types of line breaks (0xFF)
        /// @}

        ///
        /// \anchor bl_boundary_sentence_rules
        /// \name Flags that describe a type of sentence break
        ///
        /// @{
        static const rule_type sentence_term = 0x0F; ///< \brief The sentence was terminated with a sentence terminator
                                                     ///< like ".", "!" possibly followed by hard separator like CR, LF, PS
        static const rule_type sentence_sep  = 0xF0; ///< \brief The sentence does not contain terminator like ".", "!"
                                                     ///< but ended with hard separator like CR, LF, PS or end of input.
        static const rule_type sentence_any  = sentence_term | sentence_sep;
                                                     ///< Either first or second sentence break type (0xFF).
        static const rule_type sentence_mask = sentence_any;
                                                     ///< Select all sentence breaking points (0xFF)
        /// @}

        ///
        /// \name Flags that describe a type of character break.
        ///
        /// At this point break iterator does not distinguish different
        /// kinds of characters so it is used for consistency.
        /// @{
        static const rule_type character_any  = 0xF;           ///< Not in use, just for consistency
        static const rule_type character_mask = character_any; ///< Select all character breaking points (0xF)
        /// @}

        ///
        /// \brief Get the mask that covers all rule variants for a specific boundary type.
        ///
        /// \param t the kind of boundary analysis whose mask is requested
        /// \return character_mask, word_mask, sentence_mask or line_mask according to \a t;
        ///         0 if \a t does not match any boundary_type enumerator
        ///
        inline rule_type boundary_rule(boundary_type t)
        {
            switch(t) {
            case character: return character_mask;
            case word:      return word_mask;
            case sentence:  return sentence_mask;
            case line:      return line_mask;
            default:        return 0;
            }
        }

        ///
        /// @}
        ///

    } // boundary
} // locale
} // boost
-
- #ifdef BOOST_MSVC
- #pragma warning(pop)
- #endif
- #endif
- // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|