123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305 |
- /*=============================================================================
- Copyright (c) 2001-2011 Joel de Guzman
- Distributed under the Boost Software License, Version 1.0. (See accompanying
- file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- Autogenerated by MultiStageTable.py (Unicode multi-stage
- table builder) (c) Peter Kankowski, 2008
- ==============================================================================*/
- #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
- #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010
- #include <boost/cstdint.hpp>
- # include "category_table.hpp"
- # include "script_table.hpp"
- # include "lowercase_table.hpp"
- # include "uppercase_table.hpp"
- namespace boost { namespace spirit { namespace ucd
- {
- // This header provides Basic (Level 1) Unicode Support
- // See http://unicode.org/reports/tr18/ for details
- struct properties
- {
- // bit pattern: xxMMMCCC
- // MMM: major_category
- // CCC: category
- enum major_category
- {
- letter,
- mark,
- number,
- separator,
- other,
- punctuation,
- symbol
- };
- enum category
- {
- uppercase_letter = 0, // [Lu] an uppercase letter
- lowercase_letter, // [Ll] a lowercase letter
- titlecase_letter, // [Lt] a digraphic character, with first part uppercase
- modifier_letter, // [Lm] a modifier letter
- other_letter, // [Lo] other letters, including syllables and ideographs
- nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width)
- enclosing_mark, // [Me] an enclosing combining mark
- spacing_mark, // [Mc] a spacing combining mark (positive advance width)
- decimal_number = 16, // [Nd] a decimal digit
- letter_number, // [Nl] a letterlike numeric character
- other_number, // [No] a numeric character of other type
- space_separator = 24, // [Zs] a space character (of various non-zero widths)
- line_separator, // [Zl] U+2028 LINE SEPARATOR only
- paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only
- control = 32, // [Cc] a C0 or C1 control code
- format, // [Cf] a format control character
- private_use, // [Co] a private-use character
- surrogate, // [Cs] a surrogate code point
- unassigned, // [Cn] a reserved unassigned code point or a noncharacter
- dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark
- open_punctuation, // [Ps] an opening punctuation mark (of a pair)
- close_punctuation, // [Pe] a closing punctuation mark (of a pair)
- connector_punctuation, // [Pc] a connecting punctuation mark, like a tie
- other_punctuation, // [Po] a punctuation mark of other type
- initial_punctuation, // [Pi] an initial quotation mark
- final_punctuation, // [Pf] a final quotation mark
- math_symbol = 48, // [Sm] a symbol of primarily mathematical use
- currency_symbol, // [Sc] a currency sign
- modifier_symbol, // [Sk] a non-letterlike modifier symbol
- other_symbol // [So] a symbol of other type
- };
- enum derived_properties
- {
- alphabetic = 64,
- uppercase = 128,
- lowercase = 256,
- white_space = 512,
- hex_digit = 1024,
- noncharacter_code_point = 2048,
- default_ignorable_code_point = 4096
- };
- enum script
- {
- arabic = 0,
- imperial_aramaic = 1,
- armenian = 2,
- avestan = 3,
- balinese = 4,
- bamum = 5,
- bengali = 6,
- bopomofo = 7,
- braille = 8,
- buginese = 9,
- buhid = 10,
- canadian_aboriginal = 11,
- carian = 12,
- cham = 13,
- cherokee = 14,
- coptic = 15,
- cypriot = 16,
- cyrillic = 17,
- devanagari = 18,
- deseret = 19,
- egyptian_hieroglyphs = 20,
- ethiopic = 21,
- georgian = 22,
- glagolitic = 23,
- gothic = 24,
- greek = 25,
- gujarati = 26,
- gurmukhi = 27,
- hangul = 28,
- han = 29,
- hanunoo = 30,
- hebrew = 31,
- hiragana = 32,
- katakana_or_hiragana = 33,
- old_italic = 34,
- javanese = 35,
- kayah_li = 36,
- katakana = 37,
- kharoshthi = 38,
- khmer = 39,
- kannada = 40,
- kaithi = 41,
- tai_tham = 42,
- lao = 43,
- latin = 44,
- lepcha = 45,
- limbu = 46,
- linear_b = 47,
- lisu = 48,
- lycian = 49,
- lydian = 50,
- malayalam = 51,
- mongolian = 52,
- meetei_mayek = 53,
- myanmar = 54,
- nko = 55,
- ogham = 56,
- ol_chiki = 57,
- old_turkic = 58,
- oriya = 59,
- osmanya = 60,
- phags_pa = 61,
- inscriptional_pahlavi = 62,
- phoenician = 63,
- inscriptional_parthian = 64,
- rejang = 65,
- runic = 66,
- samaritan = 67,
- old_south_arabian = 68,
- saurashtra = 69,
- shavian = 70,
- sinhala = 71,
- sundanese = 72,
- syloti_nagri = 73,
- syriac = 74,
- tagbanwa = 75,
- tai_le = 76,
- new_tai_lue = 77,
- tamil = 78,
- tai_viet = 79,
- telugu = 80,
- tifinagh = 81,
- tagalog = 82,
- thaana = 83,
- thai = 84,
- tibetan = 85,
- ugaritic = 86,
- vai = 87,
- old_persian = 88,
- cuneiform = 89,
- yi = 90,
- inherited = 91,
- common = 92,
- unknown = 93
- };
- };
- inline properties::category get_category(::boost::uint32_t ch)
- {
- return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
- }
- inline properties::major_category get_major_category(::boost::uint32_t ch)
- {
- return static_cast<properties::major_category>(get_category(ch) >> 3);
- }
- inline bool is_punctuation(::boost::uint32_t ch)
- {
- return get_major_category(ch) == properties::punctuation;
- }
- inline bool is_decimal_number(::boost::uint32_t ch)
- {
- return get_category(ch) == properties::decimal_number;
- }
- inline bool is_hex_digit(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::hex_digit) != 0;
- }
- inline bool is_control(::boost::uint32_t ch)
- {
- return get_category(ch) == properties::control;
- }
- inline bool is_alphabetic(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::alphabetic) != 0;
- }
- inline bool is_alphanumeric(::boost::uint32_t ch)
- {
- return is_decimal_number(ch) || is_alphabetic(ch);
- }
- inline bool is_uppercase(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::uppercase) != 0;
- }
- inline bool is_lowercase(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::lowercase) != 0;
- }
- inline bool is_white_space(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::white_space) != 0;
- }
- inline bool is_blank(::boost::uint32_t ch)
- {
- switch (ch)
- {
- case '\n': case '\v': case '\f': case '\r':
- return false;
- default:
- return is_white_space(ch)
- && !( get_category(ch) == properties::line_separator
- || get_category(ch) == properties::paragraph_separator
- );
- }
- }
- inline bool is_graph(::boost::uint32_t ch)
- {
- return !( is_white_space(ch)
- || get_category(ch) == properties::control
- || get_category(ch) == properties::surrogate
- || get_category(ch) == properties::unassigned
- );
- }
- inline bool is_print(::boost::uint32_t ch)
- {
- return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
- }
- inline bool is_noncharacter_code_point(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
- }
- inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
- {
- return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
- }
- inline properties::script get_script(::boost::uint32_t ch)
- {
- return static_cast<properties::script>(detail::script_lookup(ch) & 0x7F);
- }
- inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
- {
- // The table returns 0 to signal that this code maps to itself
- ::boost::uint32_t r = detail::lowercase_lookup(ch);
- return (r == 0)? ch : r;
- }
- inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
- {
- // The table returns 0 to signal that this code maps to itself
- ::boost::uint32_t r = detail::uppercase_lookup(ch);
- return (r == 0)? ch : r;
- }
- }}}
- #endif
|