123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337 |
- /*=============================================================================
- Copyright (c) 2001-2011 Hartmut Kaiser
- Copyright (c) 2001-2011 Joel de Guzman
- Distributed under the Boost Software License, Version 1.0. (See accompanying
- file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- =============================================================================*/
- #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
- #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM
- #if defined(_MSC_VER)
- #pragma once
- #endif
- #include <climits>
- #include <boost/assert.hpp>
- #include <boost/cstdint.hpp>
- ///////////////////////////////////////////////////////////////////////////////
- // constants used to classify the single characters
- ///////////////////////////////////////////////////////////////////////////////
// Character-class flags. Each flag occupies its own bit, so several of them
// can be OR-ed together into a single byte of the classification table.
#define BOOST_CC_DIGIT  (1 << 0)    // 0x01
#define BOOST_CC_XDIGIT (1 << 1)    // 0x02
#define BOOST_CC_ALPHA  (1 << 2)    // 0x04
#define BOOST_CC_CTRL   (1 << 3)    // 0x08
#define BOOST_CC_LOWER  (1 << 4)    // 0x10
#define BOOST_CC_UPPER  (1 << 5)    // 0x20
#define BOOST_CC_SPACE  (1 << 6)    // 0x40
#define BOOST_CC_PUNCT  (1 << 7)    // 0x80
- namespace boost { namespace spirit { namespace char_encoding
- {
- // The detection of isgraph(), isprint() and isblank() is done programmatically
- // to keep the character type table small. Additionally, these functions are
- // rather seldom used and the programmatic detection is very simple.
- ///////////////////////////////////////////////////////////////////////////
- // ASCII character classification table
- ///////////////////////////////////////////////////////////////////////////
- const unsigned char ascii_char_types[] =
- {
- /* NUL 0 0 */ BOOST_CC_CTRL,
- /* SOH 1 1 */ BOOST_CC_CTRL,
- /* STX 2 2 */ BOOST_CC_CTRL,
- /* ETX 3 3 */ BOOST_CC_CTRL,
- /* EOT 4 4 */ BOOST_CC_CTRL,
- /* ENQ 5 5 */ BOOST_CC_CTRL,
- /* ACK 6 6 */ BOOST_CC_CTRL,
- /* BEL 7 7 */ BOOST_CC_CTRL,
- /* BS 8 8 */ BOOST_CC_CTRL,
- /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE,
- /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE,
- /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE,
- /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE,
- /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE,
- /* SO 14 e */ BOOST_CC_CTRL,
- /* SI 15 f */ BOOST_CC_CTRL,
- /* DLE 16 10 */ BOOST_CC_CTRL,
- /* DC1 17 11 */ BOOST_CC_CTRL,
- /* DC2 18 12 */ BOOST_CC_CTRL,
- /* DC3 19 13 */ BOOST_CC_CTRL,
- /* DC4 20 14 */ BOOST_CC_CTRL,
- /* NAK 21 15 */ BOOST_CC_CTRL,
- /* SYN 22 16 */ BOOST_CC_CTRL,
- /* ETB 23 17 */ BOOST_CC_CTRL,
- /* CAN 24 18 */ BOOST_CC_CTRL,
- /* EM 25 19 */ BOOST_CC_CTRL,
- /* SUB 26 1a */ BOOST_CC_CTRL,
- /* ESC 27 1b */ BOOST_CC_CTRL,
- /* FS 28 1c */ BOOST_CC_CTRL,
- /* GS 29 1d */ BOOST_CC_CTRL,
- /* RS 30 1e */ BOOST_CC_CTRL,
- /* US 31 1f */ BOOST_CC_CTRL,
- /* SP 32 20 */ BOOST_CC_SPACE,
- /* ! 33 21 */ BOOST_CC_PUNCT,
- /* " 34 22 */ BOOST_CC_PUNCT,
- /* # 35 23 */ BOOST_CC_PUNCT,
- /* $ 36 24 */ BOOST_CC_PUNCT,
- /* % 37 25 */ BOOST_CC_PUNCT,
- /* & 38 26 */ BOOST_CC_PUNCT,
- /* ' 39 27 */ BOOST_CC_PUNCT,
- /* ( 40 28 */ BOOST_CC_PUNCT,
- /* ) 41 29 */ BOOST_CC_PUNCT,
- /* * 42 2a */ BOOST_CC_PUNCT,
- /* + 43 2b */ BOOST_CC_PUNCT,
- /* , 44 2c */ BOOST_CC_PUNCT,
- /* - 45 2d */ BOOST_CC_PUNCT,
- /* . 46 2e */ BOOST_CC_PUNCT,
- /* / 47 2f */ BOOST_CC_PUNCT,
- /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
- /* : 58 3a */ BOOST_CC_PUNCT,
- /* ; 59 3b */ BOOST_CC_PUNCT,
- /* < 60 3c */ BOOST_CC_PUNCT,
- /* = 61 3d */ BOOST_CC_PUNCT,
- /* > 62 3e */ BOOST_CC_PUNCT,
- /* ? 63 3f */ BOOST_CC_PUNCT,
- /* @ 64 40 */ BOOST_CC_PUNCT,
- /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
- /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
- /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
- /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
- /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
- /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
- /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
- /* [ 91 5b */ BOOST_CC_PUNCT,
- /* \ 92 5c */ BOOST_CC_PUNCT,
- /* ] 93 5d */ BOOST_CC_PUNCT,
- /* ^ 94 5e */ BOOST_CC_PUNCT,
- /* _ 95 5f */ BOOST_CC_PUNCT,
- /* ` 96 60 */ BOOST_CC_PUNCT,
- /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
- /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
- /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
- /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
- /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
- /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
- /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
- /* { 123 7b */ BOOST_CC_PUNCT,
- /* | 124 7c */ BOOST_CC_PUNCT,
- /* } 125 7d */ BOOST_CC_PUNCT,
- /* ~ 126 7e */ BOOST_CC_PUNCT,
- /* DEL 127 7f */ BOOST_CC_CTRL,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- };
- ///////////////////////////////////////////////////////////////////////////
- // Test characters for specified conditions (using ASCII)
- ///////////////////////////////////////////////////////////////////////////
- struct ascii
- {
- typedef char char_type;
- typedef unsigned char classify_type;
- static bool
- isascii_(int ch)
- {
- return 0 == (ch & ~0x7f);
- }
- static bool
- ischar(int ch)
- {
- return isascii_(ch);
- }
- // *** Note on assertions: The precondition is that the calls to
- // these functions do not violate the required range of ch (type int)
- // which is that strict_ischar(ch) should be true. It is the
- // responsibility of the caller to make sure this precondition is not
- // violated.
- static bool
- strict_ischar(int ch)
- {
- return ch >= 0 && ch <= 127;
- }
- static bool
- isalnum(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_ALPHA)
- || (ascii_char_types[ch] & BOOST_CC_DIGIT);
- }
- static bool
- isalpha(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false;
- }
- static bool
- isdigit(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false;
- }
- static bool
- isxdigit(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false;
- }
- static bool
- iscntrl(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false;
- }
- static bool
- isgraph(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return ('\x21' <= ch && ch <= '\x7e');
- }
- static bool
- islower(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false;
- }
- static bool
- isprint(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return ('\x20' <= ch && ch <= '\x7e');
- }
- static bool
- ispunct(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false;
- }
- static bool
- isspace(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false;
- }
- static bool
- isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return ('\x09' == ch || '\x20' == ch);
- }
- static bool
- isupper(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false;
- }
- ///////////////////////////////////////////////////////////////////////
- // Simple character conversions
- ///////////////////////////////////////////////////////////////////////
- static int
- tolower(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return isupper(ch) ? (ch - 'A' + 'a') : ch;
- }
- static int
- toupper(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return islower(ch) ? (ch - 'a' + 'A') : ch;
- }
- static ::boost::uint32_t
- toucs4(int ch)
- {
- BOOST_ASSERT(strict_ischar(ch));
- return ch;
- }
- };
- }}}
- ///////////////////////////////////////////////////////////////////////////////
- // undefine macros
- ///////////////////////////////////////////////////////////////////////////////
// Remove the classification flag macros again so they do not leak out of
// this header (listed in the same order in which they were defined).
#undef BOOST_CC_DIGIT
#undef BOOST_CC_XDIGIT
#undef BOOST_CC_ALPHA
#undef BOOST_CC_CTRL
#undef BOOST_CC_LOWER
#undef BOOST_CC_UPPER
#undef BOOST_CC_SPACE
#undef BOOST_CC_PUNCT
- #endif
|