ascii.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. /*=============================================================================
  2. Copyright (c) 2001-2011 Hartmut Kaiser
  3. Copyright (c) 2001-2011 Joel de Guzman
  4. Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. =============================================================================*/
  7. #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
  8. #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM
  9. #if defined(_MSC_VER)
  10. #pragma once
  11. #endif
  12. #include <climits>
  13. #include <boost/assert.hpp>
  14. #include <boost/cstdint.hpp>
  15. ///////////////////////////////////////////////////////////////////////////////
  16. // constants used to classify the single characters
  17. ///////////////////////////////////////////////////////////////////////////////
  // Character-class flags. Every flag occupies its own bit and all eight fit
  // in a single byte, so one table entry can carry any combination of them
  // (e.g. BOOST_CC_CTRL|BOOST_CC_SPACE for a tab).
  #define BOOST_CC_DIGIT  0x01    // decimal digit
  #define BOOST_CC_XDIGIT 0x02    // hexadecimal digit
  #define BOOST_CC_ALPHA  0x04    // letter
  #define BOOST_CC_CTRL   0x08    // control character
  #define BOOST_CC_LOWER  0x10    // lower-case letter
  #define BOOST_CC_UPPER  0x20    // upper-case letter
  #define BOOST_CC_SPACE  0x40    // whitespace
  #define BOOST_CC_PUNCT  0x80    // punctuation
  26. namespace boost { namespace spirit { namespace char_encoding
  27. {
  28. // The detection of isgraph(), isprint() and isblank() is done programmatically
  29. // to keep the character type table small. Additionally, these functions are
  30. // rather seldom used and the programmatic detection is very simple.
  31. ///////////////////////////////////////////////////////////////////////////
  32. // ASCII character classification table
  33. ///////////////////////////////////////////////////////////////////////////
  34. const unsigned char ascii_char_types[] =
  35. {
  36. /* NUL 0 0 */ BOOST_CC_CTRL,
  37. /* SOH 1 1 */ BOOST_CC_CTRL,
  38. /* STX 2 2 */ BOOST_CC_CTRL,
  39. /* ETX 3 3 */ BOOST_CC_CTRL,
  40. /* EOT 4 4 */ BOOST_CC_CTRL,
  41. /* ENQ 5 5 */ BOOST_CC_CTRL,
  42. /* ACK 6 6 */ BOOST_CC_CTRL,
  43. /* BEL 7 7 */ BOOST_CC_CTRL,
  44. /* BS 8 8 */ BOOST_CC_CTRL,
  45. /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  46. /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  47. /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  48. /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  49. /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  50. /* SO 14 e */ BOOST_CC_CTRL,
  51. /* SI 15 f */ BOOST_CC_CTRL,
  52. /* DLE 16 10 */ BOOST_CC_CTRL,
  53. /* DC1 17 11 */ BOOST_CC_CTRL,
  54. /* DC2 18 12 */ BOOST_CC_CTRL,
  55. /* DC3 19 13 */ BOOST_CC_CTRL,
  56. /* DC4 20 14 */ BOOST_CC_CTRL,
  57. /* NAK 21 15 */ BOOST_CC_CTRL,
  58. /* SYN 22 16 */ BOOST_CC_CTRL,
  59. /* ETB 23 17 */ BOOST_CC_CTRL,
  60. /* CAN 24 18 */ BOOST_CC_CTRL,
  61. /* EM 25 19 */ BOOST_CC_CTRL,
  62. /* SUB 26 1a */ BOOST_CC_CTRL,
  63. /* ESC 27 1b */ BOOST_CC_CTRL,
  64. /* FS 28 1c */ BOOST_CC_CTRL,
  65. /* GS 29 1d */ BOOST_CC_CTRL,
  66. /* RS 30 1e */ BOOST_CC_CTRL,
  67. /* US 31 1f */ BOOST_CC_CTRL,
  68. /* SP 32 20 */ BOOST_CC_SPACE,
  69. /* ! 33 21 */ BOOST_CC_PUNCT,
  70. /* " 34 22 */ BOOST_CC_PUNCT,
  71. /* # 35 23 */ BOOST_CC_PUNCT,
  72. /* $ 36 24 */ BOOST_CC_PUNCT,
  73. /* % 37 25 */ BOOST_CC_PUNCT,
  74. /* & 38 26 */ BOOST_CC_PUNCT,
  75. /* ' 39 27 */ BOOST_CC_PUNCT,
  76. /* ( 40 28 */ BOOST_CC_PUNCT,
  77. /* ) 41 29 */ BOOST_CC_PUNCT,
  78. /* * 42 2a */ BOOST_CC_PUNCT,
  79. /* + 43 2b */ BOOST_CC_PUNCT,
  80. /* , 44 2c */ BOOST_CC_PUNCT,
  81. /* - 45 2d */ BOOST_CC_PUNCT,
  82. /* . 46 2e */ BOOST_CC_PUNCT,
  83. /* / 47 2f */ BOOST_CC_PUNCT,
  84. /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  85. /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  86. /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  87. /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  88. /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  89. /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  90. /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  91. /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  92. /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  93. /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  94. /* : 58 3a */ BOOST_CC_PUNCT,
  95. /* ; 59 3b */ BOOST_CC_PUNCT,
  96. /* < 60 3c */ BOOST_CC_PUNCT,
  97. /* = 61 3d */ BOOST_CC_PUNCT,
  98. /* > 62 3e */ BOOST_CC_PUNCT,
  99. /* ? 63 3f */ BOOST_CC_PUNCT,
  100. /* @ 64 40 */ BOOST_CC_PUNCT,
  101. /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  102. /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  103. /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  104. /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  105. /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  106. /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  107. /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  108. /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  109. /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  110. /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  111. /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  112. /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  113. /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  114. /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  115. /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  116. /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  117. /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  118. /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  119. /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  120. /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  121. /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  122. /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  123. /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  124. /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  125. /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  126. /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  127. /* [ 91 5b */ BOOST_CC_PUNCT,
  128. /* \ 92 5c */ BOOST_CC_PUNCT,
  129. /* ] 93 5d */ BOOST_CC_PUNCT,
  130. /* ^ 94 5e */ BOOST_CC_PUNCT,
  131. /* _ 95 5f */ BOOST_CC_PUNCT,
  132. /* ` 96 60 */ BOOST_CC_PUNCT,
  133. /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  134. /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  135. /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  136. /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  137. /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  138. /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  139. /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  140. /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  141. /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  142. /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  143. /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  144. /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  145. /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  146. /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  147. /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  148. /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  149. /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  150. /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  151. /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  152. /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  153. /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  154. /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  155. /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  156. /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  157. /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  158. /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  159. /* { 123 7b */ BOOST_CC_PUNCT,
  160. /* | 124 7c */ BOOST_CC_PUNCT,
  161. /* } 125 7d */ BOOST_CC_PUNCT,
  162. /* ~ 126 7e */ BOOST_CC_PUNCT,
  163. /* DEL 127 7f */ BOOST_CC_CTRL,
  164. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  165. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  166. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  167. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  168. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  169. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  170. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  171. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  172. };
  173. ///////////////////////////////////////////////////////////////////////////
  174. // Test characters for specified conditions (using ASCII)
  175. ///////////////////////////////////////////////////////////////////////////
  176. struct ascii
  177. {
  178. typedef char char_type;
  179. typedef unsigned char classify_type;
  180. static bool
  181. isascii_(int ch)
  182. {
  183. return 0 == (ch & ~0x7f);
  184. }
  185. static bool
  186. ischar(int ch)
  187. {
  188. return isascii_(ch);
  189. }
  190. // *** Note on assertions: The precondition is that the calls to
  191. // these functions do not violate the required range of ch (type int)
  192. // which is that strict_ischar(ch) should be true. It is the
  193. // responsibility of the caller to make sure this precondition is not
  194. // violated.
  195. static bool
  196. strict_ischar(int ch)
  197. {
  198. return ch >= 0 && ch <= 127;
  199. }
  200. static bool
  201. isalnum(int ch)
  202. {
  203. BOOST_ASSERT(strict_ischar(ch));
  204. return (ascii_char_types[ch] & BOOST_CC_ALPHA)
  205. || (ascii_char_types[ch] & BOOST_CC_DIGIT);
  206. }
  207. static bool
  208. isalpha(int ch)
  209. {
  210. BOOST_ASSERT(strict_ischar(ch));
  211. return (ascii_char_types[ch] & BOOST_CC_ALPHA) ? true : false;
  212. }
  213. static bool
  214. isdigit(int ch)
  215. {
  216. BOOST_ASSERT(strict_ischar(ch));
  217. return (ascii_char_types[ch] & BOOST_CC_DIGIT) ? true : false;
  218. }
  219. static bool
  220. isxdigit(int ch)
  221. {
  222. BOOST_ASSERT(strict_ischar(ch));
  223. return (ascii_char_types[ch] & BOOST_CC_XDIGIT) ? true : false;
  224. }
  225. static bool
  226. iscntrl(int ch)
  227. {
  228. BOOST_ASSERT(strict_ischar(ch));
  229. return (ascii_char_types[ch] & BOOST_CC_CTRL) ? true : false;
  230. }
  231. static bool
  232. isgraph(int ch)
  233. {
  234. BOOST_ASSERT(strict_ischar(ch));
  235. return ('\x21' <= ch && ch <= '\x7e');
  236. }
  237. static bool
  238. islower(int ch)
  239. {
  240. BOOST_ASSERT(strict_ischar(ch));
  241. return (ascii_char_types[ch] & BOOST_CC_LOWER) ? true : false;
  242. }
  243. static bool
  244. isprint(int ch)
  245. {
  246. BOOST_ASSERT(strict_ischar(ch));
  247. return ('\x20' <= ch && ch <= '\x7e');
  248. }
  249. static bool
  250. ispunct(int ch)
  251. {
  252. BOOST_ASSERT(strict_ischar(ch));
  253. return (ascii_char_types[ch] & BOOST_CC_PUNCT) ? true : false;
  254. }
  255. static bool
  256. isspace(int ch)
  257. {
  258. BOOST_ASSERT(strict_ischar(ch));
  259. return (ascii_char_types[ch] & BOOST_CC_SPACE) ? true : false;
  260. }
  261. static bool
  262. isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
  263. {
  264. BOOST_ASSERT(strict_ischar(ch));
  265. return ('\x09' == ch || '\x20' == ch);
  266. }
  267. static bool
  268. isupper(int ch)
  269. {
  270. BOOST_ASSERT(strict_ischar(ch));
  271. return (ascii_char_types[ch] & BOOST_CC_UPPER) ? true : false;
  272. }
  273. ///////////////////////////////////////////////////////////////////////
  274. // Simple character conversions
  275. ///////////////////////////////////////////////////////////////////////
  276. static int
  277. tolower(int ch)
  278. {
  279. BOOST_ASSERT(strict_ischar(ch));
  280. return isupper(ch) ? (ch - 'A' + 'a') : ch;
  281. }
  282. static int
  283. toupper(int ch)
  284. {
  285. BOOST_ASSERT(strict_ischar(ch));
  286. return islower(ch) ? (ch - 'a' + 'A') : ch;
  287. }
  288. static ::boost::uint32_t
  289. toucs4(int ch)
  290. {
  291. BOOST_ASSERT(strict_ischar(ch));
  292. return ch;
  293. }
  294. };
  295. }}}
  296. ///////////////////////////////////////////////////////////////////////////////
  297. // undefine macros
  298. ///////////////////////////////////////////////////////////////////////////////
  // The BOOST_CC_* flags are only needed while this header is being
  // processed; remove them (in the order they were defined) so they do not
  // stay defined for code that comes after this header.
  #undef BOOST_CC_DIGIT
  #undef BOOST_CC_XDIGIT
  #undef BOOST_CC_ALPHA
  #undef BOOST_CC_CTRL
  #undef BOOST_CC_LOWER
  #undef BOOST_CC_UPPER
  #undef BOOST_CC_SPACE
  #undef BOOST_CC_PUNCT
  307. #endif