iso8859_1.hpp 28 KB


  1. /*=============================================================================
  2. Copyright (c) 2001-2011 Hartmut Kaiser
  3. Copyright (c) 2001-2011 Joel de Guzman
  4. Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. =============================================================================*/
  7. #if !defined(BOOST_SPIRIT_ISO8859_1_APRIL_26_2006_1106PM)
  8. #define BOOST_SPIRIT_ISO8859_1_APRIL_26_2006_1106PM
  9. #if defined(_MSC_VER)
  10. #pragma once
  11. #endif
  12. #include <climits>
  13. #include <boost/assert.hpp>
  14. #include <boost/cstdint.hpp>
  15. ///////////////////////////////////////////////////////////////////////////////
  16. // constants used to classify the single characters
  17. ///////////////////////////////////////////////////////////////////////////////
// Each constant is a distinct single bit, so several classes can be OR-ed
// together in one unsigned char entry of the classification table below.
// All of them are #undef'd again at the end of this header.

// decimal digit; tested by isdigit() and isalnum()
#define BOOST_CC_DIGIT 0x0001
// hexadecimal digit; tested by isxdigit()
#define BOOST_CC_XDIGIT 0x0002
// alphabetic character; tested by isalpha() and isalnum()
#define BOOST_CC_ALPHA 0x0004
// control character; tested by iscntrl()
#define BOOST_CC_CTRL 0x0008
// lower case letter; tested by islower()
#define BOOST_CC_LOWER 0x0010
// upper case letter; tested by isupper()
#define BOOST_CC_UPPER 0x0020
// white space; tested by isspace()
#define BOOST_CC_SPACE 0x0040
// punctuation; tested by ispunct()
#define BOOST_CC_PUNCT 0x0080
  26. namespace boost { namespace spirit { namespace char_encoding
  27. {
  28. // The detection of isgraph(), isprint() and isblank() is done programmatically
  29. // to keep the character type table small. Additionally, these functions are
  30. // rather seldom used and the programmatic detection is very simple.
  31. ///////////////////////////////////////////////////////////////////////////
  32. // ISO 8859-1 character classification table
  33. //
  34. // the comments intentionally contain non-ascii characters
  35. // boostinspect:noascii
  36. ///////////////////////////////////////////////////////////////////////////
  37. const unsigned char iso8859_1_char_types[] =
  38. {
  39. /* NUL 0 0 */ BOOST_CC_CTRL,
  40. /* SOH 1 1 */ BOOST_CC_CTRL,
  41. /* STX 2 2 */ BOOST_CC_CTRL,
  42. /* ETX 3 3 */ BOOST_CC_CTRL,
  43. /* EOT 4 4 */ BOOST_CC_CTRL,
  44. /* ENQ 5 5 */ BOOST_CC_CTRL,
  45. /* ACK 6 6 */ BOOST_CC_CTRL,
  46. /* BEL 7 7 */ BOOST_CC_CTRL,
  47. /* BS 8 8 */ BOOST_CC_CTRL,
  48. /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  49. /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  50. /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  51. /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  52. /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  53. /* SO 14 e */ BOOST_CC_CTRL,
  54. /* SI 15 f */ BOOST_CC_CTRL,
  55. /* DLE 16 10 */ BOOST_CC_CTRL,
  56. /* DC1 17 11 */ BOOST_CC_CTRL,
  57. /* DC2 18 12 */ BOOST_CC_CTRL,
  58. /* DC3 19 13 */ BOOST_CC_CTRL,
  59. /* DC4 20 14 */ BOOST_CC_CTRL,
  60. /* NAK 21 15 */ BOOST_CC_CTRL,
  61. /* SYN 22 16 */ BOOST_CC_CTRL,
  62. /* ETB 23 17 */ BOOST_CC_CTRL,
  63. /* CAN 24 18 */ BOOST_CC_CTRL,
  64. /* EM 25 19 */ BOOST_CC_CTRL,
  65. /* SUB 26 1a */ BOOST_CC_CTRL,
  66. /* ESC 27 1b */ BOOST_CC_CTRL,
  67. /* FS 28 1c */ BOOST_CC_CTRL,
  68. /* GS 29 1d */ BOOST_CC_CTRL,
  69. /* RS 30 1e */ BOOST_CC_CTRL,
  70. /* US 31 1f */ BOOST_CC_CTRL,
  71. /* SP 32 20 */ BOOST_CC_SPACE,
  72. /* ! 33 21 */ BOOST_CC_PUNCT,
  73. /* " 34 22 */ BOOST_CC_PUNCT,
  74. /* # 35 23 */ BOOST_CC_PUNCT,
  75. /* $ 36 24 */ BOOST_CC_PUNCT,
  76. /* % 37 25 */ BOOST_CC_PUNCT,
  77. /* & 38 26 */ BOOST_CC_PUNCT,
  78. /* ' 39 27 */ BOOST_CC_PUNCT,
  79. /* ( 40 28 */ BOOST_CC_PUNCT,
  80. /* ) 41 29 */ BOOST_CC_PUNCT,
  81. /* * 42 2a */ BOOST_CC_PUNCT,
  82. /* + 43 2b */ BOOST_CC_PUNCT,
  83. /* , 44 2c */ BOOST_CC_PUNCT,
  84. /* - 45 2d */ BOOST_CC_PUNCT,
  85. /* . 46 2e */ BOOST_CC_PUNCT,
  86. /* / 47 2f */ BOOST_CC_PUNCT,
  87. /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  88. /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  89. /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  90. /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  91. /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  92. /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  93. /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  94. /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  95. /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  96. /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  97. /* : 58 3a */ BOOST_CC_PUNCT,
  98. /* ; 59 3b */ BOOST_CC_PUNCT,
  99. /* < 60 3c */ BOOST_CC_PUNCT,
  100. /* = 61 3d */ BOOST_CC_PUNCT,
  101. /* > 62 3e */ BOOST_CC_PUNCT,
  102. /* ? 63 3f */ BOOST_CC_PUNCT,
  103. /* @ 64 40 */ BOOST_CC_PUNCT,
  104. /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  105. /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  106. /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  107. /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  108. /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  109. /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  110. /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  111. /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  112. /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  113. /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  114. /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  115. /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  116. /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  117. /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  118. /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  119. /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  120. /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  121. /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  122. /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  123. /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  124. /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  125. /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  126. /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  127. /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  128. /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  129. /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  130. /* [ 91 5b */ BOOST_CC_PUNCT,
  131. /* \ 92 5c */ BOOST_CC_PUNCT,
  132. /* ] 93 5d */ BOOST_CC_PUNCT,
  133. /* ^ 94 5e */ BOOST_CC_PUNCT,
  134. /* _ 95 5f */ BOOST_CC_PUNCT,
  135. /* ` 96 60 */ BOOST_CC_PUNCT,
  136. /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  137. /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  138. /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  139. /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  140. /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  141. /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  142. /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  143. /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  144. /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  145. /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  146. /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  147. /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  148. /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  149. /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  150. /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  151. /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  152. /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  153. /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  154. /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  155. /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  156. /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  157. /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  158. /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  159. /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  160. /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  161. /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  162. /* { 123 7b */ BOOST_CC_PUNCT,
  163. /* | 124 7c */ BOOST_CC_PUNCT,
  164. /* } 125 7d */ BOOST_CC_PUNCT,
  165. /* ~ 126 7e */ BOOST_CC_PUNCT,
  166. /* DEL 127 7f */ BOOST_CC_CTRL,
  167. /* -- 128 80 */ BOOST_CC_CTRL,
  168. /* -- 129 81 */ BOOST_CC_CTRL,
  169. /* -- 130 82 */ BOOST_CC_CTRL,
  170. /* -- 131 83 */ BOOST_CC_CTRL,
  171. /* -- 132 84 */ BOOST_CC_CTRL,
  172. /* -- 133 85 */ BOOST_CC_CTRL,
  173. /* -- 134 86 */ BOOST_CC_CTRL,
  174. /* -- 135 87 */ BOOST_CC_CTRL,
  175. /* -- 136 88 */ BOOST_CC_CTRL,
  176. /* -- 137 89 */ BOOST_CC_CTRL,
  177. /* -- 138 8a */ BOOST_CC_CTRL,
  178. /* -- 139 8b */ BOOST_CC_CTRL,
  179. /* -- 140 8c */ BOOST_CC_CTRL,
  180. /* -- 141 8d */ BOOST_CC_CTRL,
  181. /* -- 142 8e */ BOOST_CC_CTRL,
  182. /* -- 143 8f */ BOOST_CC_CTRL,
  183. /* -- 144 90 */ BOOST_CC_CTRL,
  184. /* -- 145 91 */ BOOST_CC_CTRL,
  185. /* -- 146 92 */ BOOST_CC_CTRL,
  186. /* -- 147 93 */ BOOST_CC_CTRL,
  187. /* -- 148 94 */ BOOST_CC_CTRL,
  188. /* -- 149 95 */ BOOST_CC_CTRL,
  189. /* -- 150 96 */ BOOST_CC_CTRL,
  190. /* -- 151 97 */ BOOST_CC_CTRL,
  191. /* -- 152 98 */ BOOST_CC_CTRL,
  192. /* -- 153 99 */ BOOST_CC_CTRL,
  193. /* -- 154 9a */ BOOST_CC_CTRL,
  194. /* -- 155 9b */ BOOST_CC_CTRL,
  195. /* -- 156 9c */ BOOST_CC_CTRL,
  196. /* -- 157 9d */ BOOST_CC_CTRL,
  197. /* -- 158 9e */ BOOST_CC_CTRL,
  198. /* -- 159 9f */ BOOST_CC_CTRL,
  199. /* 160 a0 */ BOOST_CC_SPACE,
  200. /* � 161 a1 */ BOOST_CC_PUNCT,
  201. /* � 162 a2 */ BOOST_CC_PUNCT,
  202. /* � 163 a3 */ BOOST_CC_PUNCT,
  203. /* � 164 a4 */ BOOST_CC_PUNCT,
  204. /* � 165 a5 */ BOOST_CC_PUNCT,
  205. /* � 166 a6 */ BOOST_CC_PUNCT,
  206. /* � 167 a7 */ BOOST_CC_PUNCT,
  207. /* � 168 a8 */ BOOST_CC_PUNCT,
  208. /* � 169 a9 */ BOOST_CC_PUNCT,
  209. /* � 170 aa */ BOOST_CC_PUNCT,
  210. /* � 171 ab */ BOOST_CC_PUNCT,
  211. /* � 172 ac */ BOOST_CC_PUNCT,
  212. /* � 173 ad */ BOOST_CC_PUNCT,
  213. /* � 174 ae */ BOOST_CC_PUNCT,
  214. /* � 175 af */ BOOST_CC_PUNCT,
  215. /* � 176 b0 */ BOOST_CC_PUNCT,
  216. /* � 177 b1 */ BOOST_CC_PUNCT,
  217. /* � 178 b2 */ BOOST_CC_DIGIT|BOOST_CC_PUNCT,
  218. /* � 179 b3 */ BOOST_CC_DIGIT|BOOST_CC_PUNCT,
  219. /* � 180 b4 */ BOOST_CC_PUNCT,
  220. /* � 181 b5 */ BOOST_CC_PUNCT,
  221. /* � 182 b6 */ BOOST_CC_PUNCT,
  222. /* � 183 b7 */ BOOST_CC_PUNCT,
  223. /* � 184 b8 */ BOOST_CC_PUNCT,
  224. /* � 185 b9 */ BOOST_CC_DIGIT|BOOST_CC_PUNCT,
  225. /* � 186 ba */ BOOST_CC_PUNCT,
  226. /* � 187 bb */ BOOST_CC_PUNCT,
  227. /* � 188 bc */ BOOST_CC_PUNCT,
  228. /* � 189 bd */ BOOST_CC_PUNCT,
  229. /* � 190 be */ BOOST_CC_PUNCT,
  230. /* � 191 bf */ BOOST_CC_PUNCT,
  231. /* � 192 c0 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  232. /* � 193 c1 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  233. /* � 194 c2 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  234. /* � 195 c3 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  235. /* � 196 c4 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  236. /* � 197 c5 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  237. /* � 198 c6 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  238. /* � 199 c7 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  239. /* � 200 c8 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  240. /* � 201 c9 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  241. /* � 202 ca */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  242. /* � 203 cb */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  243. /* � 204 cc */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  244. /* � 205 cd */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  245. /* � 206 ce */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  246. /* � 207 cf */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  247. /* � 208 d0 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  248. /* � 209 d1 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  249. /* � 210 d2 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  250. /* � 211 d3 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  251. /* � 212 d4 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  252. /* � 213 d5 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  253. /* � 214 d6 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  254. /* � 215 d7 */ BOOST_CC_PUNCT,
  255. /* � 216 d8 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  256. /* � 217 d9 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  257. /* � 218 da */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  258. /* � 219 db */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  259. /* � 220 dc */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  260. /* � 221 dd */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  261. /* � 222 de */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  262. /* � 223 df */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  263. /* � 224 e0 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  264. /* � 225 e1 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  265. /* � 226 e2 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  266. /* � 227 e3 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  267. /* � 228 e4 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  268. /* � 229 e5 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  269. /* � 230 e6 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  270. /* � 231 e7 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  271. /* � 232 e8 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  272. /* � 233 e9 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  273. /* � 234 ea */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  274. /* � 235 eb */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  275. /* � 236 ec */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  276. /* � 237 ed */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  277. /* � 238 ee */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  278. /* � 239 ef */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  279. /* � 240 f0 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  280. /* � 241 f1 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  281. /* � 242 f2 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  282. /* � 243 f3 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  283. /* � 244 f4 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  284. /* � 245 f5 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  285. /* � 246 f6 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  286. /* � 247 f7 */ BOOST_CC_PUNCT,
  287. /* � 248 f8 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  288. /* � 249 f9 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  289. /* � 250 fa */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  290. /* � 251 fb */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  291. /* � 252 fc */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  292. /* � 253 fd */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  293. /* � 254 fe */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  294. /* � 255 ff */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  295. };
  296. ///////////////////////////////////////////////////////////////////////////
  297. // ISO 8859-1 character conversion table
  298. ///////////////////////////////////////////////////////////////////////////
  299. const unsigned char iso8859_1_char_conversion[] =
  300. {
  301. /* NUL 0 0 */ '\0',
  302. /* SOH 1 1 */ '\0',
  303. /* STX 2 2 */ '\0',
  304. /* ETX 3 3 */ '\0',
  305. /* EOT 4 4 */ '\0',
  306. /* ENQ 5 5 */ '\0',
  307. /* ACK 6 6 */ '\0',
  308. /* BEL 7 7 */ '\0',
  309. /* BS 8 8 */ '\0',
  310. /* HT 9 9 */ '\0',
  311. /* NL 10 a */ '\0',
  312. /* VT 11 b */ '\0',
  313. /* NP 12 c */ '\0',
  314. /* CR 13 d */ '\0',
  315. /* SO 14 e */ '\0',
  316. /* SI 15 f */ '\0',
  317. /* DLE 16 10 */ '\0',
  318. /* DC1 17 11 */ '\0',
  319. /* DC2 18 12 */ '\0',
  320. /* DC3 19 13 */ '\0',
  321. /* DC4 20 14 */ '\0',
  322. /* NAK 21 15 */ '\0',
  323. /* SYN 22 16 */ '\0',
  324. /* ETB 23 17 */ '\0',
  325. /* CAN 24 18 */ '\0',
  326. /* EM 25 19 */ '\0',
  327. /* SUB 26 1a */ '\0',
  328. /* ESC 27 1b */ '\0',
  329. /* FS 28 1c */ '\0',
  330. /* GS 29 1d */ '\0',
  331. /* RS 30 1e */ '\0',
  332. /* US 31 1f */ '\0',
  333. /* SP 32 20 */ '\0',
  334. /* ! 33 21 */ '\0',
  335. /* " 34 22 */ '\0',
  336. /* # 35 23 */ '\0',
  337. /* $ 36 24 */ '\0',
  338. /* % 37 25 */ '\0',
  339. /* & 38 26 */ '\0',
  340. /* ' 39 27 */ '\0',
  341. /* ( 40 28 */ '\0',
  342. /* ) 41 29 */ '\0',
  343. /* * 42 2a */ '\0',
  344. /* + 43 2b */ '\0',
  345. /* , 44 2c */ '\0',
  346. /* - 45 2d */ '\0',
  347. /* . 46 2e */ '\0',
  348. /* / 47 2f */ '\0',
  349. /* 0 48 30 */ '\0',
  350. /* 1 49 31 */ '\0',
  351. /* 2 50 32 */ '\0',
  352. /* 3 51 33 */ '\0',
  353. /* 4 52 34 */ '\0',
  354. /* 5 53 35 */ '\0',
  355. /* 6 54 36 */ '\0',
  356. /* 7 55 37 */ '\0',
  357. /* 8 56 38 */ '\0',
  358. /* 9 57 39 */ '\0',
  359. /* : 58 3a */ '\0',
  360. /* ; 59 3b */ '\0',
  361. /* < 60 3c */ '\0',
  362. /* = 61 3d */ '\0',
  363. /* > 62 3e */ '\0',
  364. /* ? 63 3f */ '\0',
  365. /* @ 64 40 */ '\0',
  366. /* A 65 41 */ 'a',
  367. /* B 66 42 */ 'b',
  368. /* C 67 43 */ 'c',
  369. /* D 68 44 */ 'd',
  370. /* E 69 45 */ 'e',
  371. /* F 70 46 */ 'f',
  372. /* G 71 47 */ 'g',
  373. /* H 72 48 */ 'h',
  374. /* I 73 49 */ 'i',
  375. /* J 74 4a */ 'j',
  376. /* K 75 4b */ 'k',
  377. /* L 76 4c */ 'l',
  378. /* M 77 4d */ 'm',
  379. /* N 78 4e */ 'n',
  380. /* O 79 4f */ 'o',
  381. /* P 80 50 */ 'p',
  382. /* Q 81 51 */ 'q',
  383. /* R 82 52 */ 'r',
  384. /* S 83 53 */ 's',
  385. /* T 84 54 */ 't',
  386. /* U 85 55 */ 'u',
  387. /* V 86 56 */ 'v',
  388. /* W 87 57 */ 'w',
  389. /* X 88 58 */ 'x',
  390. /* Y 89 59 */ 'y',
  391. /* Z 90 5a */ 'z',
  392. /* [ 91 5b */ '\0',
  393. /* \ 92 5c */ '\0',
  394. /* ] 93 5d */ '\0',
  395. /* ^ 94 5e */ '\0',
  396. /* _ 95 5f */ '\0',
  397. /* ` 96 60 */ '\0',
  398. /* a 97 61 */ 'A',
  399. /* b 98 62 */ 'B',
  400. /* c 99 63 */ 'C',
  401. /* d 100 64 */ 'D',
  402. /* e 101 65 */ 'E',
  403. /* f 102 66 */ 'F',
  404. /* g 103 67 */ 'G',
  405. /* h 104 68 */ 'H',
  406. /* i 105 69 */ 'I',
  407. /* j 106 6a */ 'J',
  408. /* k 107 6b */ 'K',
  409. /* l 108 6c */ 'L',
  410. /* m 109 6d */ 'M',
  411. /* n 110 6e */ 'N',
  412. /* o 111 6f */ 'O',
  413. /* p 112 70 */ 'P',
  414. /* q 113 71 */ 'Q',
  415. /* r 114 72 */ 'R',
  416. /* s 115 73 */ 'S',
  417. /* t 116 74 */ 'T',
  418. /* u 117 75 */ 'U',
  419. /* v 118 76 */ 'V',
  420. /* w 119 77 */ 'W',
  421. /* x 120 78 */ 'X',
  422. /* y 121 79 */ 'Y',
  423. /* z 122 7a */ 'Z',
  424. /* { 123 7b */ '\0',
  425. /* | 124 7c */ '\0',
  426. /* } 125 7d */ '\0',
  427. /* ~ 126 7e */ '\0',
  428. /* DEL 127 7f */ '\0',
  429. /* -- 128 80 */ '\0',
  430. /* -- 129 81 */ '\0',
  431. /* -- 130 82 */ '\0',
  432. /* -- 131 83 */ '\0',
  433. /* -- 132 84 */ '\0',
  434. /* -- 133 85 */ '\0',
  435. /* -- 134 86 */ '\0',
  436. /* -- 135 87 */ '\0',
  437. /* -- 136 88 */ '\0',
  438. /* -- 137 89 */ '\0',
  439. /* -- 138 8a */ '\0',
  440. /* -- 139 8b */ '\0',
  441. /* -- 140 8c */ '\0',
  442. /* -- 141 8d */ '\0',
  443. /* -- 142 8e */ '\0',
  444. /* -- 143 8f */ '\0',
  445. /* -- 144 90 */ '\0',
  446. /* -- 145 91 */ '\0',
  447. /* -- 146 92 */ '\0',
  448. /* -- 147 93 */ '\0',
  449. /* -- 148 94 */ '\0',
  450. /* -- 149 95 */ '\0',
  451. /* -- 150 96 */ '\0',
  452. /* -- 151 97 */ '\0',
  453. /* -- 152 98 */ '\0',
  454. /* -- 153 99 */ '\0',
  455. /* -- 154 9a */ '\0',
  456. /* -- 155 9b */ '\0',
  457. /* -- 156 9c */ '\0',
  458. /* -- 157 9d */ '\0',
  459. /* -- 158 9e */ '\0',
  460. /* -- 159 9f */ '\0',
  461. /* 160 a0 */ '\0',
  462. /* � 161 a1 */ '\0',
  463. /* � 162 a2 */ '\0',
  464. /* � 163 a3 */ '\0',
  465. /* � 164 a4 */ '\0',
  466. /* � 165 a5 */ '\0',
  467. /* � 166 a6 */ '\0',
  468. /* � 167 a7 */ '\0',
  469. /* � 168 a8 */ '\0',
  470. /* � 169 a9 */ '\0',
  471. /* � 170 aa */ '\0',
  472. /* � 171 ab */ '\0',
  473. /* � 172 ac */ '\0',
  474. /* � 173 ad */ '\0',
  475. /* � 174 ae */ '\0',
  476. /* � 175 af */ '\0',
  477. /* � 176 b0 */ '\0',
  478. /* � 177 b1 */ '\0',
  479. /* � 178 b2 */ '\0',
  480. /* � 179 b3 */ '\0',
  481. /* � 180 b4 */ '\0',
  482. /* � 181 b5 */ '\0',
  483. /* � 182 b6 */ '\0',
  484. /* � 183 b7 */ '\0',
  485. /* � 184 b8 */ '\0',
  486. /* � 185 b9 */ '\0',
  487. /* � 186 ba */ '\0',
  488. /* � 187 bb */ '\0',
  489. /* � 188 bc */ '\0',
  490. /* � 189 bd */ '\0',
  491. /* � 190 be */ '\0',
  492. /* � 191 bf */ '\0',
  493. /* � 192 c0 */ 0xe0,
  494. /* � 193 c1 */ 0xe1,
  495. /* � 194 c2 */ 0xe2,
  496. /* � 195 c3 */ 0xe3,
  497. /* � 196 c4 */ 0xe4,
  498. /* � 197 c5 */ 0xe5,
  499. /* � 198 c6 */ 0xe6,
  500. /* � 199 c7 */ 0xe7,
  501. /* � 200 c8 */ 0xe8,
  502. /* � 201 c9 */ 0xe9,
  503. /* � 202 ca */ 0xea,
  504. /* � 203 cb */ 0xeb,
  505. /* � 204 cc */ 0xec,
  506. /* � 205 cd */ 0xed,
  507. /* � 206 ce */ 0xee,
  508. /* � 207 cf */ 0xef,
  509. /* � 208 d0 */ 0xf0,
  510. /* � 209 d1 */ 0xf1,
  511. /* � 210 d2 */ 0xf2,
  512. /* � 211 d3 */ 0xf3,
  513. /* � 212 d4 */ 0xf4,
  514. /* � 213 d5 */ 0xf5,
  515. /* � 214 d6 */ 0xf6,
  516. /* � 215 d7 */ '\0',
  517. /* � 216 d8 */ 0xf8,
  518. /* � 217 d9 */ 0xf9,
  519. /* � 218 da */ 0xfa,
  520. /* � 219 db */ 0xfb,
  521. /* � 220 dc */ 0xfc,
  522. /* � 221 dd */ 0xfd,
  523. /* � 222 de */ 0xfe,
  524. /* � 223 df */ '\0',
  525. /* � 224 e0 */ 0xc0,
  526. /* � 225 e1 */ 0xc1,
  527. /* � 226 e2 */ 0xc2,
  528. /* � 227 e3 */ 0xc3,
  529. /* � 228 e4 */ 0xc4,
  530. /* � 229 e5 */ 0xc5,
  531. /* � 230 e6 */ 0xc6,
  532. /* � 231 e7 */ 0xc7,
  533. /* � 232 e8 */ 0xc8,
  534. /* � 233 e9 */ 0xc9,
  535. /* � 234 ea */ 0xca,
  536. /* � 235 eb */ 0xcb,
  537. /* � 236 ec */ 0xcc,
  538. /* � 237 ed */ 0xcd,
  539. /* � 238 ee */ 0xce,
  540. /* � 239 ef */ 0xcf,
  541. /* � 240 f0 */ 0xd0,
  542. /* � 241 f1 */ 0xd1,
  543. /* � 242 f2 */ 0xd2,
  544. /* � 243 f3 */ 0xd3,
  545. /* � 244 f4 */ 0xd4,
  546. /* � 245 f5 */ 0xd5,
  547. /* � 246 f6 */ 0xd6,
  548. /* � 247 f7 */ '\0',
  549. /* � 248 f8 */ 0xd8,
  550. /* � 249 f9 */ 0xd9,
  551. /* � 250 fa */ 0xda,
  552. /* � 251 fb */ 0xdb,
  553. /* � 252 fc */ 0xdc,
  554. /* � 253 fd */ 0xdd,
  555. /* � 254 fe */ 0xde,
  556. /* � 255 ff */ '\0',
  557. };
  558. ///////////////////////////////////////////////////////////////////////////
  559. // Test characters for specified conditions (using iso8859-1)
  560. ///////////////////////////////////////////////////////////////////////////
  561. struct iso8859_1
  562. {
  563. typedef unsigned char char_type;
  564. typedef unsigned char classify_type;
  565. static bool
  566. isascii_(int ch)
  567. {
  568. return 0 == (ch & ~0x7f);
  569. }
  570. static bool
  571. ischar(int ch)
  572. {
  573. // iso8859.1 uses all 8 bits
  574. // we have to watch out for sign extensions
  575. return (0 == (ch & ~0xff) || ~0 == (ch | 0xff)) != 0;
  576. }
  577. // *** Note on assertions: The precondition is that the calls to
  578. // these functions do not violate the required range of ch (type int)
  579. // which is that strict_ischar(ch) should be true. It is the
  580. // responsibility of the caller to make sure this precondition is not
  581. // violated.
  582. static bool
  583. strict_ischar(int ch)
  584. {
  585. return ch >= 0 && ch <= 255;
  586. }
  587. static bool
  588. isalnum(int ch)
  589. {
  590. BOOST_ASSERT(strict_ischar(ch));
  591. return (iso8859_1_char_types[ch] & BOOST_CC_ALPHA)
  592. || (iso8859_1_char_types[ch] & BOOST_CC_DIGIT);
  593. }
  594. static bool
  595. isalpha(int ch)
  596. {
  597. BOOST_ASSERT(strict_ischar(ch));
  598. return (iso8859_1_char_types[ch] & BOOST_CC_ALPHA) != 0;
  599. }
  600. static bool
  601. isdigit(int ch)
  602. {
  603. BOOST_ASSERT(strict_ischar(ch));
  604. return (iso8859_1_char_types[ch] & BOOST_CC_DIGIT) != 0;
  605. }
  606. static bool
  607. isxdigit(int ch)
  608. {
  609. BOOST_ASSERT(strict_ischar(ch));
  610. return (iso8859_1_char_types[ch] & BOOST_CC_XDIGIT) != 0;
  611. }
  612. static bool
  613. iscntrl(int ch)
  614. {
  615. BOOST_ASSERT(strict_ischar(ch));
  616. return (iso8859_1_char_types[ch] & BOOST_CC_CTRL) != 0;
  617. }
  618. static bool
  619. isgraph(int ch)
  620. {
  621. return ('\x21' <= ch && ch <= '\x7e') || ('\xa1' <= ch && ch <= '\xff');
  622. }
  623. static bool
  624. islower(int ch)
  625. {
  626. BOOST_ASSERT(strict_ischar(ch));
  627. return (iso8859_1_char_types[ch] & BOOST_CC_LOWER) != 0;
  628. }
  629. static bool
  630. isprint(int ch)
  631. {
  632. return ('\x20' <= ch && ch <= '\x7e') || ('\xa0' <= ch && ch <= '\xff');
  633. }
  634. static bool
  635. ispunct(int ch)
  636. {
  637. BOOST_ASSERT(strict_ischar(ch));
  638. return (iso8859_1_char_types[ch] & BOOST_CC_PUNCT) != 0;
  639. }
  640. static bool
  641. isspace(int ch)
  642. {
  643. BOOST_ASSERT(strict_ischar(ch));
  644. return (iso8859_1_char_types[ch] & BOOST_CC_SPACE) != 0;
  645. }
  646. static bool
  647. isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
  648. {
  649. BOOST_ASSERT(strict_ischar(ch));
  650. return ('\x09' == ch || '\x20' == ch || '\xa0' == ch);
  651. }
  652. static bool
  653. isupper(int ch)
  654. {
  655. BOOST_ASSERT(strict_ischar(ch));
  656. return (iso8859_1_char_types[ch] & BOOST_CC_UPPER) != 0;
  657. }
  658. ///////////////////////////////////////////////////////////////////////////
  659. // Simple character conversions
  660. ///////////////////////////////////////////////////////////////////////////
  661. static int
  662. tolower(int ch)
  663. {
  664. BOOST_ASSERT(strict_ischar(ch));
  665. return isupper(ch) && '\0' != iso8859_1_char_conversion[ch] ?
  666. iso8859_1_char_conversion[ch] : ch;
  667. }
  668. static int
  669. toupper(int ch)
  670. {
  671. BOOST_ASSERT(strict_ischar(ch));
  672. return islower(ch) && '\0' != iso8859_1_char_conversion[ch] ?
  673. iso8859_1_char_conversion[ch] : ch;
  674. }
  675. static ::boost::uint32_t
  676. toucs4(int ch)
  677. {
  678. // The first 256 characters in Unicode and the UCS are
  679. // identical to those in ISO/IEC-8859-1.
  680. BOOST_ASSERT(strict_ischar(ch));
  681. return ch;
  682. }
  683. };
  684. }}}
  685. ///////////////////////////////////////////////////////////////////////////////
  686. // undefine macros
  687. ///////////////////////////////////////////////////////////////////////////////
  688. #undef BOOST_CC_DIGIT
  689. #undef BOOST_CC_XDIGIT
  690. #undef BOOST_CC_ALPHA
  691. #undef BOOST_CC_CTRL
  692. #undef BOOST_CC_LOWER
  693. #undef BOOST_CC_UPPER
  694. #undef BOOST_CC_PUNCT
  695. #undef BOOST_CC_SPACE
  696. #endif