unicode.hpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. /*=============================================================================
  2. Copyright (c) 2001-2011 Hartmut Kaiser
  3. Copyright (c) 2001-2011 Joel de Guzman
  4. Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. =============================================================================*/
  7. #if !defined(BOOST_SPIRIT_UNICODE_1_JANUARY_12_2010_0728PM)
  8. #define BOOST_SPIRIT_UNICODE_1_JANUARY_12_2010_0728PM
  9. #if defined(_MSC_VER)
  10. #pragma once
  11. #endif
  12. #include <boost/cstdint.hpp>
  13. #include <boost/spirit/home/support/char_encoding/unicode/query.hpp>
  14. namespace boost { namespace spirit { namespace char_encoding
  15. {
  16. ///////////////////////////////////////////////////////////////////////////
  17. // Test characters for specified conditions (using Unicode code points)
  18. ///////////////////////////////////////////////////////////////////////////
  19. struct unicode
  20. {
  21. typedef ::boost::uint32_t char_type;
  22. typedef ::boost::uint32_t classify_type;
  23. ///////////////////////////////////////////////////////////////////////////
  24. // Posix stuff
  25. ///////////////////////////////////////////////////////////////////////////
  26. static bool
  27. isascii_(char_type ch)
  28. {
  29. return 0 == (ch & ~0x7f);
  30. }
  31. static bool
  32. ischar(char_type ch)
  33. {
  34. // unicode code points in the range 0x00 to 0x10FFFF
  35. return ch <= 0x10FFFF;
  36. }
  37. static bool
  38. isalnum(char_type ch)
  39. {
  40. return ucd::is_alphanumeric(ch);
  41. }
  42. static bool
  43. isalpha(char_type ch)
  44. {
  45. return ucd::is_alphabetic(ch);
  46. }
  47. static bool
  48. isdigit(char_type ch)
  49. {
  50. return ucd::is_decimal_number(ch);
  51. }
  52. static bool
  53. isxdigit(char_type ch)
  54. {
  55. return ucd::is_hex_digit(ch);
  56. }
  57. static bool
  58. iscntrl(char_type ch)
  59. {
  60. return ucd::is_control(ch);
  61. }
  62. static bool
  63. isgraph(char_type ch)
  64. {
  65. return ucd::is_graph(ch);
  66. }
  67. static bool
  68. islower(char_type ch)
  69. {
  70. return ucd::is_lowercase(ch);
  71. }
  72. static bool
  73. isprint(char_type ch)
  74. {
  75. return ucd::is_print(ch);
  76. }
  77. static bool
  78. ispunct(char_type ch)
  79. {
  80. return ucd::is_punctuation(ch);
  81. }
  82. static bool
  83. isspace(char_type ch)
  84. {
  85. return ucd::is_white_space(ch);
  86. }
  87. static bool
  88. isblank BOOST_PREVENT_MACRO_SUBSTITUTION (char_type ch)
  89. {
  90. return ucd::is_blank(ch);
  91. }
  92. static bool
  93. isupper(char_type ch)
  94. {
  95. return ucd::is_uppercase(ch);
  96. }
  97. ///////////////////////////////////////////////////////////////////////////
  98. // Simple character conversions
  99. ///////////////////////////////////////////////////////////////////////////
  100. static char_type
  101. tolower(char_type ch)
  102. {
  103. return ucd::to_lowercase(ch);
  104. }
  105. static char_type
  106. toupper(char_type ch)
  107. {
  108. return ucd::to_uppercase(ch);
  109. }
  110. static ::boost::uint32_t
  111. toucs4(char_type ch)
  112. {
  113. return ch;
  114. }
  115. ///////////////////////////////////////////////////////////////////////////
  116. // Major Categories
  117. ///////////////////////////////////////////////////////////////////////////
  118. #define BOOST_SPIRIT_MAJOR_CATEGORY(name) \
  119. static bool \
  120. is_##name(char_type ch) \
  121. { \
  122. return ucd::get_major_category(ch) == ucd::properties::name; \
  123. } \
  124. /***/
  125. BOOST_SPIRIT_MAJOR_CATEGORY(letter)
  126. BOOST_SPIRIT_MAJOR_CATEGORY(mark)
  127. BOOST_SPIRIT_MAJOR_CATEGORY(number)
  128. BOOST_SPIRIT_MAJOR_CATEGORY(separator)
  129. BOOST_SPIRIT_MAJOR_CATEGORY(other)
  130. BOOST_SPIRIT_MAJOR_CATEGORY(punctuation)
  131. BOOST_SPIRIT_MAJOR_CATEGORY(symbol)
  132. ///////////////////////////////////////////////////////////////////////////
  133. // General Categories
  134. ///////////////////////////////////////////////////////////////////////////
  135. #define BOOST_SPIRIT_CATEGORY(name) \
  136. static bool \
  137. is_##name(char_type ch) \
  138. { \
  139. return ucd::get_category(ch) == ucd::properties::name; \
  140. } \
  141. /***/
  142. BOOST_SPIRIT_CATEGORY(uppercase_letter)
  143. BOOST_SPIRIT_CATEGORY(lowercase_letter)
  144. BOOST_SPIRIT_CATEGORY(titlecase_letter)
  145. BOOST_SPIRIT_CATEGORY(modifier_letter)
  146. BOOST_SPIRIT_CATEGORY(other_letter)
  147. BOOST_SPIRIT_CATEGORY(nonspacing_mark)
  148. BOOST_SPIRIT_CATEGORY(enclosing_mark)
  149. BOOST_SPIRIT_CATEGORY(spacing_mark)
  150. BOOST_SPIRIT_CATEGORY(decimal_number)
  151. BOOST_SPIRIT_CATEGORY(letter_number)
  152. BOOST_SPIRIT_CATEGORY(other_number)
  153. BOOST_SPIRIT_CATEGORY(space_separator)
  154. BOOST_SPIRIT_CATEGORY(line_separator)
  155. BOOST_SPIRIT_CATEGORY(paragraph_separator)
  156. BOOST_SPIRIT_CATEGORY(control)
  157. BOOST_SPIRIT_CATEGORY(format)
  158. BOOST_SPIRIT_CATEGORY(private_use)
  159. BOOST_SPIRIT_CATEGORY(surrogate)
  160. BOOST_SPIRIT_CATEGORY(unassigned)
  161. BOOST_SPIRIT_CATEGORY(dash_punctuation)
  162. BOOST_SPIRIT_CATEGORY(open_punctuation)
  163. BOOST_SPIRIT_CATEGORY(close_punctuation)
  164. BOOST_SPIRIT_CATEGORY(connector_punctuation)
  165. BOOST_SPIRIT_CATEGORY(other_punctuation)
  166. BOOST_SPIRIT_CATEGORY(initial_punctuation)
  167. BOOST_SPIRIT_CATEGORY(final_punctuation)
  168. BOOST_SPIRIT_CATEGORY(math_symbol)
  169. BOOST_SPIRIT_CATEGORY(currency_symbol)
  170. BOOST_SPIRIT_CATEGORY(modifier_symbol)
  171. BOOST_SPIRIT_CATEGORY(other_symbol)
  172. ///////////////////////////////////////////////////////////////////////////
  173. // Derived Categories
  174. ///////////////////////////////////////////////////////////////////////////
  175. #define BOOST_SPIRIT_DERIVED_CATEGORY(name) \
  176. static bool \
  177. is_##name(char_type ch) \
  178. { \
  179. return ucd::is_##name(ch); \
  180. } \
  181. /***/
  182. BOOST_SPIRIT_DERIVED_CATEGORY(alphabetic)
  183. BOOST_SPIRIT_DERIVED_CATEGORY(uppercase)
  184. BOOST_SPIRIT_DERIVED_CATEGORY(lowercase)
  185. BOOST_SPIRIT_DERIVED_CATEGORY(white_space)
  186. BOOST_SPIRIT_DERIVED_CATEGORY(hex_digit)
  187. BOOST_SPIRIT_DERIVED_CATEGORY(noncharacter_code_point)
  188. BOOST_SPIRIT_DERIVED_CATEGORY(default_ignorable_code_point)
  189. ///////////////////////////////////////////////////////////////////////////
  190. // Scripts
  191. ///////////////////////////////////////////////////////////////////////////
  192. #define BOOST_SPIRIT_SCRIPT(name) \
  193. static bool \
  194. is_##name(char_type ch) \
  195. { \
  196. return ucd::get_script(ch) == ucd::properties::name; \
  197. } \
  198. /***/
  199. BOOST_SPIRIT_SCRIPT(arabic)
  200. BOOST_SPIRIT_SCRIPT(imperial_aramaic)
  201. BOOST_SPIRIT_SCRIPT(armenian)
  202. BOOST_SPIRIT_SCRIPT(avestan)
  203. BOOST_SPIRIT_SCRIPT(balinese)
  204. BOOST_SPIRIT_SCRIPT(bamum)
  205. BOOST_SPIRIT_SCRIPT(bengali)
  206. BOOST_SPIRIT_SCRIPT(bopomofo)
  207. BOOST_SPIRIT_SCRIPT(braille)
  208. BOOST_SPIRIT_SCRIPT(buginese)
  209. BOOST_SPIRIT_SCRIPT(buhid)
  210. BOOST_SPIRIT_SCRIPT(canadian_aboriginal)
  211. BOOST_SPIRIT_SCRIPT(carian)
  212. BOOST_SPIRIT_SCRIPT(cham)
  213. BOOST_SPIRIT_SCRIPT(cherokee)
  214. BOOST_SPIRIT_SCRIPT(coptic)
  215. BOOST_SPIRIT_SCRIPT(cypriot)
  216. BOOST_SPIRIT_SCRIPT(cyrillic)
  217. BOOST_SPIRIT_SCRIPT(devanagari)
  218. BOOST_SPIRIT_SCRIPT(deseret)
  219. BOOST_SPIRIT_SCRIPT(egyptian_hieroglyphs)
  220. BOOST_SPIRIT_SCRIPT(ethiopic)
  221. BOOST_SPIRIT_SCRIPT(georgian)
  222. BOOST_SPIRIT_SCRIPT(glagolitic)
  223. BOOST_SPIRIT_SCRIPT(gothic)
  224. BOOST_SPIRIT_SCRIPT(greek)
  225. BOOST_SPIRIT_SCRIPT(gujarati)
  226. BOOST_SPIRIT_SCRIPT(gurmukhi)
  227. BOOST_SPIRIT_SCRIPT(hangul)
  228. BOOST_SPIRIT_SCRIPT(han)
  229. BOOST_SPIRIT_SCRIPT(hanunoo)
  230. BOOST_SPIRIT_SCRIPT(hebrew)
  231. BOOST_SPIRIT_SCRIPT(hiragana)
  232. BOOST_SPIRIT_SCRIPT(katakana_or_hiragana)
  233. BOOST_SPIRIT_SCRIPT(old_italic)
  234. BOOST_SPIRIT_SCRIPT(javanese)
  235. BOOST_SPIRIT_SCRIPT(kayah_li)
  236. BOOST_SPIRIT_SCRIPT(katakana)
  237. BOOST_SPIRIT_SCRIPT(kharoshthi)
  238. BOOST_SPIRIT_SCRIPT(khmer)
  239. BOOST_SPIRIT_SCRIPT(kannada)
  240. BOOST_SPIRIT_SCRIPT(kaithi)
  241. BOOST_SPIRIT_SCRIPT(tai_tham)
  242. BOOST_SPIRIT_SCRIPT(lao)
  243. BOOST_SPIRIT_SCRIPT(latin)
  244. BOOST_SPIRIT_SCRIPT(lepcha)
  245. BOOST_SPIRIT_SCRIPT(limbu)
  246. BOOST_SPIRIT_SCRIPT(linear_b)
  247. BOOST_SPIRIT_SCRIPT(lisu)
  248. BOOST_SPIRIT_SCRIPT(lycian)
  249. BOOST_SPIRIT_SCRIPT(lydian)
  250. BOOST_SPIRIT_SCRIPT(malayalam)
  251. BOOST_SPIRIT_SCRIPT(mongolian)
  252. BOOST_SPIRIT_SCRIPT(meetei_mayek)
  253. BOOST_SPIRIT_SCRIPT(myanmar)
  254. BOOST_SPIRIT_SCRIPT(nko)
  255. BOOST_SPIRIT_SCRIPT(ogham)
  256. BOOST_SPIRIT_SCRIPT(ol_chiki)
  257. BOOST_SPIRIT_SCRIPT(old_turkic)
  258. BOOST_SPIRIT_SCRIPT(oriya)
  259. BOOST_SPIRIT_SCRIPT(osmanya)
  260. BOOST_SPIRIT_SCRIPT(phags_pa)
  261. BOOST_SPIRIT_SCRIPT(inscriptional_pahlavi)
  262. BOOST_SPIRIT_SCRIPT(phoenician)
  263. BOOST_SPIRIT_SCRIPT(inscriptional_parthian)
  264. BOOST_SPIRIT_SCRIPT(rejang)
  265. BOOST_SPIRIT_SCRIPT(runic)
  266. BOOST_SPIRIT_SCRIPT(samaritan)
  267. BOOST_SPIRIT_SCRIPT(old_south_arabian)
  268. BOOST_SPIRIT_SCRIPT(saurashtra)
  269. BOOST_SPIRIT_SCRIPT(shavian)
  270. BOOST_SPIRIT_SCRIPT(sinhala)
  271. BOOST_SPIRIT_SCRIPT(sundanese)
  272. BOOST_SPIRIT_SCRIPT(syloti_nagri)
  273. BOOST_SPIRIT_SCRIPT(syriac)
  274. BOOST_SPIRIT_SCRIPT(tagbanwa)
  275. BOOST_SPIRIT_SCRIPT(tai_le)
  276. BOOST_SPIRIT_SCRIPT(new_tai_lue)
  277. BOOST_SPIRIT_SCRIPT(tamil)
  278. BOOST_SPIRIT_SCRIPT(tai_viet)
  279. BOOST_SPIRIT_SCRIPT(telugu)
  280. BOOST_SPIRIT_SCRIPT(tifinagh)
  281. BOOST_SPIRIT_SCRIPT(tagalog)
  282. BOOST_SPIRIT_SCRIPT(thaana)
  283. BOOST_SPIRIT_SCRIPT(thai)
  284. BOOST_SPIRIT_SCRIPT(tibetan)
  285. BOOST_SPIRIT_SCRIPT(ugaritic)
  286. BOOST_SPIRIT_SCRIPT(vai)
  287. BOOST_SPIRIT_SCRIPT(old_persian)
  288. BOOST_SPIRIT_SCRIPT(cuneiform)
  289. BOOST_SPIRIT_SCRIPT(yi)
  290. BOOST_SPIRIT_SCRIPT(inherited)
  291. BOOST_SPIRIT_SCRIPT(common)
  292. BOOST_SPIRIT_SCRIPT(unknown)
  293. #undef BOOST_SPIRIT_MAJOR_CATEGORY
  294. #undef BOOST_SPIRIT_CATEGORY
  295. #undef BOOST_SPIRIT_DERIVED_CATEGORY
  296. #undef BOOST_SPIRIT_SCRIPT
  297. };
  298. }}}
  299. #endif