char.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615
  1. /*=============================================================================
  2. Copyright (c) 2001-2011 Joel de Guzman
  3. Copyright (c) 2001-2011 Hartmut Kaiser
  4. Copyright (c) 2010 Bryce Lelbach
  5. Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. ==============================================================================*/
  8. #if !defined(BOOST_SPIRIT_CHAR_APRIL_16_2006_1051AM)
  9. #define BOOST_SPIRIT_CHAR_APRIL_16_2006_1051AM
  10. #if defined(_MSC_VER)
  11. #pragma once
  12. #endif
  13. #include <boost/spirit/home/support/common_terminals.hpp>
  14. #include <boost/spirit/home/support/string_traits.hpp>
  15. #include <boost/spirit/home/support/info.hpp>
  16. #include <boost/spirit/home/support/detail/get_encoding.hpp>
  17. #include <boost/spirit/home/support/char_set/basic_chset.hpp>
  18. #include <boost/spirit/home/qi/char/char_parser.hpp>
  19. #include <boost/spirit/home/qi/char/char_class.hpp>
  20. #include <boost/spirit/home/qi/meta_compiler.hpp>
  21. #include <boost/spirit/home/qi/auxiliary/lazy.hpp>
  22. #include <boost/spirit/home/qi/detail/enable_lit.hpp>
  23. #include <boost/fusion/include/at.hpp>
  24. #include <boost/mpl/if.hpp>
  25. #include <boost/mpl/assert.hpp>
  26. #include <boost/mpl/identity.hpp>
  27. #include <boost/utility/enable_if.hpp>
  28. #include <boost/type_traits/remove_const.hpp>
  29. #include <string>
  30. #if defined(_MSC_VER)
  31. #pragma once
  32. #endif
  33. namespace boost { namespace spirit
  34. {
  35. ///////////////////////////////////////////////////////////////////////////
  36. // Enablers
  37. ///////////////////////////////////////////////////////////////////////////
  38. template <typename CharEncoding>
  39. struct use_terminal<qi::domain
  40. , terminal<
  41. tag::char_code<tag::char_, CharEncoding> // enables char_
  42. >
  43. > : mpl::true_ {};
  44. template <typename CharEncoding, typename A0>
  45. struct use_terminal<qi::domain
  46. , terminal_ex<
  47. tag::char_code<tag::char_, CharEncoding> // enables char_('x'), char_("x")
  48. , fusion::vector1<A0> // and char_("a-z0-9")
  49. >
  50. > : mpl::true_ {};
  51. template <typename CharEncoding, typename A0, typename A1>
  52. struct use_terminal<qi::domain
  53. , terminal_ex<
  54. tag::char_code<tag::char_, CharEncoding> // enables char_('a','z')
  55. , fusion::vector2<A0, A1>
  56. >
  57. > : mpl::true_ {};
  58. template <typename CharEncoding> // enables *lazy* char_('x'), char_("x")
  59. struct use_lazy_terminal< // and char_("a-z0-9")
  60. qi::domain
  61. , tag::char_code<tag::char_, CharEncoding>
  62. , 1 // arity
  63. > : mpl::true_ {};
  64. template <typename CharEncoding> // enables *lazy* char_('a','z')
  65. struct use_lazy_terminal<
  66. qi::domain
  67. , tag::char_code<tag::char_, CharEncoding>
  68. , 2 // arity
  69. > : mpl::true_ {};
  70. template <>
  71. struct use_terminal<qi::domain, char> // enables 'x'
  72. : mpl::true_ {};
  73. template <>
  74. struct use_terminal<qi::domain, char[2]> // enables "x"
  75. : mpl::true_ {};
  76. template <>
  77. struct use_terminal<qi::domain, wchar_t> // enables wchar_t
  78. : mpl::true_ {};
  79. template <>
  80. struct use_terminal<qi::domain, wchar_t[2]> // enables L"x"
  81. : mpl::true_ {};
  82. // enables lit(...)
  83. template <typename A0>
  84. struct use_terminal<qi::domain
  85. , terminal_ex<tag::lit, fusion::vector1<A0> >
  86. , typename enable_if<traits::is_char<A0> >::type>
  87. : mpl::true_ {};
  88. }}
  89. namespace boost { namespace spirit { namespace qi
  90. {
  91. #ifndef BOOST_SPIRIT_NO_PREDEFINED_TERMINALS
  92. using spirit::lit; // lit('x') is equivalent to 'x'
  93. #endif
  94. using spirit::lit_type;
  95. ///////////////////////////////////////////////////////////////////////////
  96. // Parser for a single character
  97. ///////////////////////////////////////////////////////////////////////////
  98. template <typename CharEncoding, bool no_attribute, bool no_case = false>
  99. struct literal_char
  100. : char_parser<
  101. literal_char<CharEncoding, no_attribute, false>
  102. , typename CharEncoding::char_type
  103. , typename mpl::if_c<no_attribute, unused_type
  104. , typename CharEncoding::char_type>::type>
  105. {
  106. typedef typename CharEncoding::char_type char_type;
  107. typedef CharEncoding char_encoding;
  108. template <typename Char>
  109. literal_char(Char ch_)
  110. : ch(static_cast<char_type>(ch_)) {}
  111. template <typename Context, typename Iterator>
  112. struct attribute
  113. {
  114. typedef typename mpl::if_c<
  115. no_attribute, unused_type, char_type>::type
  116. type;
  117. };
  118. template <typename CharParam, typename Context>
  119. bool test(CharParam ch_, Context&) const
  120. {
  121. return traits::ischar<CharParam, char_encoding>::call(ch_) &&
  122. ch == char_type(ch_);
  123. }
  124. template <typename Context>
  125. info what(Context& /*context*/) const
  126. {
  127. return info("literal-char", char_encoding::toucs4(ch));
  128. }
  129. char_type ch;
  130. };
  131. template <typename CharEncoding, bool no_attribute>
  132. struct literal_char<CharEncoding, no_attribute, true> // case insensitive
  133. : char_parser<
  134. literal_char<CharEncoding, no_attribute, true>
  135. , typename mpl::if_c<no_attribute, unused_type
  136. , typename CharEncoding::char_type>::type>
  137. {
  138. typedef typename CharEncoding::char_type char_type;
  139. typedef CharEncoding char_encoding;
  140. literal_char(char_type ch)
  141. : lo(static_cast<char_type>(char_encoding::tolower(ch)))
  142. , hi(static_cast<char_type>(char_encoding::toupper(ch))) {}
  143. template <typename Context, typename Iterator>
  144. struct attribute
  145. {
  146. typedef typename mpl::if_c<
  147. no_attribute, unused_type, char_type>::type
  148. type;
  149. };
  150. template <typename CharParam, typename Context>
  151. bool test(CharParam ch_, Context&) const
  152. {
  153. if (!traits::ischar<CharParam, char_encoding>::call(ch_))
  154. return false;
  155. char_type ch = char_type(ch_); // optimize for token based parsing
  156. return this->lo == ch || this->hi == ch;
  157. }
  158. template <typename Context>
  159. info what(Context& /*context*/) const
  160. {
  161. return info("no-case-literal-char", char_encoding::toucs4(lo));
  162. }
  163. char_type lo, hi;
  164. };
  165. ///////////////////////////////////////////////////////////////////////////
  166. // Parser for a character range
  167. ///////////////////////////////////////////////////////////////////////////
  168. template <typename CharEncoding, bool no_case = false>
  169. struct char_range
  170. : char_parser<char_range<CharEncoding, false>, typename CharEncoding::char_type>
  171. {
  172. typedef typename CharEncoding::char_type char_type;
  173. typedef CharEncoding char_encoding;
  174. char_range(char_type from_, char_type to_)
  175. : from(from_), to(to_) {}
  176. template <typename CharParam, typename Context>
  177. bool test(CharParam ch_, Context&) const
  178. {
  179. if (!traits::ischar<CharParam, char_encoding>::call(ch_))
  180. return false;
  181. char_type ch = char_type(ch_); // optimize for token based parsing
  182. return !(ch < from) && !(to < ch);
  183. }
  184. template <typename Context>
  185. info what(Context& /*context*/) const
  186. {
  187. info result("char-range", char_encoding::toucs4(from));
  188. boost::get<std::string>(result.value) += '-';
  189. boost::get<std::string>(result.value) += to_utf8(char_encoding::toucs4(to));
  190. return result;
  191. }
  192. char_type from, to;
  193. };
  194. template <typename CharEncoding>
  195. struct char_range<CharEncoding, true> // case insensitive
  196. : char_parser<char_range<CharEncoding, true>, typename CharEncoding::char_type>
  197. {
  198. typedef typename CharEncoding::char_type char_type;
  199. typedef CharEncoding char_encoding;
  200. char_range(char_type from, char_type to)
  201. : from_lo(static_cast<char_type>(char_encoding::tolower(from)))
  202. , to_lo(static_cast<char_type>(char_encoding::tolower(to)))
  203. , from_hi(static_cast<char_type>(char_encoding::toupper(from)))
  204. , to_hi(static_cast<char_type>(char_encoding::toupper(to)))
  205. {}
  206. template <typename CharParam, typename Context>
  207. bool test(CharParam ch_, Context&) const
  208. {
  209. if (!traits::ischar<CharParam, char_encoding>::call(ch_))
  210. return false;
  211. char_type ch = char_type(ch_); // optimize for token based parsing
  212. return (!(ch < from_lo) && !(to_lo < ch))
  213. || (!(ch < from_hi) && !(to_hi < ch))
  214. ;
  215. }
  216. template <typename Context>
  217. info what(Context& /*context*/) const
  218. {
  219. info result("no-case-char-range", char_encoding::toucs4(from_lo));
  220. boost::get<std::string>(result.value) += '-';
  221. boost::get<std::string>(result.value) += to_utf8(char_encoding::toucs4(to_lo));
  222. return result;
  223. }
  224. char_type from_lo, to_lo, from_hi, to_hi;
  225. };
  226. ///////////////////////////////////////////////////////////////////////////
  227. // Parser for a character set
  228. ///////////////////////////////////////////////////////////////////////////
  229. template <typename CharEncoding, bool no_attribute, bool no_case = false>
  230. struct char_set
  231. : char_parser<char_set<CharEncoding, no_attribute, false>
  232. , typename mpl::if_c<no_attribute, unused_type
  233. , typename CharEncoding::char_type>::type>
  234. {
  235. typedef typename CharEncoding::char_type char_type;
  236. typedef CharEncoding char_encoding;
  237. template <typename String>
  238. char_set(String const& str)
  239. {
  240. using spirit::detail::cast_char;
  241. typedef typename
  242. remove_const<
  243. typename traits::char_type_of<String>::type
  244. >::type
  245. in_type;
  246. BOOST_SPIRIT_ASSERT_MSG((
  247. (sizeof(char_type) >= sizeof(in_type))
  248. ), cannot_convert_string, (String));
  249. in_type const* definition =
  250. (in_type const*)traits::get_c_string(str);
  251. in_type ch = *definition++;
  252. while (ch)
  253. {
  254. in_type next = *definition++;
  255. if (next == '-')
  256. {
  257. next = *definition++;
  258. if (next == 0)
  259. {
  260. chset.set(cast_char<char_type>(ch));
  261. chset.set('-');
  262. break;
  263. }
  264. chset.set(
  265. cast_char<char_type>(ch),
  266. cast_char<char_type>(next)
  267. );
  268. }
  269. else
  270. {
  271. chset.set(cast_char<char_type>(ch));
  272. }
  273. ch = next;
  274. }
  275. }
  276. template <typename CharParam, typename Context>
  277. bool test(CharParam ch, Context&) const
  278. {
  279. return traits::ischar<CharParam, char_encoding>::call(ch) &&
  280. chset.test(char_type(ch));
  281. }
  282. template <typename Context>
  283. info what(Context& /*context*/) const
  284. {
  285. return info("char-set");
  286. }
  287. support::detail::basic_chset<char_type> chset;
  288. };
  289. template <typename CharEncoding, bool no_attribute>
  290. struct char_set<CharEncoding, no_attribute, true> // case insensitive
  291. : char_parser<char_set<CharEncoding, no_attribute, true>
  292. , typename mpl::if_c<no_attribute, unused_type
  293. , typename CharEncoding::char_type>::type>
  294. {
  295. typedef typename CharEncoding::char_type char_type;
  296. typedef CharEncoding char_encoding;
  297. template <typename String>
  298. char_set(String const& str)
  299. {
  300. typedef typename traits::char_type_of<String>::type in_type;
  301. BOOST_SPIRIT_ASSERT_MSG((
  302. (sizeof(char_type) == sizeof(in_type))
  303. ), cannot_convert_string, (String));
  304. char_type const* definition =
  305. (char_type const*)traits::get_c_string(str);
  306. char_type ch = *definition++;
  307. while (ch)
  308. {
  309. char_type next = *definition++;
  310. if (next == '-')
  311. {
  312. next = *definition++;
  313. if (next == 0)
  314. {
  315. chset.set(static_cast<char_type>(CharEncoding::tolower(ch)));
  316. chset.set(static_cast<char_type>(CharEncoding::toupper(ch)));
  317. chset.set('-');
  318. break;
  319. }
  320. chset.set(static_cast<char_type>(CharEncoding::tolower(ch))
  321. , static_cast<char_type>(CharEncoding::tolower(next)));
  322. chset.set(static_cast<char_type>(CharEncoding::toupper(ch))
  323. , static_cast<char_type>(CharEncoding::toupper(next)));
  324. }
  325. else
  326. {
  327. chset.set(static_cast<char_type>(CharEncoding::tolower(ch)));
  328. chset.set(static_cast<char_type>(CharEncoding::toupper(ch)));
  329. }
  330. ch = next;
  331. }
  332. }
  333. template <typename CharParam, typename Context>
  334. bool test(CharParam ch, Context&) const
  335. {
  336. return traits::ischar<CharParam, char_encoding>::call(ch) &&
  337. chset.test(char_type(ch));
  338. }
  339. template <typename Context>
  340. info what(Context& /*context*/) const
  341. {
  342. return info("no-case-char-set");
  343. }
  344. support::detail::basic_chset<char_type> chset;
  345. };
  346. ///////////////////////////////////////////////////////////////////////////
  347. // Parser generators: make_xxx function (objects)
  348. ///////////////////////////////////////////////////////////////////////////
  349. namespace detail
  350. {
  351. template <typename Modifiers, typename Encoding>
  352. struct basic_literal
  353. {
  354. static bool const no_case =
  355. has_modifier<
  356. Modifiers
  357. , tag::char_code_base<tag::no_case>
  358. >::value;
  359. static bool const no_attr =
  360. !has_modifier<
  361. Modifiers
  362. , tag::lazy_eval
  363. >::value;
  364. typedef literal_char<
  365. typename spirit::detail::get_encoding_with_case<
  366. Modifiers, Encoding, no_case>::type
  367. , no_attr
  368. , no_case>
  369. result_type;
  370. template <typename Char>
  371. result_type operator()(Char ch, unused_type) const
  372. {
  373. return result_type(ch);
  374. }
  375. template <typename Char>
  376. result_type operator()(Char const* str, unused_type) const
  377. {
  378. return result_type(str[0]);
  379. }
  380. };
  381. }
  382. template <typename Modifiers>
  383. struct make_primitive<char, Modifiers>
  384. : detail::basic_literal<Modifiers, char_encoding::standard> {};
  385. template <typename Modifiers>
  386. struct make_primitive<char const(&)[2], Modifiers>
  387. : detail::basic_literal<Modifiers, char_encoding::standard> {};
  388. template <typename Modifiers>
  389. struct make_primitive<wchar_t, Modifiers>
  390. : detail::basic_literal<Modifiers, char_encoding::standard_wide> {};
  391. template <typename Modifiers>
  392. struct make_primitive<wchar_t const(&)[2], Modifiers>
  393. : detail::basic_literal<Modifiers, char_encoding::standard_wide> {};
  394. template <typename CharEncoding, typename Modifiers>
  395. struct make_primitive<
  396. terminal<tag::char_code<tag::char_, CharEncoding> >, Modifiers>
  397. {
  398. typedef typename
  399. spirit::detail::get_encoding<Modifiers, CharEncoding>::type
  400. char_encoding;
  401. typedef tag::char_code<tag::char_, char_encoding> tag;
  402. typedef char_class<tag> result_type;
  403. result_type operator()(unused_type, unused_type) const
  404. {
  405. return result_type();
  406. }
  407. };
  408. ///////////////////////////////////////////////////////////////////////////
  409. // char_('x')
  410. template <typename CharEncoding, typename Modifiers, typename A0>
  411. struct make_primitive<
  412. terminal_ex<
  413. tag::char_code<tag::char_, CharEncoding>
  414. , fusion::vector1<A0> >
  415. , Modifiers>
  416. {
  417. static bool const no_case =
  418. has_modifier<Modifiers, tag::char_code_base<tag::no_case> >::value;
  419. typedef typename
  420. spirit::detail::get_encoding<Modifiers, CharEncoding>::type
  421. char_encoding;
  422. typedef typename
  423. mpl::if_<
  424. traits::is_string<A0>
  425. , char_set<char_encoding, false, no_case>
  426. , literal_char<char_encoding, false, no_case>
  427. >::type
  428. result_type;
  429. template <typename Terminal>
  430. result_type operator()(Terminal const& term, unused_type) const
  431. {
  432. return result_type(fusion::at_c<0>(term.args));
  433. }
  434. };
  435. // lit('x')
  436. template <typename Modifiers, typename A0>
  437. struct make_primitive<
  438. terminal_ex<tag::lit, fusion::vector1<A0> >
  439. , Modifiers
  440. , typename enable_if<traits::is_char<A0> >::type>
  441. {
  442. static bool const no_case =
  443. has_modifier<
  444. Modifiers
  445. , tag::char_code_base<tag::no_case>
  446. >::value;
  447. typedef typename traits::char_encoding_from_char<
  448. typename traits::char_type_of<A0>::type>::type encoding;
  449. typedef literal_char<
  450. typename spirit::detail::get_encoding_with_case<
  451. Modifiers, encoding, no_case>::type
  452. , true, no_case>
  453. result_type;
  454. template <typename Terminal>
  455. result_type operator()(Terminal const& term, unused_type) const
  456. {
  457. return result_type(fusion::at_c<0>(term.args));
  458. }
  459. };
  460. ///////////////////////////////////////////////////////////////////////////
  461. template <typename CharEncoding, typename Modifiers, typename Char>
  462. struct make_primitive<
  463. terminal_ex<
  464. tag::char_code<tag::char_, CharEncoding>
  465. , fusion::vector1<Char(&)[2]> // For single char strings
  466. >
  467. , Modifiers>
  468. {
  469. static bool const no_case =
  470. has_modifier<Modifiers, tag::char_code_base<tag::no_case> >::value;
  471. typedef typename
  472. spirit::detail::get_encoding<Modifiers, CharEncoding>::type
  473. char_encoding;
  474. typedef literal_char<char_encoding, false, no_case> result_type;
  475. template <typename Terminal>
  476. result_type operator()(Terminal const& term, unused_type) const
  477. {
  478. return result_type(fusion::at_c<0>(term.args)[0]);
  479. }
  480. };
  481. template <typename CharEncoding, typename Modifiers, typename A0, typename A1>
  482. struct make_primitive<
  483. terminal_ex<
  484. tag::char_code<tag::char_, CharEncoding>
  485. , fusion::vector2<A0, A1>
  486. >
  487. , Modifiers>
  488. {
  489. static bool const no_case =
  490. has_modifier<Modifiers, tag::char_code_base<tag::no_case> >::value;
  491. typedef typename
  492. spirit::detail::get_encoding<Modifiers, CharEncoding>::type
  493. char_encoding;
  494. typedef char_range<char_encoding, no_case> result_type;
  495. template <typename Terminal>
  496. result_type operator()(Terminal const& term, unused_type) const
  497. {
  498. return result_type(
  499. fusion::at_c<0>(term.args)
  500. , fusion::at_c<1>(term.args)
  501. );
  502. }
  503. };
  504. template <typename CharEncoding, typename Modifiers, typename Char>
  505. struct make_primitive<
  506. terminal_ex<
  507. tag::char_code<tag::char_, CharEncoding>
  508. , fusion::vector2<Char(&)[2], Char(&)[2]> // For single char strings
  509. >
  510. , Modifiers>
  511. {
  512. static bool const no_case =
  513. has_modifier<Modifiers, tag::char_code_base<tag::no_case> >::value;
  514. typedef typename
  515. spirit::detail::get_encoding<Modifiers, CharEncoding>::type
  516. char_encoding;
  517. typedef char_range<char_encoding, no_case> result_type;
  518. template <typename Terminal>
  519. result_type operator()(Terminal const& term, unused_type) const
  520. {
  521. return result_type(
  522. fusion::at_c<0>(term.args)[0]
  523. , fusion::at_c<1>(term.args)[0]
  524. );
  525. }
  526. };
  527. }}}
  528. #endif