// token_omit.cpp
  1. // Copyright (c) 2001-2011 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <boost/config/warning_disable.hpp>
#include <boost/detail/lightweight_test.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/lex_lexertl_position_token.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

#include <cstddef>
#include <iterator>
#include <string>
#include <vector>
  12. namespace lex = boost::spirit::lex;
  13. namespace phoenix = boost::phoenix;
  14. namespace mpl = boost::mpl;
  15. ///////////////////////////////////////////////////////////////////////////////
  16. enum tokenids
  17. {
  18. ID_INT = 1000,
  19. ID_DOUBLE
  20. };
  21. template <typename Lexer>
  22. struct token_definitions : lex::lexer<Lexer>
  23. {
  24. token_definitions()
  25. {
  26. this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
  27. this->self.add_pattern("OCTALDIGIT", "[0-7]");
  28. this->self.add_pattern("DIGIT", "[0-9]");
  29. this->self.add_pattern("OPTSIGN", "[-+]?");
  30. this->self.add_pattern("EXPSTART", "[eE][-+]");
  31. this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
  32. // define tokens and associate them with the lexer
  33. int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
  34. int_.id(ID_INT);
  35. double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
  36. double_.id(ID_DOUBLE);
  37. whitespace = "[ \t\n]+";
  38. this->self =
  39. double_
  40. | int_
  41. | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ]
  42. ;
  43. }
  44. lex::token_def<lex::omit> int_;
  45. lex::token_def<lex::omit> double_;
  46. lex::token_def<lex::omit> whitespace;
  47. };
  48. template <typename Lexer>
  49. struct token_definitions_with_state : lex::lexer<Lexer>
  50. {
  51. token_definitions_with_state()
  52. {
  53. this->self.add_pattern("HEXDIGIT", "[0-9a-fA-F]");
  54. this->self.add_pattern("OCTALDIGIT", "[0-7]");
  55. this->self.add_pattern("DIGIT", "[0-9]");
  56. this->self.add_pattern("OPTSIGN", "[-+]?");
  57. this->self.add_pattern("EXPSTART", "[eE][-+]");
  58. this->self.add_pattern("EXPONENT", "[eE]{OPTSIGN}{DIGIT}+");
  59. this->self.add_state();
  60. this->self.add_state("INT");
  61. this->self.add_state("DOUBLE");
  62. // define tokens and associate them with the lexer
  63. int_ = "(0x|0X){HEXDIGIT}+|0{OCTALDIGIT}*|{OPTSIGN}[1-9]{DIGIT}*";
  64. int_.id(ID_INT);
  65. double_ = "{OPTSIGN}({DIGIT}*\\.{DIGIT}+|{DIGIT}+\\.){EXPONENT}?|{DIGIT}+{EXPONENT}";
  66. double_.id(ID_DOUBLE);
  67. whitespace = "[ \t\n]+";
  68. this->self("*") =
  69. double_ [ lex::_state = "DOUBLE"]
  70. | int_ [ lex::_state = "INT" ]
  71. | whitespace[ lex::_pass = lex::pass_flags::pass_ignore ]
  72. ;
  73. }
  74. lex::token_def<lex::omit> int_;
  75. lex::token_def<lex::omit> double_;
  76. lex::token_def<lex::omit> whitespace;
  77. };
  78. ///////////////////////////////////////////////////////////////////////////////
  79. template <typename Token>
  80. inline bool
  81. test_token_ids(int const* ids, std::vector<Token> const& tokens)
  82. {
  83. BOOST_FOREACH(Token const& t, tokens)
  84. {
  85. if (*ids == -1)
  86. return false; // reached end of expected data
  87. if (t.id() != static_cast<std::size_t>(*ids)) // token id must match
  88. return false;
  89. ++ids;
  90. }
  91. return (*ids == -1) ? true : false;
  92. }
  93. ///////////////////////////////////////////////////////////////////////////////
  94. template <typename Token>
  95. inline bool
  96. test_token_states(std::size_t const* states, std::vector<Token> const& tokens)
  97. {
  98. BOOST_FOREACH(Token const& t, tokens)
  99. {
  100. if (*states == std::size_t(-1))
  101. return false; // reached end of expected data
  102. if (t.state() != *states) // token state must match
  103. return false;
  104. ++states;
  105. }
  106. return (*states == std::size_t(-1)) ? true : false;
  107. }
  108. ///////////////////////////////////////////////////////////////////////////////
  109. struct position_type
  110. {
  111. std::size_t begin, end;
  112. };
  113. template <typename Iterator, typename Token>
  114. inline bool
  115. test_token_positions(Iterator begin, position_type const* positions,
  116. std::vector<Token> const& tokens)
  117. {
  118. BOOST_FOREACH(Token const& t, tokens)
  119. {
  120. if (positions->begin == std::size_t(-1) &&
  121. positions->end == std::size_t(-1))
  122. {
  123. return false; // reached end of expected data
  124. }
  125. boost::iterator_range<Iterator> matched = t.matched();
  126. std::size_t start = std::distance(begin, matched.begin());
  127. std::size_t end = std::distance(begin, matched.end());
  128. // position must match
  129. if (start != positions->begin || end != positions->end)
  130. return false;
  131. ++positions;
  132. }
  133. return (positions->begin == std::size_t(-1) &&
  134. positions->end == std::size_t(-1)) ? true : false;
  135. }
  136. ///////////////////////////////////////////////////////////////////////////////
int main()
{
    typedef std::string::iterator base_iterator_type;

    // input to tokenize, and the expected results: token ids, lexer states,
    // and [begin, end) offsets of each match -- each array terminated by a
    // -1 sentinel entry
    std::string input(" 01 1.2 -2 0x3 2.3e6 -3.4");
    int ids[] = { ID_INT, ID_DOUBLE, ID_INT, ID_INT, ID_DOUBLE, ID_DOUBLE, -1 };
    std::size_t states[] = { 0, 1, 2, 1, 1, 2, std::size_t(-1) };
    position_type positions[] =
    {
        { 1, 3 }, { 4, 7 }, { 8, 10 }, { 11, 14 }, { 15, 20 }, { 21, 25 },
        { std::size_t(-1), std::size_t(-1) }
    };

    // minimal token type: holds just token id, no state, no value
    {
        typedef lex::lexertl::token<
            base_iterator_type, lex::omit, mpl::false_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        // collect every emitted token via the phoenix push_back actor;
        // tokenize itself must report success
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
    }

    // position token, no state: id plus the matched iterator range
    {
        typedef lex::lexertl::position_token<
            base_iterator_type, lex::omit, mpl::false_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
    }

    // minimal token type: holds just token id and state, no value
    {
        typedef lex::lexertl::token<
            base_iterator_type, lex::omit, mpl::true_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions_with_state<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_states(states, tokens));
    }

    // position token with state: id, lexer state, and matched range
    {
        typedef lex::lexertl::position_token<
            base_iterator_type, lex::omit, mpl::true_> token_type;
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;

        token_definitions_with_state<lexer_type> lexer;
        std::vector<token_type> tokens;
        base_iterator_type first = input.begin();

        using phoenix::arg_names::_1;
        BOOST_TEST(lex::tokenize(first, input.end(), lexer
          , phoenix::push_back(phoenix::ref(tokens), _1)));

        BOOST_TEST(test_token_ids(ids, tokens));
        BOOST_TEST(test_token_states(states, tokens));
        BOOST_TEST(test_token_positions(input.begin(), positions, tokens));
    }

    return boost::report_errors();
}