tokenize_and_parse.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. // Copyright (c) 2001-2011 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM)
  6. #define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM
  7. #if defined(_MSC_VER)
  8. #pragma once
  9. #endif
  10. #include <boost/spirit/home/qi/skip_over.hpp>
  11. #include <boost/spirit/home/qi/parse.hpp>
  12. #include <boost/spirit/home/qi/nonterminal/grammar.hpp>
  13. #include <boost/spirit/home/support/unused.hpp>
  14. #include <boost/spirit/home/lex/lexer.hpp>
  15. #include <boost/mpl/assert.hpp>
  16. namespace boost { namespace spirit { namespace lex
  17. {
  18. ///////////////////////////////////////////////////////////////////////////
  19. // Import skip_flag enumerator type from Qi namespace
  20. using qi::skip_flag;
  21. ///////////////////////////////////////////////////////////////////////////
  22. //
  23. // The tokenize_and_parse() function is one of the main Spirit API
  24. // functions. It simplifies using a lexer as the underlying token source
  25. // while parsing a given input sequence.
  26. //
  27. // The function takes a pair of iterators spanning the underlying input
  28. // stream to parse, the lexer object (built from the token definitions)
  29. // and a parser object (built from the parser grammar definition).
  30. //
  31. // The second version of this function additionally takes an attribute to
  32. // be used as the top level data structure instance the parser should use
  33. // to store the recognized input to.
  34. //
  35. // The function returns true if the parsing succeeded (the given input
  36. // sequence has been successfully matched by the given grammar).
  37. //
  38. // first, last: The pair of iterators spanning the underlying input
  39. // sequence to parse. These iterators must at least
  40. // conform to the requirements of the std::input_iterator
  41. // category.
  42. // On exit the iterator 'first' will be updated to the
  43. // position right after the last successfully matched
  44. // token.
  45. // lex: The lexer object (encoding the token definitions) to be
  46. // used to convert the input sequence into a sequence of
  47. // tokens. This token sequence is passed to the parsing
  48. // process. The LexerExpr type must conform to the
  49. // lexer interface described in the corresponding section
  50. // of the documentation.
  51. // xpr: The grammar object (encoding the parser grammar) to be
  52. // used to match the token sequence generated by the lex
  53. // object instance. The ParserExpr type must conform to
  54. // the grammar interface described in the corresponding
  55. // section of the documentation.
  56. // attr: The top level attribute passed to the parser. It will
  57. // be populated during the parsing of the input sequence.
  58. // On exit it will hold the 'parser result' corresponding
  59. // to the matched input sequence.
  60. //
  61. ///////////////////////////////////////////////////////////////////////////
  62. template <typename Iterator, typename Lexer, typename ParserExpr>
  63. inline bool
  64. tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex,
  65. ParserExpr const& xpr)
  66. {
  67. // Report invalid expression error as early as possible.
  68. // If you got an error_invalid_expression error message here,
  69. // then the expression (expr) is not a valid spirit qi expression.
  70. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
  71. typename Lexer::iterator_type iter = lex.begin(first, last);
  72. return compile<qi::domain>(xpr).parse(
  73. iter, lex.end(), unused, unused, unused);
  74. }
  75. ///////////////////////////////////////////////////////////////////////////
  76. template <typename Iterator, typename Lexer, typename ParserExpr
  77. , typename Attribute>
  78. inline bool
  79. tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex
  80. , ParserExpr const& xpr, Attribute& attr)
  81. {
  82. // Report invalid expression error as early as possible.
  83. // If you got an error_invalid_expression error message here,
  84. // then the expression (expr) is not a valid spirit qi expression.
  85. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
  86. typename Lexer::iterator_type iter = lex.begin(first, last);
  87. return compile<qi::domain>(xpr).parse(
  88. iter, lex.end(), unused, unused, attr);
  89. }
  90. ///////////////////////////////////////////////////////////////////////////
  91. //
  92. // The tokenize_and_phrase_parse() function is one of the main Spirit API
  93. // functions. It simplifies using a lexer as the underlying token source
  94. // while phrase parsing a given input sequence.
  95. //
  96. // The function takes a pair of iterators spanning the underlying input
  97. // stream to parse, the lexer object (built from the token definitions)
  98. // and a parser object (built from the parser grammar definition). The
  99. // additional skipper parameter will be used as the skip parser during
  100. // the parsing process.
  101. //
  102. // The second version of this function additionally takes an attribute to
  103. // be used as the top level data structure instance the parser should use
  104. // to store the recognized input to.
  105. //
  106. // The function returns true if the parsing succeeded (the given input
  107. // sequence has been successfully matched by the given grammar).
  108. //
  109. // first, last: The pair of iterators spanning the underlying input
  110. // sequence to parse. These iterators must at least
  111. // conform to the requirements of the std::input_iterator
  112. // category.
  113. // On exit the iterator 'first' will be updated to the
  114. // position right after the last successfully matched
  115. // token.
  116. // lex: The lexer object (encoding the token definitions) to be
  117. // used to convert the input sequence into a sequence of
  118. // tokens. This token sequence is passed to the parsing
  119. // process. The LexerExpr type must conform to the
  120. // lexer interface described in the corresponding section
  121. // of the documentation.
  122. // xpr: The grammar object (encoding the parser grammar) to be
  123. // used to match the token sequence generated by the lex
  124. // object instance. The ParserExpr type must conform to
  125. // the grammar interface described in the corresponding
  126. // section of the documentation.
  127. // skipper: The skip parser to be used while parsing the given
  128. // input sequence. Note, the skip parser will have to
  129. // act on the same token sequence as the main parser
  130. // 'xpr'.
  131. // post_skip: The post_skip flag controls whether the function will
  132. // invoke an additional post skip after the main parser
  133. // returned.
  134. // attr: The top level attribute passed to the parser. It will
  135. // be populated during the parsing of the input sequence.
  136. // On exit it will hold the 'parser result' corresponding
  137. // to the matched input sequence.
  138. //
  139. ///////////////////////////////////////////////////////////////////////////
  140. template <typename Iterator, typename Lexer, typename ParserExpr
  141. , typename Skipper>
  142. inline bool
  143. tokenize_and_phrase_parse(Iterator& first, Iterator last
  144. , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
  145. , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip)
  146. {
  147. // Report invalid expression error as early as possible.
  148. // If you got an error_invalid_expression error message here,
  149. // then the expression (expr) is not a valid spirit qi expression.
  150. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
  151. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
  152. typedef
  153. typename spirit::result_of::compile<qi::domain, Skipper>::type
  154. skipper_type;
  155. skipper_type const skipper_ = compile<qi::domain>(skipper);
  156. typename Lexer::iterator_type iter = lex.begin(first, last);
  157. typename Lexer::iterator_type end = lex.end();
  158. if (!compile<qi::domain>(xpr).parse(
  159. iter, end, unused, skipper_, unused))
  160. return false;
  161. // do a final post-skip
  162. if (post_skip == skip_flag::postskip)
  163. qi::skip_over(iter, end, skipper_);
  164. return true;
  165. }
  166. template <typename Iterator, typename Lexer, typename ParserExpr
  167. , typename Skipper, typename Attribute>
  168. inline bool
  169. tokenize_and_phrase_parse(Iterator& first, Iterator last
  170. , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
  171. , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr)
  172. {
  173. // Report invalid expression error as early as possible.
  174. // If you got an error_invalid_expression error message here,
  175. // then the expression (expr) is not a valid spirit qi expression.
  176. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
  177. BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
  178. typedef
  179. typename spirit::result_of::compile<qi::domain, Skipper>::type
  180. skipper_type;
  181. skipper_type const skipper_ = compile<qi::domain>(skipper);
  182. typename Lexer::iterator_type iter = lex.begin(first, last);
  183. typename Lexer::iterator_type end = lex.end();
  184. if (!compile<qi::domain>(xpr).parse(
  185. iter, end, unused, skipper_, attr))
  186. return false;
  187. // do a final post-skip
  188. if (post_skip == skip_flag::postskip)
  189. qi::skip_over(iter, end, skipper_);
  190. return true;
  191. }
  192. ///////////////////////////////////////////////////////////////////////////
  193. template <typename Iterator, typename Lexer, typename ParserExpr
  194. , typename Skipper, typename Attribute>
  195. inline bool
  196. tokenize_and_phrase_parse(Iterator& first, Iterator last
  197. , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
  198. , Attribute& attr)
  199. {
  200. return tokenize_and_phrase_parse(first, last, lex, xpr, skipper
  201. , skip_flag::postskip, attr);
  202. }
  203. ///////////////////////////////////////////////////////////////////////////
  204. //
  205. // The tokenize() function is one of the main Spirit API functions. It
  206. // simplifies using a lexer to tokenize a given input sequence. Its main
  207. // purpose is to use the lexer to tokenize all the input.
  208. //
  209. // The second version below discards all generated tokens afterwards.
  210. // This is useful whenever all the needed functionality has been
  211. // implemented directly inside the lexer semantic actions, which are being
  212. // executed while the tokens are matched.
  213. //
  214. // The function takes a pair of iterators spanning the underlying input
  215. // stream to scan, the lexer object (built from the token definitions),
  216. // and an (optional) functor being called for each of the generated tokens.
  217. //
  218. // The function returns true if the scanning of the input succeeded (the
  219. // given input sequence has been successfully matched by the given token
  220. // definitions).
  221. //
  222. // first, last: The pair of iterators spanning the underlying input
  223. // sequence to parse. These iterators must at least
  224. // conform to the requirements of the std::input_iterator
  225. // category.
  226. // On exit the iterator 'first' will be updated to the
  227. // position right after the last successfully matched
  228. // token.
  229. // lex: The lexer object (encoding the token definitions) to be
  230. // used to convert the input sequence into a sequence of
  231. // tokens. The LexerExpr type must conform to the
  232. // lexer interface described in the corresponding section
  233. // of the documentation.
  234. // f: A functor (callable object) taking a single argument of
  235. // the token type and returning a bool, indicating whether
  236. // the tokenization should be canceled.
  237. // initial_state: The name of the state the lexer should start matching.
  238. // The default value is zero, causing the lexer to start
  239. // in its 'INITIAL' state.
  240. //
  241. ///////////////////////////////////////////////////////////////////////////
namespace detail
{
    // Dispatch helpers that adapt the different callback kinds accepted
    // by tokenize() to a uniform bool result, where returning 'false'
    // cancels the tokenization loop.

    // Generic function object: its own return value decides whether to
    // continue tokenizing.
    template <typename Token, typename F>
    bool tokenize_callback(Token const& t, F f)
    {
        return f(t);
    }

    // Phoenix actor: provides no usable return value, so tokenization
    // is never canceled through this overload.
    template <typename Token, typename Eval>
    bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f)
    {
        f(t);
        return true;
    }

    // Plain function pointer returning void: never cancels.
    template <typename Token>
    bool tokenize_callback(Token const& t, void (*f)(Token const&))
    {
        f(t);
        return true;
    }

    // Plain function pointer returning bool: its result decides.
    template <typename Token>
    bool tokenize_callback(Token const& t, bool (*f)(Token const&))
    {
        return f(t);
    }
}
  267. template <typename Iterator, typename Lexer, typename F>
  268. inline bool
  269. tokenize(Iterator& first, Iterator last, Lexer const& lex, F f
  270. , typename Lexer::char_type const* initial_state = 0)
  271. {
  272. typedef typename Lexer::iterator_type iterator_type;
  273. iterator_type iter = lex.begin(first, last, initial_state);
  274. iterator_type end = lex.end();
  275. for (/**/; iter != end && token_is_valid(*iter); ++iter)
  276. {
  277. if (!detail::tokenize_callback(*iter, f))
  278. return false;
  279. }
  280. return (iter == end) ? true : false;
  281. }
  282. ///////////////////////////////////////////////////////////////////////////
  283. template <typename Iterator, typename Lexer>
  284. inline bool
  285. tokenize(Iterator& first, Iterator last, Lexer const& lex
  286. , typename Lexer::char_type const* initial_state = 0)
  287. {
  288. typedef typename Lexer::iterator_type iterator_type;
  289. iterator_type iter = lex.begin(first, last, initial_state);
  290. iterator_type end = lex.end();
  291. while (iter != end && token_is_valid(*iter))
  292. ++iter;
  293. return (iter == end) ? true : false;
  294. }
  295. }}}
  296. #endif