xpressive_lexer.hpp 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. /*=============================================================================
  2. Boost.Wave: A Standard compliant C++ preprocessor library
  3. Xpressive based generic lexer
  4. http://www.boost.org/
  5. Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
  6. Software License, Version 1.0. (See accompanying file
  7. LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. =============================================================================*/
  9. #if !defined(XPRESSIVE_LEXER_HPP)
  10. #define XPRESSIVE_LEXER_HPP
  11. #include <string>
  12. #include <vector>
  13. #include <utility>
  14. #include <algorithm>
  15. #include <boost/detail/iterator.hpp>
  16. #include <boost/xpressive/xpressive.hpp>
  17. namespace boost {
  18. namespace wave {
  19. namespace cpplexer {
  20. namespace xlex {
  21. ///////////////////////////////////////////////////////////////////////////////
  22. template <
  23. typename Iterator = char const*,
  24. typename Token = int,
  25. typename Callback = bool (*)(
  26. Iterator const&, Iterator&, Iterator const&, Token const&)
  27. >
  28. class xpressive_lexer
  29. {
  30. private:
  31. typedef typename boost::detail::iterator_traits<Iterator>::value_type
  32. char_type;
  33. typedef std::basic_string<char_type> string_type;
  34. // this represents a single token to match
  35. struct regex_info
  36. {
  37. typedef boost::xpressive::basic_regex<Iterator> regex_type;
  38. string_type str;
  39. Token token;
  40. regex_type regex;
  41. Callback callback;
  42. regex_info(string_type const& str, Token const& token,
  43. Callback const& callback)
  44. : str(str), token(token),
  45. regex(regex_type::compile(str)),
  46. callback(callback)
  47. {}
  48. // these structures are to be ordered by the token id
  49. friend bool operator< (regex_info const& lhs, regex_info const& rhs)
  50. {
  51. return lhs.token < rhs.token;
  52. }
  53. };
  54. typedef std::vector<regex_info> regex_list_type;
  55. public:
  56. typedef Callback callback_type;
  57. xpressive_lexer() {}
  58. // register a the regex with the lexer
  59. void register_regex(string_type const& regex, Token const& id,
  60. Callback const& cb = Callback());
  61. // match the given input and return the next recognized token
  62. Token next_token(Iterator &first, Iterator const& last, string_type& token);
  63. private:
  64. regex_list_type regex_list;
  65. };
  66. ///////////////////////////////////////////////////////////////////////////////
  67. template <typename Iterator, typename Token, typename Callback>
  68. inline void
  69. xpressive_lexer<Iterator, Token, Callback>::register_regex(
  70. string_type const& regex, Token const& id, Callback const& cb)
  71. {
  72. regex_list.push_back(regex_info(regex, id, cb));
  73. }
  74. ///////////////////////////////////////////////////////////////////////////////
  75. template <typename Iterator, typename Token, typename Callback>
  76. inline Token
  77. xpressive_lexer<Iterator, Token, Callback>::next_token(
  78. Iterator &first, Iterator const& last, string_type& token)
  79. {
  80. typedef typename regex_list_type::iterator iterator;
  81. xpressive::match_results<Iterator> regex_result;
  82. for (iterator it = regex_list.begin(), end = regex_list.end(); it != end; ++it)
  83. {
  84. namespace xpressive = boost::xpressive;
  85. // regex_info const& curr_regex = *it;
  86. // xpressive::match_results<Iterator> regex_result;
  87. if (xpressive::regex_search(first, last, regex_result, (*it).regex,
  88. xpressive::regex_constants::match_continuous))
  89. {
  90. Iterator saved = first;
  91. Token rval = (*it).token;
  92. std::advance(first, regex_result.length());
  93. token = string_type(saved, first);
  94. if (NULL != (*it).callback) {
  95. // execute corresponding callback
  96. if ((*it).callback(saved, first, last, (*it).token))
  97. rval = next_token(first, last, token);
  98. }
  99. return rval;
  100. }
  101. }
  102. return Token(-1); // TODO: change this to use token_traits<Token>
  103. }
  104. ///////////////////////////////////////////////////////////////////////////////
  105. }}}} // boost::wave::cpplexer::xlex
  106. #endif // !defined(XPRESSIVE_LEXER_HPP)