123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- /*=============================================================================
- Boost.Wave: A Standard compliant C++ preprocessor library
- Xpressive based generic lexer
-
- http://www.boost.org/
- Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
- Software License, Version 1.0. (See accompanying file
- LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- =============================================================================*/
- #if !defined(XPRESSIVE_LEXER_HPP)
- #define XPRESSIVE_LEXER_HPP
- #include <string>
- #include <vector>
- #include <utility>
- #include <algorithm>
- #include <boost/detail/iterator.hpp>
- #include <boost/xpressive/xpressive.hpp>
- namespace boost {
- namespace wave {
- namespace cpplexer {
- namespace xlex {
- ///////////////////////////////////////////////////////////////////////////////
- template <
- typename Iterator = char const*,
- typename Token = int,
- typename Callback = bool (*)(
- Iterator const&, Iterator&, Iterator const&, Token const&)
- >
- class xpressive_lexer
- {
- private:
- typedef typename boost::detail::iterator_traits<Iterator>::value_type
- char_type;
- typedef std::basic_string<char_type> string_type;
-
- // this represents a single token to match
- struct regex_info
- {
- typedef boost::xpressive::basic_regex<Iterator> regex_type;
-
- string_type str;
- Token token;
- regex_type regex;
- Callback callback;
- regex_info(string_type const& str, Token const& token,
- Callback const& callback)
- : str(str), token(token),
- regex(regex_type::compile(str)),
- callback(callback)
- {}
-
- // these structures are to be ordered by the token id
- friend bool operator< (regex_info const& lhs, regex_info const& rhs)
- {
- return lhs.token < rhs.token;
- }
- };
- typedef std::vector<regex_info> regex_list_type;
- public:
- typedef Callback callback_type;
-
- xpressive_lexer() {}
- // register a the regex with the lexer
- void register_regex(string_type const& regex, Token const& id,
- Callback const& cb = Callback());
- // match the given input and return the next recognized token
- Token next_token(Iterator &first, Iterator const& last, string_type& token);
- private:
- regex_list_type regex_list;
- };
- ///////////////////////////////////////////////////////////////////////////////
- template <typename Iterator, typename Token, typename Callback>
- inline void
- xpressive_lexer<Iterator, Token, Callback>::register_regex(
- string_type const& regex, Token const& id, Callback const& cb)
- {
- regex_list.push_back(regex_info(regex, id, cb));
- }
- ///////////////////////////////////////////////////////////////////////////////
- template <typename Iterator, typename Token, typename Callback>
- inline Token
- xpressive_lexer<Iterator, Token, Callback>::next_token(
- Iterator &first, Iterator const& last, string_type& token)
- {
- typedef typename regex_list_type::iterator iterator;
-
- xpressive::match_results<Iterator> regex_result;
- for (iterator it = regex_list.begin(), end = regex_list.end(); it != end; ++it)
- {
- namespace xpressive = boost::xpressive;
- // regex_info const& curr_regex = *it;
- // xpressive::match_results<Iterator> regex_result;
- if (xpressive::regex_search(first, last, regex_result, (*it).regex,
- xpressive::regex_constants::match_continuous))
- {
- Iterator saved = first;
- Token rval = (*it).token;
-
- std::advance(first, regex_result.length());
- token = string_type(saved, first);
- if (NULL != (*it).callback) {
- // execute corresponding callback
- if ((*it).callback(saved, first, last, (*it).token))
- rval = next_token(first, last, token);
- }
-
- return rval;
- }
- }
- return Token(-1); // TODO: change this to use token_traits<Token>
- }
- ///////////////////////////////////////////////////////////////////////////////
- }}}} // boost::wave::cpplexer::xlex
- #endif // !defined(XPRESSIVE_LEXER_HPP)
|