/*============================================================================= Boost.Wave: A Standard compliant C++ preprocessor library Xpressive based generic lexer http://www.boost.org/ Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) =============================================================================*/ #if !defined(XPRESSIVE_LEXER_HPP) #define XPRESSIVE_LEXER_HPP #include #include #include #include #include #include namespace boost { namespace wave { namespace cpplexer { namespace xlex { /////////////////////////////////////////////////////////////////////////////// template < typename Iterator = char const*, typename Token = int, typename Callback = bool (*)( Iterator const&, Iterator&, Iterator const&, Token const&) > class xpressive_lexer { private: typedef typename boost::detail::iterator_traits::value_type char_type; typedef std::basic_string string_type; // this represents a single token to match struct regex_info { typedef boost::xpressive::basic_regex regex_type; string_type str; Token token; regex_type regex; Callback callback; regex_info(string_type const& str, Token const& token, Callback const& callback) : str(str), token(token), regex(regex_type::compile(str)), callback(callback) {} // these structures are to be ordered by the token id friend bool operator< (regex_info const& lhs, regex_info const& rhs) { return lhs.token < rhs.token; } }; typedef std::vector regex_list_type; public: typedef Callback callback_type; xpressive_lexer() {} // register a the regex with the lexer void register_regex(string_type const& regex, Token const& id, Callback const& cb = Callback()); // match the given input and return the next recognized token Token next_token(Iterator &first, Iterator const& last, string_type& token); private: regex_list_type regex_list; }; /////////////////////////////////////////////////////////////////////////////// template inline void xpressive_lexer::register_regex( string_type const& regex, Token const& id, Callback const& cb) { regex_list.push_back(regex_info(regex, id, cb)); } /////////////////////////////////////////////////////////////////////////////// template inline Token xpressive_lexer::next_token( Iterator &first, Iterator const& last, string_type& token) { typedef typename regex_list_type::iterator iterator; xpressive::match_results regex_result; for (iterator it = regex_list.begin(), end = regex_list.end(); it != end; ++it) { namespace xpressive = boost::xpressive; // regex_info const& curr_regex = *it; // xpressive::match_results regex_result; if (xpressive::regex_search(first, last, regex_result, (*it).regex, xpressive::regex_constants::match_continuous)) { Iterator saved = first; Token rval = (*it).token; std::advance(first, regex_result.length()); token = string_type(saved, first); if (NULL != (*it).callback) { // execute corresponding callback if ((*it).callback(saved, first, last, (*it).token)) rval = next_token(first, last, token); } return rval; } } return Token(-1); // TODO: change this to use token_traits } /////////////////////////////////////////////////////////////////////////////// }}}} // boost::wave::cpplexer::xlex #endif // !defined(XPRESSIVE_LEXER_HPP)