123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255 |
- // Copyright (c) 2001-2011 Hartmut Kaiser
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #include <boost/detail/lightweight_test.hpp>
- #include <boost/spirit/include/phoenix_object.hpp>
- #include <boost/spirit/include/phoenix_operator.hpp>
- #include <boost/spirit/include/phoenix_container.hpp>
- #include <boost/spirit/include/lex_lexertl.hpp>
- #include <boost/foreach.hpp>
- using namespace boost::spirit;
- ///////////////////////////////////////////////////////////////////////////////
- // semantic action analyzing leading whitespace
// Token ids assigned by the whitespace semantic action when the amount of
// leading whitespace changes relative to the indentation stack.
enum tokenids
{
    ID_INDENT = 1000,   // leading whitespace is deeper than the current level
    ID_DEDENT = 1001    // leading whitespace is shallower than the current level
};
- struct handle_whitespace
- {
- handle_whitespace(std::stack<unsigned int>& indents)
- : indents_(indents) {}
- template <typename Iterator, typename IdType, typename Context>
- void operator()(Iterator& start, Iterator& end
- , BOOST_SCOPED_ENUM(lex::pass_flags)& pass, IdType& id
- , Context& ctx)
- {
- unsigned int level = 0;
- if (is_indent(start, end, level)) {
- id = ID_INDENT;
- ctx.set_value(level);
- }
- else if (is_dedent(start, end, level)) {
- id = ID_DEDENT;
- ctx.set_value(level);
- }
- else {
- pass = lex::pass_flags::pass_ignore;
- }
- }
- // Get indentation level, for now (no tabs) we just count the spaces
- // once we allow tabs in the regex this needs to be expanded
- template <typename Iterator>
- unsigned int get_indent(Iterator& start, Iterator& end)
- {
- return static_cast<unsigned int>(std::distance(start, end));
- }
- template <typename Iterator>
- bool is_dedent(Iterator& start, Iterator& end, unsigned int& level)
- {
- unsigned int newindent = get_indent(start, end);
- while (!indents_.empty() && newindent < indents_.top()) {
- level++; // dedent one more level
- indents_.pop();
- }
- return level > 0;
- }
- // Handle additional indentation
- template <typename Iterator>
- bool is_indent(Iterator& start, Iterator& end, unsigned int& level)
- {
- unsigned int newindent = get_indent(start, end);
- if (indents_.empty() || newindent > indents_.top()) {
- level = 1; // indent one more level
- indents_.push(newindent);
- return true;
- }
- return false;
- }
- std::stack<unsigned int>& indents_;
- // silence MSVC warning C4512: assignment operator could not be generated
- BOOST_DELETED_FUNCTION(handle_whitespace& operator= (handle_whitespace const&));
- };
- ///////////////////////////////////////////////////////////////////////////////
- // Token definition
- template <typename Lexer>
- struct set_token_value : boost::spirit::lex::lexer<Lexer>
- {
- set_token_value()
- {
- using lex::_pass;
- // define tokens and associate them with the lexer
- whitespace = "^[ ]+";
- newline = '\n';
- this->self = whitespace[ handle_whitespace(indents) ];
- this->self += newline[ _pass = lex::pass_flags::pass_ignore ];
- }
- lex::token_def<unsigned int> whitespace;
- lex::token_def<> newline;
- std::stack<unsigned int> indents;
- };
- ///////////////////////////////////////////////////////////////////////////////
// One expected token: its id and its unsigned integer value.
// Arrays of these are terminated by an entry whose id is -1.
struct token_data
{
    int id;                 // expected token id, or -1 for the end marker
    unsigned int value;     // expected token value
};
- template <typename Token>
- inline
- bool test_tokens(token_data const* d, std::vector<Token> const& tokens)
- {
- BOOST_FOREACH(Token const& t, tokens)
- {
- if (d->id == -1)
- return false; // reached end of expected data
- typename Token::token_value_type const& value (t.value());
- if (t.id() != static_cast<std::size_t>(d->id)) // token id must match
- return false;
- if (value.which() != 1) // must have an integer value
- return false;
- if (boost::get<unsigned int>(value) != d->value) // value must match
- return false;
- ++d;
- }
- return (d->id == -1) ? true : false;
- }
// Checks the indentation stack, from top to bottom, against the expected
// widths in 'i', which must be terminated by -1.
//
// Note: the stack is consumed while comparing; entries are popped until the
// first mismatch (or until it is empty).
inline
bool test_indents(int *i, std::stack<unsigned int>& indents)
{
    for (/**/; !indents.empty(); ++i, indents.pop())
    {
        // either the expected data ran out or the width differs
        if (*i == -1 || indents.top() != static_cast<unsigned int>(*i))
            return false;
    }

    // stack exhausted; the expected data must be exhausted as well
    return *i == -1;
}
- ///////////////////////////////////////////////////////////////////////////////
- int main()
- {
- namespace lex = boost::spirit::lex;
- namespace phoenix = boost::phoenix;
- typedef std::string::iterator base_iterator_type;
- typedef boost::mpl::vector<unsigned int> token_value_types;
- typedef lex::lexertl::token<base_iterator_type, token_value_types> token_type;
- typedef lex::lexertl::actor_lexer<token_type> lexer_type;
- // test simple indent
- {
- set_token_value<lexer_type> lexer;
- std::vector<token_type> tokens;
- std::string input(" ");
- base_iterator_type first = input.begin();
- using phoenix::arg_names::_1;
- BOOST_TEST(lex::tokenize(first, input.end(), lexer
- , phoenix::push_back(phoenix::ref(tokens), _1)));
- int i[] = { 4, -1 };
- BOOST_TEST(test_indents(i, lexer.indents));
- token_data d[] = { { ID_INDENT, 1 }, { -1, 0 } };
- BOOST_TEST(test_tokens(d, tokens));
- }
- // test two indents
- {
- set_token_value<lexer_type> lexer;
- std::vector<token_type> tokens;
- std::string input(
- " \n"
- " \n");
- base_iterator_type first = input.begin();
- using phoenix::arg_names::_1;
- BOOST_TEST(lex::tokenize(first, input.end(), lexer
- , phoenix::push_back(phoenix::ref(tokens), _1)));
- int i[] = { 8, 4, -1 };
- BOOST_TEST(test_indents(i, lexer.indents));
- token_data d[] = {
- { ID_INDENT, 1 }, { ID_INDENT, 1 }
- , { -1, 0 } };
- BOOST_TEST(test_tokens(d, tokens));
- }
- // test one dedent
- {
- set_token_value<lexer_type> lexer;
- std::vector<token_type> tokens;
- std::string input(
- " \n"
- " \n"
- " \n");
- base_iterator_type first = input.begin();
- using phoenix::arg_names::_1;
- BOOST_TEST(lex::tokenize(first, input.end(), lexer
- , phoenix::push_back(phoenix::ref(tokens), _1)));
- int i[] = { 4, -1 };
- BOOST_TEST(test_indents(i, lexer.indents));
- token_data d[] = {
- { ID_INDENT, 1 }, { ID_INDENT, 1 }
- , { ID_DEDENT, 1 }
- , { -1, 0 } };
- BOOST_TEST(test_tokens(d, tokens));
- }
- // test two dedents
- {
- set_token_value<lexer_type> lexer;
- std::vector<token_type> tokens;
- std::string input(
- " \n"
- " \n"
- " \n"
- " \n");
- base_iterator_type first = input.begin();
- using phoenix::arg_names::_1;
- BOOST_TEST(lex::tokenize(first, input.end(), lexer
- , phoenix::push_back(phoenix::ref(tokens), _1)));
- int i[] = { 4, -1 };
- BOOST_TEST(test_indents(i, lexer.indents));
- token_data d[] = {
- { ID_INDENT, 1 }, { ID_INDENT, 1 }, { ID_INDENT, 1 }
- , { ID_DEDENT, 2 }
- , { -1, 0 } };
- BOOST_TEST(test_tokens(d, tokens));
- }
- return boost::report_errors();
- }
|