- // Copyright (c) 2001-2010 Hartmut Kaiser
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- // This example is the equivalent to the following lex program:
- /*
- //[wcp_flex_version
- %{
- int c = 0, w = 0, l = 0;
- %}
- word [^ \t\n]+
- eol \n
- %%
- {word} { ++w; c += yyleng; }
- {eol} { ++c; ++l; }
- . { ++c; }
- %%
- main()
- {
- yylex();
- printf("%d %d %d\n", l, w, c);
- }
- //]
- */
- // This example implements the word count functionality of the UNIX 'wc'
- // command: it prints the number of lines, words and characters in a file.
- //
- // The example additionally demonstrates how to use the add_pattern(...)(...)
- // syntax to define lexer patterns. These patterns are essentially parameter-
- // less 'macros' for regular expressions, which makes it possible to
- // simplify their definition.
- // #define BOOST_SPIRIT_LEXERTL_DEBUG
- #define BOOST_VARIANT_MINIMIZE_SIZE
- #include <boost/config/warning_disable.hpp>
- //[wcp_includes
- #include <boost/spirit/include/qi.hpp>
- #include <boost/spirit/include/lex_lexertl.hpp>
- #include <boost/spirit/include/phoenix_operator.hpp>
- #include <boost/spirit/include/phoenix_statement.hpp>
- #include <boost/spirit/include/phoenix_container.hpp>
- //]
- #include <iostream>
- #include <string>
- #include "example.hpp"
- //[wcp_namespaces
- using namespace boost::spirit;
- using namespace boost::spirit::ascii;
- //]
- ///////////////////////////////////////////////////////////////////////////////
- // Token definition: We use the lexertl based lexer engine as the underlying
- // lexer type.
- ///////////////////////////////////////////////////////////////////////////////
- //[wcp_token_ids
- enum tokenids
- {
- IDANY = lex::min_token_id + 10
- };
- //]
- //[wcp_token_definition
- template <typename Lexer>
- struct word_count_tokens : lex::lexer<Lexer>
- {
- word_count_tokens()
- {
- // define patterns (lexer macros) to be used during token definition
- // below
- this->self.add_pattern
- ("WORD", "[^ \t\n]+")
- ;
- // define tokens and associate them with the lexer
- word = "{WORD}"; // reference the pattern 'WORD' as defined above
- // this lexer will recognize 3 token types: words, newlines, and
- // everything else
- this->self.add
- (word) // no token id is needed here
- ('\n') // characters are usable as tokens as well
- (".", IDANY) // string literals will not be escaped by the library
- ;
- }
- // the token 'word' exposes the matched string as its parser attribute
- lex::token_def<std::string> word;
- };
- //]
- ///////////////////////////////////////////////////////////////////////////////
- // Grammar definition
- ///////////////////////////////////////////////////////////////////////////////
- //[wcp_grammar_definition
- template <typename Iterator>
- struct word_count_grammar : qi::grammar<Iterator>
- {
- template <typename TokenDef>
- word_count_grammar(TokenDef const& tok)
- : word_count_grammar::base_type(start)
- , c(0), w(0), l(0)
- {
- using boost::phoenix::ref;
- using boost::phoenix::size;
- start = *( tok.word [++ref(w), ref(c) += size(_1)]
- | lit('\n') [++ref(c), ++ref(l)]
- | qi::token(IDANY) [++ref(c)]
- )
- ;
- }
- std::size_t c, w, l;
- qi::rule<Iterator> start;
- };
- //]
- ///////////////////////////////////////////////////////////////////////////////
- //[wcp_main
- int main(int argc, char* argv[])
- {
- /*< Define the token type to be used: `std::string` is available as the
- type of the token attribute
- >*/ typedef lex::lexertl::token<
- char const*, boost::mpl::vector<std::string>
- > token_type;
- /*< Define the lexer type to use implementing the state machine
- >*/ typedef lex::lexertl::lexer<token_type> lexer_type;
- /*< Define the iterator type exposed by the lexer type
- >*/ typedef word_count_tokens<lexer_type>::iterator_type iterator_type;
- // now we use the types defined above to create the lexer and grammar
- // object instances needed to invoke the parsing process
- word_count_tokens<lexer_type> word_count; // Our lexer
- word_count_grammar<iterator_type> g (word_count); // Our parser
- // read in the file int memory
- std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
- char const* first = str.c_str();
- char const* last = &first[str.size()];
- /*< Parsing is done based on the token stream, not the character
- stream read from the input. The function `tokenize_and_parse()` wraps
- the passed iterator range `[first, last)` by the lexical analyzer and
- uses its exposed iterators to parse the token stream.
- >*/ bool r = lex::tokenize_and_parse(first, last, word_count, g);
- if (r) {
- std::cout << "lines: " << g.l << ", words: " << g.w
- << ", characters: " << g.c << "\n";
- }
- else {
- std::string rest(first, last);
- std::cerr << "Parsing failed\n" << "stopped at: \""
- << rest << "\"\n";
- }
- return 0;
- }
- //]
|