123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- // Copyright (c) 2001-2010 Hartmut Kaiser
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- // This example is equivalent to the following lex program:
- /*
- //[wcl_flex_version
- %{
- int c = 0, w = 0, l = 0;
- %}
- %%
- [^ \t\n]+ { ++w; c += yyleng; }
- \n { ++c; ++l; }
- . { ++c; }
- %%
- main()
- {
- yylex();
- printf("%d %d %d\n", l, w, c);
- }
- //]
- */
- // Its purpose is to do the word count function of the wc command in UNIX. It
- // prints the number of lines, words and characters in a file.
- //
- // This example shows how to use semantic actions associated with token
- // definitions to directly attach actions to tokens. These get executed
- // whenever the corresponding token is matched in the input sequence. Note
- // how this example implements all functionality directly in the lexer
- // definition without any need for a parser.
- // #define BOOST_SPIRIT_LEXERTL_DEBUG
#include <boost/config/warning_disable.hpp>
//[wcl_includes
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
//]

#include <iostream>
#include <iterator>
#include <string>

#include "example.hpp"
- //[wcl_namespaces
- namespace lex = boost::spirit::lex;
- //]
- ///////////////////////////////////////////////////////////////////////////////
- // Token definition: We use the lexertl based lexer engine as the underlying
- // lexer type.
- //
- // Note, the token definition type has to be instantiated with the
- // 'lexertl::actor_lexer' template, which is necessary to be able to use lexer
- // semantic actions.
- ///////////////////////////////////////////////////////////////////////////////
// Function object returning the number of increments needed to get from
// `begin` to `end`, i.e. std::distance(begin, end).
//
// The nested `result` template exposes the call's return type as
// `result<Iterator1, Iterator2>::type`; it is the difference type of the
// first iterator. Only the standard library is required: the difference type
// is taken from std::iterator_traits rather than from a Boost.Iterator
// metafunction whose header this file never includes directly.
struct distance_func
{
    // Return-type computation: difference type of Iterator1 (Iterator2 is
    // accepted only so that the arity matches operator()).
    template <typename Iterator1, typename Iterator2>
    struct result
    {
        typedef typename std::iterator_traits<Iterator1>::difference_type type;
    };

    // begin, end: iterators delimiting the range to measure. Both are
    // forwarded unchanged to std::distance, so they must be usable together
    // as its two arguments.
    template <typename Iterator1, typename Iterator2>
    typename result<Iterator1, Iterator2>::type
    operator()(Iterator1 const& begin, Iterator2 const& end) const
    {
        return std::distance(begin, end);
    }
};
- boost::phoenix::function<distance_func> const distance = distance_func(); // Phoenix function object wrapping distance_func; called as distance(_start, _end) in the `word` token's semantic action
- //[wcl_token_definition
- template <typename Lexer>
- struct word_count_tokens : lex::lexer<Lexer>
- {
- word_count_tokens()
- : c(0), w(0), l(0)
- , word("[^ \t\n]+") // define tokens
- , eol("\n")
- , any(".")
- {
- using boost::spirit::lex::_start;
- using boost::spirit::lex::_end;
- using boost::phoenix::ref;
- // associate tokens with the lexer
- this->self
- = word [++ref(w), ref(c) += distance(_start, _end)]
- | eol [++ref(c), ++ref(l)]
- | any [++ref(c)]
- ;
- }
- std::size_t c, w, l;
- lex::token_def<> word, eol, any;
- };
- //]
- ///////////////////////////////////////////////////////////////////////////////
- //[wcl_main
- int main(int argc, char* argv[])
- {
- /*< Specifying `omit` as the token attribute type generates a token class
- not holding any token attribute at all (not even the iterator range of the
- matched input sequence), therefore optimizing the token, the lexer, and
- possibly the parser implementation as much as possible. Specifying
- `mpl::false_` as the 3rd template parameter generates a token
- type and an iterator, both holding no lexer state, allowing for even more
- aggressive optimizations. As a result the token instances contain the token
- ids as the only data member.
- >*/ typedef
- lex::lexertl::token<char const*, lex::omit, boost::mpl::false_>
- token_type;
- /*< This defines the lexer type to use
- >*/ typedef lex::lexertl::actor_lexer<token_type> lexer_type;
- /*< Create the lexer object instance needed to invoke the lexical analysis
- >*/ word_count_tokens<lexer_type> word_count_lexer;
- /*< Read input from the given file, tokenize all the input, while discarding
- all generated tokens
- >*/ std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
- char const* first = str.c_str();
- char const* last = &first[str.size()];
- /*< Create a pair of iterators returning the sequence of generated tokens
- >*/ lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
- lexer_type::iterator_type end = word_count_lexer.end();
- /*< Here we simply iterate over all tokens, making sure to break the loop
- if an invalid token gets returned from the lexer
- >*/ while (iter != end && token_is_valid(*iter))
- ++iter;
- if (iter == end) {
- std::cout << "lines: " << word_count_lexer.l
- << ", words: " << word_count_lexer.w
- << ", characters: " << word_count_lexer.c
- << "\n";
- }
- else {
- std::string rest(first, last);
- std::cout << "Lexical analysis failed\n" << "stopped at: \""
- << rest << "\"\n";
- }
- return 0;
- }
- //]
|