// strip_comments.cpp
// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is the equivalent to the following lex program:
//
// %{
// /* INITIAL is the default start state. COMMENT is our new */
// /* state where we remove comments. */
// %}
//
// %s COMMENT
// %%
// <INITIAL>"//".* ;
// <INITIAL>"/*" BEGIN COMMENT;
// <INITIAL>. ECHO;
// <INITIAL>[\n] ECHO;
// <COMMENT>"*/" BEGIN INITIAL;
// <COMMENT>. ;
// <COMMENT>[\n] ;
// %%
//
// main()
// {
//     yylex();
// }
//
// Its purpose is to strip comments out of C code.
//
// Additionally this example demonstrates the use of lexer states to structure
// the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG
  33. #include <boost/config/warning_disable.hpp>
  34. #include <boost/spirit/include/qi.hpp>
  35. #include <boost/spirit/include/lex_lexertl.hpp>
  36. #include <boost/spirit/include/phoenix_operator.hpp>
  37. #include <boost/spirit/include/phoenix_container.hpp>
  38. #include <iostream>
  39. #include <string>
  40. #include "example.hpp"
  41. using namespace boost::spirit;
  42. ///////////////////////////////////////////////////////////////////////////////
  43. // Token definition: We use the lexertl based lexer engine as the underlying
  44. // lexer type.
  45. ///////////////////////////////////////////////////////////////////////////////
  46. enum tokenids
  47. {
  48. IDANY = lex::min_token_id + 10
  49. };
  50. template <typename Lexer>
  51. struct strip_comments_tokens : lex::lexer<Lexer>
  52. {
  53. strip_comments_tokens()
  54. : strip_comments_tokens::base_type(lex::match_flags::match_default)
  55. {
  56. // define tokens and associate them with the lexer
  57. cppcomment = "\"//\"[^\n]*"; // '//[^\n]*'
  58. ccomment = "\"/*\""; // '/*'
  59. endcomment = "\"*/\""; // '*/'
  60. // The following tokens are associated with the default lexer state
  61. // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
  62. // strictly optional.
  63. this->self.add
  64. (cppcomment) // no explicit token id is associated
  65. (ccomment)
  66. (".", IDANY) // IDANY is the token id associated with this token
  67. // definition
  68. ;
  69. // The following tokens are associated with the lexer state "COMMENT".
  70. // We switch lexer states from inside the parsing process using the
  71. // in_state("COMMENT")[] parser component as shown below.
  72. this->self("COMMENT").add
  73. (endcomment)
  74. (".", IDANY)
  75. ;
  76. }
  77. lex::token_def<> cppcomment, ccomment, endcomment;
  78. };
  79. ///////////////////////////////////////////////////////////////////////////////
  80. // Grammar definition
  81. ///////////////////////////////////////////////////////////////////////////////
  82. template <typename Iterator>
  83. struct strip_comments_grammar : qi::grammar<Iterator>
  84. {
  85. template <typename TokenDef>
  86. strip_comments_grammar(TokenDef const& tok)
  87. : strip_comments_grammar::base_type(start)
  88. {
  89. // The in_state("COMMENT")[...] parser component switches the lexer
  90. // state to be 'COMMENT' during the matching of the embedded parser.
  91. start = *( tok.ccomment
  92. >> qi::in_state("COMMENT")
  93. [
  94. // the lexer is in the 'COMMENT' state during
  95. // matching of the following parser components
  96. *token(IDANY) >> tok.endcomment
  97. ]
  98. | tok.cppcomment
  99. | qi::token(IDANY) [ std::cout << _1 ]
  100. )
  101. ;
  102. }
  103. qi::rule<Iterator> start;
  104. };
  105. ///////////////////////////////////////////////////////////////////////////////
  106. int main(int argc, char* argv[])
  107. {
  108. // iterator type used to expose the underlying input stream
  109. typedef std::string::iterator base_iterator_type;
  110. // lexer type
  111. typedef
  112. lex::lexertl::lexer<lex::lexertl::token<base_iterator_type> >
  113. lexer_type;
  114. // iterator type exposed by the lexer
  115. typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;
  116. // now we use the types defined above to create the lexer and grammar
  117. // object instances needed to invoke the parsing process
  118. strip_comments_tokens<lexer_type> strip_comments; // Our lexer
  119. strip_comments_grammar<iterator_type> g (strip_comments); // Our parser
  120. // Parsing is done based on the token stream, not the character
  121. // stream read from the input.
  122. std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
  123. base_iterator_type first = str.begin();
  124. bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, g);
  125. if (r) {
  126. std::cout << "-------------------------\n";
  127. std::cout << "Parsing succeeded\n";
  128. std::cout << "-------------------------\n";
  129. }
  130. else {
  131. std::string rest(first, str.end());
  132. std::cout << "-------------------------\n";
  133. std::cout << "Parsing failed\n";
  134. std::cout << "stopped at: \"" << rest << "\"\n";
  135. std::cout << "-------------------------\n";
  136. }
  137. std::cout << "Bye... :-) \n\n";
  138. return 0;
  139. }