// set_token_value.cpp
  1. // Copyright (c) 2001-2011 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #include <boost/detail/lightweight_test.hpp>
  6. #include <boost/spirit/include/phoenix_object.hpp>
  7. #include <boost/spirit/include/phoenix_operator.hpp>
  8. #include <boost/spirit/include/phoenix_container.hpp>
  9. #include <boost/spirit/include/lex_lexertl.hpp>
  10. #include <boost/foreach.hpp>
  11. using namespace boost::spirit;
  12. ///////////////////////////////////////////////////////////////////////////////
  13. // semantic action analyzing leading whitespace
  /// Token ids assigned to a run of leading blanks by the whitespace semantic
  /// action, depending on how the run's width compares with the indentation
  /// levels that are currently open.
  enum tokenids
  {
      ID_INDENT = 1000,   ///< width is deeper than the innermost open level
      ID_DEDENT           ///< width closes one or more open levels (== 1001)
  };
  19. struct handle_whitespace
  20. {
  21. handle_whitespace(std::stack<unsigned int>& indents)
  22. : indents_(indents) {}
  23. template <typename Iterator, typename IdType, typename Context>
  24. void operator()(Iterator& start, Iterator& end
  25. , BOOST_SCOPED_ENUM(lex::pass_flags)& pass, IdType& id
  26. , Context& ctx)
  27. {
  28. unsigned int level = 0;
  29. if (is_indent(start, end, level)) {
  30. id = ID_INDENT;
  31. ctx.set_value(level);
  32. }
  33. else if (is_dedent(start, end, level)) {
  34. id = ID_DEDENT;
  35. ctx.set_value(level);
  36. }
  37. else {
  38. pass = lex::pass_flags::pass_ignore;
  39. }
  40. }
  41. // Get indentation level, for now (no tabs) we just count the spaces
  42. // once we allow tabs in the regex this needs to be expanded
  43. template <typename Iterator>
  44. unsigned int get_indent(Iterator& start, Iterator& end)
  45. {
  46. return static_cast<unsigned int>(std::distance(start, end));
  47. }
  48. template <typename Iterator>
  49. bool is_dedent(Iterator& start, Iterator& end, unsigned int& level)
  50. {
  51. unsigned int newindent = get_indent(start, end);
  52. while (!indents_.empty() && newindent < indents_.top()) {
  53. level++; // dedent one more level
  54. indents_.pop();
  55. }
  56. return level > 0;
  57. }
  58. // Handle additional indentation
  59. template <typename Iterator>
  60. bool is_indent(Iterator& start, Iterator& end, unsigned int& level)
  61. {
  62. unsigned int newindent = get_indent(start, end);
  63. if (indents_.empty() || newindent > indents_.top()) {
  64. level = 1; // indent one more level
  65. indents_.push(newindent);
  66. return true;
  67. }
  68. return false;
  69. }
  70. std::stack<unsigned int>& indents_;
  71. // silence MSVC warning C4512: assignment operator could not be generated
  72. BOOST_DELETED_FUNCTION(handle_whitespace& operator= (handle_whitespace const&));
  73. };
  74. ///////////////////////////////////////////////////////////////////////////////
  75. // Token definition
  76. template <typename Lexer>
  77. struct set_token_value : boost::spirit::lex::lexer<Lexer>
  78. {
  79. set_token_value()
  80. {
  81. using lex::_pass;
  82. // define tokens and associate them with the lexer
  83. whitespace = "^[ ]+";
  84. newline = '\n';
  85. this->self = whitespace[ handle_whitespace(indents) ];
  86. this->self += newline[ _pass = lex::pass_flags::pass_ignore ];
  87. }
  88. lex::token_def<unsigned int> whitespace;
  89. lex::token_def<> newline;
  90. std::stack<unsigned int> indents;
  91. };
  92. ///////////////////////////////////////////////////////////////////////////////
  /// One expected token in a test table. Tables are terminated by an entry
  /// whose id is -1.
  struct token_data
  {
      int id;                 ///< expected token id, or -1 for the end marker
      unsigned int value;     ///< expected token value
  };
  98. template <typename Token>
  99. inline
  100. bool test_tokens(token_data const* d, std::vector<Token> const& tokens)
  101. {
  102. BOOST_FOREACH(Token const& t, tokens)
  103. {
  104. if (d->id == -1)
  105. return false; // reached end of expected data
  106. typename Token::token_value_type const& value (t.value());
  107. if (t.id() != static_cast<std::size_t>(d->id)) // token id must match
  108. return false;
  109. if (value.which() != 1) // must have an integer value
  110. return false;
  111. if (boost::get<unsigned int>(value) != d->value) // value must match
  112. return false;
  113. ++d;
  114. }
  115. return (d->id == -1) ? true : false;
  116. }
  /// Compares the indentation stack left behind by the lexer with a list of
  /// expected widths.
  ///
  /// The stack is consumed while comparing: every matching level is popped,
  /// and on a mismatch the non-matching level and everything below it stay on
  /// the stack.
  ///
  /// @param i       expected widths from the top of the stack downwards,
  ///                terminated by -1
  /// @param indents stack to check (modified, see above)
  /// @return true only if every level matches and the list has no entries
  ///         left over.
  inline
  bool test_indents(int const* i, std::stack<unsigned int>& indents)
  {
      while (!indents.empty())
      {
          if (*i == -1)
              return false;           // reached end of expected data
          if (indents.top() != static_cast<unsigned int>(*i))
              return false;           // value must match
          ++i;
          indents.pop();
      }

      // stack exhausted; succeed only if the expected list is exhausted too
      return *i == -1;
  }
  131. ///////////////////////////////////////////////////////////////////////////////
  132. int main()
  133. {
  134. namespace lex = boost::spirit::lex;
  135. namespace phoenix = boost::phoenix;
  136. typedef std::string::iterator base_iterator_type;
  137. typedef boost::mpl::vector<unsigned int> token_value_types;
  138. typedef lex::lexertl::token<base_iterator_type, token_value_types> token_type;
  139. typedef lex::lexertl::actor_lexer<token_type> lexer_type;
  140. // test simple indent
  141. {
  142. set_token_value<lexer_type> lexer;
  143. std::vector<token_type> tokens;
  144. std::string input(" ");
  145. base_iterator_type first = input.begin();
  146. using phoenix::arg_names::_1;
  147. BOOST_TEST(lex::tokenize(first, input.end(), lexer
  148. , phoenix::push_back(phoenix::ref(tokens), _1)));
  149. int i[] = { 4, -1 };
  150. BOOST_TEST(test_indents(i, lexer.indents));
  151. token_data d[] = { { ID_INDENT, 1 }, { -1, 0 } };
  152. BOOST_TEST(test_tokens(d, tokens));
  153. }
  154. // test two indents
  155. {
  156. set_token_value<lexer_type> lexer;
  157. std::vector<token_type> tokens;
  158. std::string input(
  159. " \n"
  160. " \n");
  161. base_iterator_type first = input.begin();
  162. using phoenix::arg_names::_1;
  163. BOOST_TEST(lex::tokenize(first, input.end(), lexer
  164. , phoenix::push_back(phoenix::ref(tokens), _1)));
  165. int i[] = { 8, 4, -1 };
  166. BOOST_TEST(test_indents(i, lexer.indents));
  167. token_data d[] = {
  168. { ID_INDENT, 1 }, { ID_INDENT, 1 }
  169. , { -1, 0 } };
  170. BOOST_TEST(test_tokens(d, tokens));
  171. }
  172. // test one dedent
  173. {
  174. set_token_value<lexer_type> lexer;
  175. std::vector<token_type> tokens;
  176. std::string input(
  177. " \n"
  178. " \n"
  179. " \n");
  180. base_iterator_type first = input.begin();
  181. using phoenix::arg_names::_1;
  182. BOOST_TEST(lex::tokenize(first, input.end(), lexer
  183. , phoenix::push_back(phoenix::ref(tokens), _1)));
  184. int i[] = { 4, -1 };
  185. BOOST_TEST(test_indents(i, lexer.indents));
  186. token_data d[] = {
  187. { ID_INDENT, 1 }, { ID_INDENT, 1 }
  188. , { ID_DEDENT, 1 }
  189. , { -1, 0 } };
  190. BOOST_TEST(test_tokens(d, tokens));
  191. }
  192. // test two dedents
  193. {
  194. set_token_value<lexer_type> lexer;
  195. std::vector<token_type> tokens;
  196. std::string input(
  197. " \n"
  198. " \n"
  199. " \n"
  200. " \n");
  201. base_iterator_type first = input.begin();
  202. using phoenix::arg_names::_1;
  203. BOOST_TEST(lex::tokenize(first, input.end(), lexer
  204. , phoenix::push_back(phoenix::ref(tokens), _1)));
  205. int i[] = { 4, -1 };
  206. BOOST_TEST(test_indents(i, lexer.indents));
  207. token_data d[] = {
  208. { ID_INDENT, 1 }, { ID_INDENT, 1 }, { ID_INDENT, 1 }
  209. , { ID_DEDENT, 2 }
  210. , { -1, 0 } };
  211. BOOST_TEST(test_tokens(d, tokens));
  212. }
  213. return boost::report_errors();
  214. }