  //  Copyright (c) 2001-2011 Hartmut Kaiser
  //
  //  Distributed under the Boost Software License, Version 1.0. (See accompanying
  //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM)
  6. #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_NOV_18_2007_1112PM
  7. #if defined(_MSC_VER)
  8. #pragma once
  9. #endif
  10. #include <boost/mpl/bool.hpp>
  11. #include <boost/detail/workaround.hpp>
  12. #include <boost/spirit/home/lex/lexer/pass_flags.hpp>
  13. #include <boost/assert.hpp>
  14. #include <iterator> // for std::iterator_traits
  15. #if 0 != __COMO_VERSION__ || !BOOST_WORKAROUND(BOOST_MSVC, <= 1310)
  16. #define BOOST_SPIRIT_STATIC_EOF 1
  17. #define BOOST_SPIRIT_EOF_PREFIX static
  18. #else
  19. #define BOOST_SPIRIT_EOF_PREFIX
  20. #endif
  21. namespace boost { namespace spirit { namespace lex { namespace lexertl
  22. {
  ///////////////////////////////////////////////////////////////////////////
  //
  //  functor is a template usable as the functor object for the
  //  multi_pass iterator. It wraps a lexertl based dfa into an
  //  iterator based interface.
  //
  //    Token:      the type of the tokens produced by this functor
  //                this needs to expose a constructor with the following
  //                prototype:
  //
  //                Token(std::size_t id, std::size_t state,
  //                      Iterator start, Iterator end)
  //
  //                where 'id' is the token id, 'state' is the lexer state
  //                this token has been matched in, and 'start' and 'end'
  //                mark the start and the end of the token with respect
  //                to the underlying character stream.
  //    FunctorData:
  //                this is expected to encapsulate the shared part of the
  //                functor (see lex/lexer/lexertl/functor_data.hpp for an
  //                example and documentation).
  //    Iterator:   the type of the underlying iterator
  //    SupportsActors:
  //                this is expected to be an mpl::bool_, if mpl::true_ the
  //                functor invokes functors which (optionally) have
  //                been attached to the token definitions.
  //    SupportsState:
  //                this is expected to be an mpl::bool_, if mpl::true_ the
  //                functor supports different lexer states,
  //                otherwise no lexer state is supported.
  //
  ///////////////////////////////////////////////////////////////////////////
  55. template <typename Token
  56. , template <typename, typename, typename, typename> class FunctorData
  57. , typename Iterator = typename Token::iterator_type
  58. , typename SupportsActors = mpl::false_
  59. , typename SupportsState = typename Token::has_state>
  60. class functor
  61. {
  62. public:
  63. typedef typename
  64. std::iterator_traits<Iterator>::value_type
  65. char_type;
  66. private:
  67. // Needed by compilers not implementing the resolution to DR45. For
  68. // reference, see
  69. // http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45.
  70. typedef typename Token::token_value_type token_value_type;
  71. friend class FunctorData<Iterator, SupportsActors, SupportsState
  72. , token_value_type>;
  73. // Helper template allowing to assign a value on exit
  74. template <typename T>
  75. struct assign_on_exit
  76. {
  77. assign_on_exit(T& dst, T const& src)
  78. : dst_(dst), src_(src) {}
  79. ~assign_on_exit()
  80. {
  81. dst_ = src_;
  82. }
  83. T& dst_;
  84. T const& src_;
  85. // silence MSVC warning C4512: assignment operator could not be generated
  86. BOOST_DELETED_FUNCTION(assign_on_exit& operator= (assign_on_exit const&))
  87. };
  88. public:
  89. functor() {}
  90. #if BOOST_WORKAROUND(BOOST_MSVC, <= 1310)
  91. // somehow VC7.1 needs this (meaningless) assignment operator
  92. functor& operator=(functor const& rhs)
  93. {
  94. return *this;
  95. }
  96. #endif
  97. ///////////////////////////////////////////////////////////////////////
  98. // interface to the iterator_policies::split_functor_input policy
  99. typedef Token result_type;
  100. typedef functor unique;
  101. typedef FunctorData<Iterator, SupportsActors, SupportsState
  102. , token_value_type> shared;
  103. BOOST_SPIRIT_EOF_PREFIX result_type const eof;
  104. ///////////////////////////////////////////////////////////////////////
  105. typedef Iterator iterator_type;
  106. typedef typename shared::semantic_actions_type semantic_actions_type;
  107. typedef typename shared::next_token_functor next_token_functor;
  108. typedef typename shared::get_state_name_type get_state_name_type;
  109. // this is needed to wrap the semantic actions in a proper way
  110. typedef typename shared::wrap_action_type wrap_action_type;
  111. ///////////////////////////////////////////////////////////////////////
  112. template <typename MultiPass>
  113. static result_type& get_next(MultiPass& mp, result_type& result)
  114. {
  115. typedef typename result_type::id_type id_type;
  116. shared& data = mp.shared()->ftor;
  117. for(;;)
  118. {
  119. if (data.get_first() == data.get_last())
  120. #if defined(BOOST_SPIRIT_STATIC_EOF)
  121. return result = eof;
  122. #else
  123. return result = mp.ftor.eof;
  124. #endif
  125. data.reset_value();
  126. Iterator end = data.get_first();
  127. std::size_t unique_id = boost::lexer::npos;
  128. bool prev_bol = false;
  129. // lexer matching might change state
  130. std::size_t state = data.get_state();
  131. std::size_t id = data.next(end, unique_id, prev_bol);
  132. if (boost::lexer::npos == id) { // no match
  133. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  134. std::string next;
  135. Iterator it = data.get_first();
  136. for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i)
  137. next += *it;
  138. std::cerr << "Not matched, in state: " << state
  139. << ", lookahead: >" << next << "<" << std::endl;
  140. #endif
  141. return result = result_type(0);
  142. }
  143. else if (0 == id) { // EOF reached
  144. #if defined(BOOST_SPIRIT_STATIC_EOF)
  145. return result = eof;
  146. #else
  147. return result = mp.ftor.eof;
  148. #endif
  149. }
  150. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  151. {
  152. std::string next;
  153. Iterator it = end;
  154. for (std::size_t i = 0; i < 10 && it != data.get_last(); ++it, ++i)
  155. next += *it;
  156. std::cerr << "Matched: " << id << ", in state: "
  157. << state << ", string: >"
  158. << std::basic_string<char_type>(data.get_first(), end) << "<"
  159. << ", lookahead: >" << next << "<" << std::endl;
  160. if (data.get_state() != state) {
  161. std::cerr << "Switched to state: "
  162. << data.get_state() << std::endl;
  163. }
  164. }
  165. #endif
  166. // account for a possibly pending lex::more(), i.e. moving
  167. // data.first_ back to the start of the previously matched token.
  168. bool adjusted = data.adjust_start();
  169. // set the end of the matched input sequence in the token data
  170. data.set_end(end);
  171. // invoke attached semantic actions, if defined, might change
  172. // state, id, data.first_, and/or end
  173. BOOST_SCOPED_ENUM(pass_flags) pass =
  174. data.invoke_actions(state, id, unique_id, end);
  175. if (data.has_value()) {
  176. // return matched token using the token value as set before
  177. // using data.set_value(), advancing 'data.first_' past the
  178. // matched sequence
  179. assign_on_exit<Iterator> on_exit(data.get_first(), end);
  180. return result = result_type(id_type(id), state, data.get_value());
  181. }
  182. else if (pass_flags::pass_normal == pass) {
  183. // return matched token, advancing 'data.first_' past the
  184. // matched sequence
  185. assign_on_exit<Iterator> on_exit(data.get_first(), end);
  186. return result = result_type(id_type(id), state, data.get_first(), end);
  187. }
  188. else if (pass_flags::pass_fail == pass) {
  189. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  190. std::cerr << "Matching forced to fail" << std::endl;
  191. #endif
  192. // if the data.first_ got adjusted above, revert this adjustment
  193. if (adjusted)
  194. data.revert_adjust_start();
  195. // one of the semantic actions signaled no-match
  196. data.reset_bol(prev_bol);
  197. if (state != data.get_state())
  198. continue; // retry matching if state has changed
  199. // if the state is unchanged repeating the match wouldn't
  200. // move the input forward, causing an infinite loop
  201. return result = result_type(0);
  202. }
  203. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  204. std::cerr << "Token ignored, continuing matching" << std::endl;
  205. #endif
  206. // if this token needs to be ignored, just repeat the matching,
  207. // while starting right after the current match
  208. data.get_first() = end;
  209. }
  210. }
  211. // set_state are propagated up to the iterator interface, allowing to
  212. // manipulate the current lexer state through any of the exposed
  213. // iterators.
  214. template <typename MultiPass>
  215. static std::size_t set_state(MultiPass& mp, std::size_t state)
  216. {
  217. std::size_t oldstate = mp.shared()->ftor.get_state();
  218. mp.shared()->ftor.set_state(state);
  219. #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
  220. std::cerr << "Switching state from: " << oldstate
  221. << " to: " << state
  222. << std::endl;
  223. #endif
  224. return oldstate;
  225. }
  226. template <typename MultiPass>
  227. static std::size_t get_state(MultiPass& mp)
  228. {
  229. return mp.shared()->ftor.get_state();
  230. }
  231. template <typename MultiPass>
  232. static std::size_t
  233. map_state(MultiPass const& mp, char_type const* statename)
  234. {
  235. return mp.shared()->ftor.get_state_id(statename);
  236. }
  237. // we don't need this, but it must be there
  238. template <typename MultiPass>
  239. static void destroy(MultiPass const&) {}
  240. };
  #if defined(BOOST_SPIRIT_STATIC_EOF)
  ///////////////////////////////////////////////////////////////////////////
  //  eof token
  //
  //  Out-of-class definition of the static data member functor::eof,
  //  initialized with a default constructed token. It is compiled only if
  //  BOOST_SPIRIT_STATIC_EOF is defined.
  ///////////////////////////////////////////////////////////////////////////
  template <typename Token
    , template <typename, typename, typename, typename> class FunctorData
    , typename Iterator, typename SupportsActors, typename SupportsState>
  typename functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::result_type const
      functor<Token, FunctorData, Iterator, SupportsActors, SupportsState>::eof =
          typename functor<Token, FunctorData, Iterator, SupportsActors
            , SupportsState>::result_type();
  #endif
  253. }}}}
  // The eof helper macros are only needed inside this header, remove them
  // again so they are not visible to code including it.
  #undef BOOST_SPIRIT_EOF_PREFIX
  #undef BOOST_SPIRIT_STATIC_EOF
  256. #endif