static_functor_data.hpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. // Copyright (c) 2001-2011 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM)
  6. #define BOOST_SPIRIT_LEX_LEXER_STATIC_FUNCTOR_DATA_FEB_10_2008_0755PM
  7. #if defined(_MSC_VER)
  8. #pragma once
  9. #endif
  10. #include <boost/spirit/home/support/detail/lexer/generator.hpp>
  11. #include <boost/spirit/home/support/detail/lexer/rules.hpp>
  12. #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
  13. #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp>
  14. #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp>
  15. #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp>
  16. #include <boost/mpl/bool.hpp>
  17. #include <boost/algorithm/string/predicate.hpp>
  18. #include <iterator> // for std::iterator_traits
  19. namespace boost { namespace spirit { namespace lex { namespace lexertl
  20. {
  21. namespace detail
  22. {
  23. ///////////////////////////////////////////////////////////////////////
  24. template <typename Char, typename F>
  25. inline std::size_t get_state_id(Char const* state, F f
  26. , std::size_t numstates)
  27. {
  28. for (std::size_t i = 0; i < numstates; ++i)
  29. {
  30. if (boost::algorithm::equals(f(i), state))
  31. return i;
  32. }
  33. return boost::lexer::npos;
  34. }
  35. ///////////////////////////////////////////////////////////////////////
  // Primary template of the shared lexer data holder. It is declared only
  // and never defined: every usable combination of the parameters is
  // provided by a partial specialization.
  template <
      typename Iterator, typename HasActors, typename HasState,
      typename TokenValue>
  class static_data;
  39. ///////////////////////////////////////////////////////////////////////
  40. // doesn't support no state and no actors
  41. template <typename Iterator, typename TokenValue>
  42. class static_data<Iterator, mpl::false_, mpl::false_, TokenValue>
  43. {
  44. protected:
  45. typedef typename
  46. std::iterator_traits<Iterator>::value_type
  47. char_type;
  48. public:
  49. typedef Iterator base_iterator_type;
  50. typedef iterator_range<Iterator> token_value_type;
  51. typedef token_value_type get_value_type;
  52. typedef std::size_t state_type;
  53. typedef char_type const* state_name_type;
  54. typedef unused_type semantic_actions_type;
  55. typedef detail::wrap_action<unused_type, Iterator, static_data
  56. , std::size_t> wrap_action_type;
  57. typedef std::size_t (*next_token_functor)(std::size_t&,
  58. bool&, Iterator&, Iterator const&, std::size_t&);
  59. typedef char_type const* (*get_state_name_type)(std::size_t);
  60. // initialize the shared data
  61. template <typename IterData>
  62. static_data (IterData const& data, Iterator& first
  63. , Iterator const& last)
  64. : first_(first), last_(last)
  65. , next_token_(data.next_)
  66. , get_state_name_(data.get_state_name_)
  67. , bol_(data.bol_) {}
  68. // The following functions are used by the implementation of the
  69. // placeholder '_state'.
  70. template <typename Char>
  71. void set_state_name (Char const*)
  72. {
  73. // some (random) versions of gcc instantiate this function even if it's not
  74. // needed leading to false static asserts
  75. #if !defined(__GNUC__)
  76. // If you see a compile time assertion below you're probably
  77. // using a token type not supporting lexer states (the 3rd
  78. // template parameter of the token is mpl::false_), but your
  79. // code uses state changes anyways.
  80. BOOST_STATIC_ASSERT(false);
  81. #endif
  82. }
  83. char_type const* get_state_name() const
  84. {
  85. return get_state_name_(0);
  86. }
  87. std::size_t get_state_id(char_type const*) const
  88. {
  89. return 0;
  90. }
  91. // The function get_eoi() is used by the implementation of the
  92. // placeholder '_eoi'.
  93. Iterator const& get_eoi() const { return last_; }
  94. // The function less() is used by the implementation of the support
  95. // function lex::less(). Its functionality is equivalent to flex'
  96. // function yyless(): it returns an iterator positioned to the
  97. // nth input character beyond the current start iterator (i.e. by
  98. // assigning the return value to the placeholder '_end' it is
  99. // possible to return all but the first n characters of the current
  100. // token back to the input stream.
  101. //
  102. // This function does nothing as long as no semantic actions are
  103. // used.
  104. Iterator const& less(Iterator const& it, int)
  105. {
  106. // The following assertion fires most likely because you are
  107. // using lexer semantic actions without using the actor_lexer
  108. // as the base class for your token definition class.
  109. BOOST_ASSERT(false &&
  110. "Are you using lexer semantic actions without using the "
  111. "actor_lexer base?");
  112. return it;
  113. }
  114. // The function more() is used by the implementation of the support
  115. // function lex::more(). Its functionality is equivalent to flex'
  116. // function yymore(): it tells the lexer that the next time it
  117. // matches a rule, the corresponding token should be appended onto
  118. // the current token value rather than replacing it.
  119. //
  120. // These functions do nothing as long as no semantic actions are
  121. // used.
  122. void more()
  123. {
  124. // The following assertion fires most likely because you are
  125. // using lexer semantic actions without using the actor_lexer
  126. // as the base class for your token definition class.
  127. BOOST_ASSERT(false &&
  128. "Are you using lexer semantic actions without using the "
  129. "actor_lexer base?");
  130. }
  131. bool adjust_start() { return false; }
  132. void revert_adjust_start() {}
  133. // The function lookahead() is used by the implementation of the
  134. // support function lex::lookahead. It can be used to implement
  135. // lookahead for lexer engines not supporting constructs like flex'
  136. // a/b (match a, but only when followed by b):
  137. //
  138. // This function does nothing as long as no semantic actions are
  139. // used.
  140. bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0))
  141. {
  142. // The following assertion fires most likely because you are
  143. // using lexer semantic actions without using the actor_lexer
  144. // as the base class for your token definition class.
  145. BOOST_ASSERT(false &&
  146. "Are you using lexer semantic actions without using the "
  147. "actor_lexer base?");
  148. return false;
  149. }
  150. // the functions next, invoke_actions, and get_state are used by
  151. // the functor implementation below
  152. // The function next() tries to match the next token from the
  153. // underlying input sequence.
  154. std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
  155. {
  156. prev_bol = bol_;
  157. std::size_t state = 0;
  158. return next_token_(state, bol_, end, last_, unique_id);
  159. }
  160. // nothing to invoke, so this is empty
  161. BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t
  162. , std::size_t, std::size_t, Iterator const&)
  163. {
  164. return pass_flags::pass_normal; // always accept
  165. }
  166. std::size_t get_state() const { return 0; }
  167. void set_state(std::size_t) {}
  168. void set_end(Iterator const& it) {}
  169. Iterator& get_first() { return first_; }
  170. Iterator const& get_first() const { return first_; }
  171. Iterator const& get_last() const { return last_; }
  172. iterator_range<Iterator> get_value() const
  173. {
  174. return iterator_range<Iterator>(first_, last_);
  175. }
  176. bool has_value() const { return false; }
  177. void reset_value() {}
  178. void reset_bol(bool bol) { bol_ = bol; }
  179. protected:
  180. Iterator& first_;
  181. Iterator last_;
  182. next_token_functor next_token_;
  183. get_state_name_type get_state_name_;
  184. bool bol_; // helper storing whether last character was \n
  185. // silence MSVC warning C4512: assignment operator could not be generated
  186. BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&))
  187. };
  188. ///////////////////////////////////////////////////////////////////////
  189. // doesn't support lexer semantic actions, but supports state
  190. template <typename Iterator, typename TokenValue>
  191. class static_data<Iterator, mpl::false_, mpl::true_, TokenValue>
  192. : public static_data<Iterator, mpl::false_, mpl::false_, TokenValue>
  193. {
  194. protected:
  195. typedef static_data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type;
  196. typedef typename base_type::char_type char_type;
  197. public:
  198. typedef Iterator base_iterator_type;
  199. typedef iterator_range<Iterator> token_value_type;
  200. typedef token_value_type get_value_type;
  201. typedef typename base_type::state_type state_type;
  202. typedef typename base_type::state_name_type state_name_type;
  203. typedef typename base_type::semantic_actions_type
  204. semantic_actions_type;
  205. // initialize the shared data
  206. template <typename IterData>
  207. static_data (IterData const& data, Iterator& first
  208. , Iterator const& last)
  209. : base_type(data, first, last), state_(0)
  210. , num_states_(data.num_states_) {}
  211. // The following functions are used by the implementation of the
  212. // placeholder '_state'.
  213. void set_state_name (char_type const* new_state)
  214. {
  215. std::size_t state_id = lexertl::detail::get_state_id(new_state
  216. , this->get_state_name_, num_states_);
  217. // if the following assertion fires you've probably been using
  218. // a lexer state name which was not defined in your token
  219. // definition
  220. BOOST_ASSERT(state_id != boost::lexer::npos);
  221. if (state_id != boost::lexer::npos)
  222. state_ = state_id;
  223. }
  224. char_type const* get_state_name() const
  225. {
  226. return this->get_state_name_(state_);
  227. }
  228. std::size_t get_state_id(char_type const* state) const
  229. {
  230. return lexertl::detail::get_state_id(state
  231. , this->get_state_name_, num_states_);
  232. }
  233. // the functions next() and get_state() are used by the functor
  234. // implementation below
  235. // The function next() tries to match the next token from the
  236. // underlying input sequence.
  237. std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
  238. {
  239. prev_bol = this->bol_;
  240. return this->next_token_(state_, this->bol_, end, this->last_
  241. , unique_id);
  242. }
  243. std::size_t& get_state() { return state_; }
  244. void set_state(std::size_t state) { state_ = state; }
  245. protected:
  246. std::size_t state_;
  247. std::size_t num_states_;
  248. // silence MSVC warning C4512: assignment operator could not be generated
  249. BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&))
  250. };
  251. ///////////////////////////////////////////////////////////////////////
  252. // does support actors, but may have no state
  253. template <typename Iterator, typename HasState, typename TokenValue>
  254. class static_data<Iterator, mpl::true_, HasState, TokenValue>
  255. : public static_data<Iterator, mpl::false_, HasState, TokenValue>
  256. {
  257. public:
  258. typedef semantic_actions<Iterator, HasState, static_data>
  259. semantic_actions_type;
  260. protected:
  261. typedef static_data<Iterator, mpl::false_, HasState, TokenValue>
  262. base_type;
  263. typedef typename base_type::char_type char_type;
  264. typedef typename semantic_actions_type::functor_wrapper_type
  265. functor_wrapper_type;
  266. public:
  267. typedef Iterator base_iterator_type;
  268. typedef TokenValue token_value_type;
  269. typedef TokenValue const& get_value_type;
  270. typedef typename base_type::state_type state_type;
  271. typedef typename base_type::state_name_type state_name_type;
  272. typedef detail::wrap_action<functor_wrapper_type
  273. , Iterator, static_data, std::size_t> wrap_action_type;
  274. template <typename IterData>
  275. static_data (IterData const& data, Iterator& first
  276. , Iterator const& last)
  277. : base_type(data, first, last)
  278. , actions_(data.actions_), hold_()
  279. , value_(iterator_range<Iterator>(first, last))
  280. , has_value_(false)
  281. , has_hold_(false)
  282. {}
  283. // invoke attached semantic actions, if defined
  284. BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
  285. , std::size_t& id, std::size_t unique_id, Iterator& end)
  286. {
  287. return actions_.invoke_actions(state, id, unique_id, end, *this);
  288. }
  289. // The function less() is used by the implementation of the support
  290. // function lex::less(). Its functionality is equivalent to flex'
  291. // function yyless(): it returns an iterator positioned to the
  292. // nth input character beyond the current start iterator (i.e. by
  293. // assigning the return value to the placeholder '_end' it is
  294. // possible to return all but the first n characters of the current
  295. // token back to the input stream).
  296. Iterator const& less(Iterator& it, int n)
  297. {
  298. it = this->get_first();
  299. std::advance(it, n);
  300. return it;
  301. }
  302. // The function more() is used by the implementation of the support
  303. // function lex::more(). Its functionality is equivalent to flex'
  304. // function yymore(): it tells the lexer that the next time it
  305. // matches a rule, the corresponding token should be appended onto
  306. // the current token value rather than replacing it.
  307. void more()
  308. {
  309. hold_ = this->get_first();
  310. has_hold_ = true;
  311. }
  312. // The function lookahead() is used by the implementation of the
  313. // support function lex::lookahead. It can be used to implement
  314. // lookahead for lexer engines not supporting constructs like flex'
  315. // a/b (match a, but only when followed by b)
  316. bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
  317. {
  318. Iterator end = end_;
  319. std::size_t unique_id = boost::lexer::npos;
  320. bool bol = this->bol_;
  321. if (std::size_t(~0) == state)
  322. state = this->state_;
  323. return id == this->next_token_(
  324. state, bol, end, this->get_eoi(), unique_id);
  325. }
  326. // The adjust_start() and revert_adjust_start() are helper
  327. // functions needed to implement the functionality required for
  328. // lex::more(). It is called from the functor body below.
  329. bool adjust_start()
  330. {
  331. if (!has_hold_)
  332. return false;
  333. std::swap(this->get_first(), hold_);
  334. has_hold_ = false;
  335. return true;
  336. }
  337. void revert_adjust_start()
  338. {
  339. // this will be called only if adjust_start above returned true
  340. std::swap(this->get_first(), hold_);
  341. has_hold_ = true;
  342. }
  343. TokenValue const& get_value() const
  344. {
  345. if (!has_value_) {
  346. value_ = iterator_range<Iterator>(this->get_first(), end_);
  347. has_value_ = true;
  348. }
  349. return value_;
  350. }
  351. template <typename Value>
  352. void set_value(Value const& val)
  353. {
  354. value_ = val;
  355. has_value_ = true;
  356. }
  357. void set_end(Iterator const& it)
  358. {
  359. end_ = it;
  360. }
  361. bool has_value() const { return has_value_; }
  362. void reset_value() { has_value_ = false; }
  363. protected:
  364. semantic_actions_type const& actions_;
  365. Iterator hold_; // iterator needed to support lex::more()
  366. Iterator end_; // iterator pointing to end of matched token
  367. mutable TokenValue value_; // token value to use
  368. mutable bool has_value_; // 'true' if value_ is valid
  369. bool has_hold_; // 'true' if hold_ is valid
  370. // silence MSVC warning C4512: assignment operator could not be generated
  371. BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&))
  372. };
  373. ///////////////////////////////////////////////////////////////////////
  374. // does support lexer semantic actions, may support state, is used for
  375. // position_token exposing exactly one type
  376. template <typename Iterator, typename HasState, typename TokenValue>
  377. class static_data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> >
  378. : public static_data<Iterator, mpl::false_, HasState, TokenValue>
  379. {
  380. public:
  381. typedef semantic_actions<Iterator, HasState, static_data>
  382. semantic_actions_type;
  383. protected:
  384. typedef static_data<Iterator, mpl::false_, HasState, TokenValue>
  385. base_type;
  386. typedef typename base_type::char_type char_type;
  387. typedef typename semantic_actions_type::functor_wrapper_type
  388. functor_wrapper_type;
  389. public:
  390. typedef Iterator base_iterator_type;
  391. typedef boost::optional<TokenValue> token_value_type;
  392. typedef boost::optional<TokenValue> const& get_value_type;
  393. typedef typename base_type::state_type state_type;
  394. typedef typename base_type::state_name_type state_name_type;
  395. typedef detail::wrap_action<functor_wrapper_type
  396. , Iterator, static_data, std::size_t> wrap_action_type;
  397. template <typename IterData>
  398. static_data (IterData const& data_, Iterator& first, Iterator const& last)
  399. : base_type(data_, first, last)
  400. , actions_(data_.actions_), hold_()
  401. , has_value_(false), has_hold_(false)
  402. {
  403. spirit::traits::assign_to(first, last, value_);
  404. has_value_ = true;
  405. }
  406. // invoke attached semantic actions, if defined
  407. BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
  408. , std::size_t& id, std::size_t unique_id, Iterator& end)
  409. {
  410. return actions_.invoke_actions(state, id, unique_id, end, *this);
  411. }
  412. // The function less() is used by the implementation of the support
  413. // function lex::less(). Its functionality is equivalent to flex'
  414. // function yyless(): it returns an iterator positioned to the
  415. // nth input character beyond the current start iterator (i.e. by
  416. // assigning the return value to the placeholder '_end' it is
  417. // possible to return all but the first n characters of the current
  418. // token back to the input stream).
  419. Iterator const& less(Iterator& it, int n)
  420. {
  421. it = this->get_first();
  422. std::advance(it, n);
  423. return it;
  424. }
  425. // The function more() is used by the implementation of the support
  426. // function lex::more(). Its functionality is equivalent to flex'
  427. // function yymore(): it tells the lexer that the next time it
  428. // matches a rule, the corresponding token should be appended onto
  429. // the current token value rather than replacing it.
  430. void more()
  431. {
  432. hold_ = this->get_first();
  433. has_hold_ = true;
  434. }
  435. // The function lookahead() is used by the implementation of the
  436. // support function lex::lookahead. It can be used to implement
  437. // lookahead for lexer engines not supporting constructs like flex'
  438. // a/b (match a, but only when followed by b)
  439. bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
  440. {
  441. Iterator end = end_;
  442. std::size_t unique_id = boost::lexer::npos;
  443. bool bol = this->bol_;
  444. if (std::size_t(~0) == state)
  445. state = this->state_;
  446. return id == this->next_token_(
  447. state, bol, end, this->get_eoi(), unique_id);
  448. }
  449. // The adjust_start() and revert_adjust_start() are helper
  450. // functions needed to implement the functionality required for
  451. // lex::more(). It is called from the functor body below.
  452. bool adjust_start()
  453. {
  454. if (!has_hold_)
  455. return false;
  456. std::swap(this->get_first(), hold_);
  457. has_hold_ = false;
  458. return true;
  459. }
  460. void revert_adjust_start()
  461. {
  462. // this will be called only if adjust_start above returned true
  463. std::swap(this->get_first(), hold_);
  464. has_hold_ = true;
  465. }
  466. TokenValue const& get_value() const
  467. {
  468. if (!has_value_) {
  469. spirit::traits::assign_to(this->get_first(), end_, value_);
  470. has_value_ = true;
  471. }
  472. return value_;
  473. }
  474. template <typename Value>
  475. void set_value(Value const& val)
  476. {
  477. value_ = val;
  478. has_value_ = true;
  479. }
  480. void set_end(Iterator const& it)
  481. {
  482. end_ = it;
  483. }
  484. bool has_value() const { return has_value_; }
  485. void reset_value() { has_value_ = false; }
  486. protected:
  487. semantic_actions_type const& actions_;
  488. Iterator hold_; // iterator needed to support lex::more()
  489. Iterator end_; // iterator pointing to end of matched token
  490. mutable token_value_type value_; // token value to use
  491. mutable bool has_value_; // 'true' if value_ is valid
  492. bool has_hold_; // 'true' if hold_ is valid
  493. // silence MSVC warning C4512: assignment operator could not be generated
  494. BOOST_DELETED_FUNCTION(static_data& operator= (static_data const&))
  495. };
  496. }
  497. }}}}
  498. #endif