iterator_tokenizer.hpp 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. // Copyright (c) 2001-2011 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM)
  6. #define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM
  7. #if defined(_MSC_VER)
  8. #pragma once
  9. #endif
  10. #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
  11. #include <boost/spirit/home/support/detail/lexer/consts.hpp>
  12. #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
  13. #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
  14. #include <iterator> // for std::iterator_traits
  15. #include <vector>
  16. namespace boost { namespace spirit { namespace lex { namespace lexertl
  17. {
  18. ///////////////////////////////////////////////////////////////////////////
  19. template<typename Iterator>
  20. class basic_iterator_tokeniser
  21. {
  22. public:
  23. typedef std::vector<std::size_t> size_t_vector;
  24. typedef typename std::iterator_traits<Iterator>::value_type char_type;
  25. static std::size_t next (
  26. boost::lexer::basic_state_machine<char_type> const& state_machine_
  27. , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_
  28. , Iterator const& end_, std::size_t& unique_id_)
  29. {
  30. if (start_token_ == end_)
  31. {
  32. unique_id_ = boost::lexer::npos;
  33. return 0;
  34. }
  35. bool bol = bol_;
  36. boost::lexer::detail::internals const& internals_ =
  37. state_machine_.data();
  38. again:
  39. std::size_t const* lookup_ = &internals_._lookup[dfa_state_]->
  40. front ();
  41. std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_state_];
  42. std::size_t const* dfa_ = &internals_._dfa[dfa_state_]->front ();
  43. std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
  44. Iterator curr_ = start_token_;
  45. bool end_state_ = *ptr_ != 0;
  46. std::size_t id_ = *(ptr_ + boost::lexer::id_index);
  47. std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
  48. std::size_t end_start_state_ = dfa_state_;
  49. bool end_bol_ = bol_;
  50. Iterator end_token_ = start_token_;
  51. while (curr_ != end_)
  52. {
  53. std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
  54. std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
  55. if (BOL_state_ && bol)
  56. {
  57. ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
  58. }
  59. else if (EOL_state_ && *curr_ == '\n')
  60. {
  61. ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
  62. }
  63. else
  64. {
  65. typedef typename
  66. std::iterator_traits<Iterator>::value_type
  67. value_type;
  68. typedef typename
  69. boost::lexer::char_traits<value_type>::index_type
  70. index_type;
  71. index_type index =
  72. boost::lexer::char_traits<value_type>::call(*curr_++);
  73. bol = (index == '\n') ? true : false;
  74. std::size_t const state_ = ptr_[
  75. lookup_[static_cast<std::size_t>(index)]];
  76. if (state_ == 0)
  77. {
  78. break;
  79. }
  80. ptr_ = &dfa_[state_ * dfa_alphabet_];
  81. }
  82. if (*ptr_)
  83. {
  84. end_state_ = true;
  85. id_ = *(ptr_ + boost::lexer::id_index);
  86. uid_ = *(ptr_ + boost::lexer::unique_id_index);
  87. end_start_state_ = *(ptr_ + boost::lexer::state_index);
  88. end_bol_ = bol;
  89. end_token_ = curr_;
  90. }
  91. }
  92. std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
  93. if (EOL_state_ && curr_ == end_)
  94. {
  95. ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
  96. if (*ptr_)
  97. {
  98. end_state_ = true;
  99. id_ = *(ptr_ + boost::lexer::id_index);
  100. uid_ = *(ptr_ + boost::lexer::unique_id_index);
  101. end_start_state_ = *(ptr_ + boost::lexer::state_index);
  102. end_bol_ = bol;
  103. end_token_ = curr_;
  104. }
  105. }
  106. if (end_state_) {
  107. // return longest match
  108. dfa_state_ = end_start_state_;
  109. start_token_ = end_token_;
  110. if (id_ == 0)
  111. {
  112. bol = end_bol_;
  113. goto again;
  114. }
  115. else
  116. {
  117. bol_ = end_bol_;
  118. }
  119. }
  120. else {
  121. bol_ = (*start_token_ == '\n') ? true : false;
  122. id_ = boost::lexer::npos;
  123. uid_ = boost::lexer::npos;
  124. }
  125. unique_id_ = uid_;
  126. return id_;
  127. }
  128. ///////////////////////////////////////////////////////////////////////
  129. static std::size_t next (
  130. boost::lexer::basic_state_machine<char_type> const& state_machine_
  131. , bool& bol_, Iterator &start_token_, Iterator const& end_
  132. , std::size_t& unique_id_)
  133. {
  134. if (start_token_ == end_)
  135. {
  136. unique_id_ = boost::lexer::npos;
  137. return 0;
  138. }
  139. bool bol = bol_;
  140. std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front();
  141. std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0];
  142. std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front ();
  143. std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
  144. Iterator curr_ = start_token_;
  145. bool end_state_ = *ptr_ != 0;
  146. std::size_t id_ = *(ptr_ + boost::lexer::id_index);
  147. std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
  148. bool end_bol_ = bol_;
  149. Iterator end_token_ = start_token_;
  150. while (curr_ != end_)
  151. {
  152. std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
  153. std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
  154. if (BOL_state_ && bol)
  155. {
  156. ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
  157. }
  158. else if (EOL_state_ && *curr_ == '\n')
  159. {
  160. ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
  161. }
  162. else
  163. {
  164. typedef typename
  165. std::iterator_traits<Iterator>::value_type
  166. value_type;
  167. typedef typename
  168. boost::lexer::char_traits<value_type>::index_type
  169. index_type;
  170. index_type index =
  171. boost::lexer::char_traits<value_type>::call(*curr_++);
  172. bol = (index == '\n') ? true : false;
  173. std::size_t const state_ = ptr_[
  174. lookup_[static_cast<std::size_t>(index)]];
  175. if (state_ == 0)
  176. {
  177. break;
  178. }
  179. ptr_ = &dfa_[state_ * dfa_alphabet_];
  180. }
  181. if (*ptr_)
  182. {
  183. end_state_ = true;
  184. id_ = *(ptr_ + boost::lexer::id_index);
  185. uid_ = *(ptr_ + boost::lexer::unique_id_index);
  186. end_bol_ = bol;
  187. end_token_ = curr_;
  188. }
  189. }
  190. std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
  191. if (EOL_state_ && curr_ == end_)
  192. {
  193. ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
  194. if (*ptr_)
  195. {
  196. end_state_ = true;
  197. id_ = *(ptr_ + boost::lexer::id_index);
  198. uid_ = *(ptr_ + boost::lexer::unique_id_index);
  199. end_bol_ = bol;
  200. end_token_ = curr_;
  201. }
  202. }
  203. if (end_state_) {
  204. // return longest match
  205. bol_ = end_bol_;
  206. start_token_ = end_token_;
  207. }
  208. else {
  209. bol_ = *start_token_ == '\n';
  210. id_ = boost::lexer::npos;
  211. uid_ = boost::lexer::npos;
  212. }
  213. unique_id_ = uid_;
  214. return id_;
  215. }
  216. };
  217. }}}}
  218. #endif