//  Copyright (c) 2001-2011 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- #if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM)
- #define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM
- #if defined(_MSC_VER)
- #pragma once
- #endif
- #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
- #include <boost/spirit/home/support/detail/lexer/consts.hpp>
- #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
- #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
- #include <iterator> // for std::iterator_traits
- #include <vector>
- namespace boost { namespace spirit { namespace lex { namespace lexertl
- {
- ///////////////////////////////////////////////////////////////////////////
- template<typename Iterator>
- class basic_iterator_tokeniser
- {
- public:
- typedef std::vector<std::size_t> size_t_vector;
- typedef typename std::iterator_traits<Iterator>::value_type char_type;
- static std::size_t next (
- boost::lexer::basic_state_machine<char_type> const& state_machine_
- , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_
- , Iterator const& end_, std::size_t& unique_id_)
- {
- if (start_token_ == end_)
- {
- unique_id_ = boost::lexer::npos;
- return 0;
- }
- bool bol = bol_;
- boost::lexer::detail::internals const& internals_ =
- state_machine_.data();
- again:
- std::size_t const* lookup_ = &internals_._lookup[dfa_state_]->
- front ();
- std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_state_];
- std::size_t const* dfa_ = &internals_._dfa[dfa_state_]->front ();
- std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
- Iterator curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + boost::lexer::id_index);
- std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
- std::size_t end_start_state_ = dfa_state_;
- bool end_bol_ = bol_;
- Iterator end_token_ = start_token_;
- while (curr_ != end_)
- {
- std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
- std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
- if (BOL_state_ && bol)
- {
- ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
- }
- else if (EOL_state_ && *curr_ == '\n')
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- }
- else
- {
- typedef typename
- std::iterator_traits<Iterator>::value_type
- value_type;
- typedef typename
- boost::lexer::char_traits<value_type>::index_type
- index_type;
- index_type index =
- boost::lexer::char_traits<value_type>::call(*curr_++);
- bol = (index == '\n') ? true : false;
- std::size_t const state_ = ptr_[
- lookup_[static_cast<std::size_t>(index)]];
- if (state_ == 0)
- {
- break;
- }
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- }
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + boost::lexer::id_index);
- uid_ = *(ptr_ + boost::lexer::unique_id_index);
- end_start_state_ = *(ptr_ + boost::lexer::state_index);
- end_bol_ = bol;
- end_token_ = curr_;
- }
- }
- std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
- if (EOL_state_ && curr_ == end_)
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + boost::lexer::id_index);
- uid_ = *(ptr_ + boost::lexer::unique_id_index);
- end_start_state_ = *(ptr_ + boost::lexer::state_index);
- end_bol_ = bol;
- end_token_ = curr_;
- }
- }
- if (end_state_) {
- // return longest match
- dfa_state_ = end_start_state_;
- start_token_ = end_token_;
- if (id_ == 0)
- {
- bol = end_bol_;
- goto again;
- }
- else
- {
- bol_ = end_bol_;
- }
- }
- else {
- bol_ = (*start_token_ == '\n') ? true : false;
- id_ = boost::lexer::npos;
- uid_ = boost::lexer::npos;
- }
- unique_id_ = uid_;
- return id_;
- }
- ///////////////////////////////////////////////////////////////////////
- static std::size_t next (
- boost::lexer::basic_state_machine<char_type> const& state_machine_
- , bool& bol_, Iterator &start_token_, Iterator const& end_
- , std::size_t& unique_id_)
- {
- if (start_token_ == end_)
- {
- unique_id_ = boost::lexer::npos;
- return 0;
- }
- bool bol = bol_;
- std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front();
- std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0];
- std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front ();
- std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
- Iterator curr_ = start_token_;
- bool end_state_ = *ptr_ != 0;
- std::size_t id_ = *(ptr_ + boost::lexer::id_index);
- std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
- bool end_bol_ = bol_;
- Iterator end_token_ = start_token_;
- while (curr_ != end_)
- {
- std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
- std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
- if (BOL_state_ && bol)
- {
- ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
- }
- else if (EOL_state_ && *curr_ == '\n')
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- }
- else
- {
- typedef typename
- std::iterator_traits<Iterator>::value_type
- value_type;
- typedef typename
- boost::lexer::char_traits<value_type>::index_type
- index_type;
- index_type index =
- boost::lexer::char_traits<value_type>::call(*curr_++);
- bol = (index == '\n') ? true : false;
- std::size_t const state_ = ptr_[
- lookup_[static_cast<std::size_t>(index)]];
- if (state_ == 0)
- {
- break;
- }
- ptr_ = &dfa_[state_ * dfa_alphabet_];
- }
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + boost::lexer::id_index);
- uid_ = *(ptr_ + boost::lexer::unique_id_index);
- end_bol_ = bol;
- end_token_ = curr_;
- }
- }
- std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
- if (EOL_state_ && curr_ == end_)
- {
- ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
- if (*ptr_)
- {
- end_state_ = true;
- id_ = *(ptr_ + boost::lexer::id_index);
- uid_ = *(ptr_ + boost::lexer::unique_id_index);
- end_bol_ = bol;
- end_token_ = curr_;
- }
- }
- if (end_state_) {
- // return longest match
- bol_ = end_bol_;
- start_token_ = end_token_;
- }
- else {
- bol_ = *start_token_ == '\n';
- id_ = boost::lexer::npos;
- uid_ = boost::lexer::npos;
- }
- unique_id_ = uid_;
- return id_;
- }
- };
- }}}}
- #endif
|