123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431 |
- [/
- Copyright 2006-2007 John Maddock.
- Distributed under the Boost Software License, Version 1.0.
- (See accompanying file LICENSE_1_0.txt or copy at
- http://www.boost.org/LICENSE_1_0.txt).
- ]
- [section:regex_token_iterator regex_token_iterator]
- The template class [regex_token_iterator] is an iterator adapter; that is to
- say it represents a new view of an existing iterator sequence,
- by enumerating all the occurrences of a regular expression within that
- sequence, and presenting one or more character sequences for each match found.
- Each position enumerated by the iterator is a [sub_match] object that represents
- what matched a particular sub-expression within the regular expression.
- When class [regex_token_iterator] is used to enumerate a single sub-expression
- with index -1, then the iterator performs field splitting: that is
- to say it enumerates one character sequence for each section of the character
- container sequence that does not match the regular expression specified.
- template <class BidirectionalIterator,
- class charT = iterator_traits<BidirectionalIterator>::value_type,
- class traits = regex_traits<charT> >
- class regex_token_iterator
- {
- public:
- typedef basic_regex<charT, traits> regex_type;
- typedef sub_match<BidirectionalIterator> value_type;
- typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
- typedef const value_type* pointer;
- typedef const value_type& reference;
- typedef std::forward_iterator_tag iterator_category;
-
- ``[link boost_regex.regex_token_iterator.construct1 regex_token_iterator]``();
- ``[link boost_regex.regex_token_iterator.construct2 regex_token_iterator]``(BidirectionalIterator a,
- BidirectionalIterator b,
- const regex_type& re,
- int submatch = 0,
- match_flag_type m = match_default);
- ``[link boost_regex.regex_token_iterator.construct3 regex_token_iterator]``(BidirectionalIterator a,
- BidirectionalIterator b,
- const regex_type& re,
- const std::vector<int>& submatches,
- match_flag_type m = match_default);
- template <std::size_t N>
- ``[link boost_regex.regex_token_iterator.construct4 regex_token_iterator]``(BidirectionalIterator a,
- BidirectionalIterator b,
- const regex_type& re,
- const int (&submatches)[N],
- match_flag_type m = match_default);
- ``[link boost_regex.regex_token_iterator.construct5 regex_token_iterator]``(const regex_token_iterator&);
- regex_token_iterator& ``[link boost_regex.regex_token_iterator.assign operator=]``(const regex_token_iterator&);
- bool ``[link boost_regex.regex_token_iterator.op_eq operator==]``(const regex_token_iterator&)const;
- bool ``[link boost_regex.regex_token_iterator.op_ne operator!=]``(const regex_token_iterator&)const;
- const value_type& ``[link boost_regex.regex_token_iterator.op_deref operator*]``()const;
- const value_type* ``[link boost_regex.regex_token_iterator.op_arrow operator->]``()const;
- regex_token_iterator& ``[link boost_regex.regex_token_iterator.op_inc1 operator++]``();
- regex_token_iterator ``[link boost_regex.regex_token_iterator.op_inc2 operator++]``(int);
- };
- typedef regex_token_iterator<const char*> cregex_token_iterator;
- typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator;
- #ifndef BOOST_NO_WREGEX
- typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator;
- typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator;
- #endif
- template <class charT, class traits>
- regex_token_iterator<const charT*, charT, traits>
- ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
- const charT* p,
- const basic_regex<charT, traits>& e,
- int submatch = 0,
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits, class ST, class SA>
- regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
- ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
- const std::basic_string<charT, ST, SA>& p,
- const basic_regex<charT, traits>& e,
- int submatch = 0,
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits, std::size_t N>
- regex_token_iterator<const charT*, charT, traits>
- ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
- const charT* p,
- const basic_regex<charT, traits>& e,
- const int (&submatch)[N],
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits, class ST, class SA, std::size_t N>
- regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
- ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
- const std::basic_string<charT, ST, SA>& p,
- const basic_regex<charT, traits>& e,
- const int (&submatch)[N],
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits>
- regex_token_iterator<const charT*, charT, traits>
- ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
- const charT* p,
- const basic_regex<charT, traits>& e,
- const std::vector<int>& submatch,
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits, class ST, class SA>
- regex_token_iterator<
- typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
- ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``(
- const std::basic_string<charT, ST, SA>& p,
- const basic_regex<charT, traits>& e,
- const std::vector<int>& submatch,
- regex_constants::match_flag_type m = regex_constants::match_default);
- [h4 Description]
- [#boost_regex.regex_token_iterator.construct1]
- regex_token_iterator();
- [*Effects]: constructs an end of sequence iterator.
- [#boost_regex.regex_token_iterator.construct2]
- regex_token_iterator(BidirectionalIterator a,
- BidirectionalIterator b,
- const regex_type& re,
- int submatch = 0,
- match_flag_type m = match_default);
- [*Preconditions]: `!re.empty()`. Object /re/ shall exist for the lifetime of
- the iterator constructed from it.
- [*Effects]: constructs a [regex_token_iterator] that will enumerate one string for
- each regular expression match of the expression /re/ found within the sequence \[a,b),
- using match flags /m/ (see [match_flag_type]). The string enumerated is the sub-expression /submatch/
- for each match found; if /submatch/ is -1, then enumerates all the text
- sequences that did not match the expression /re/ (that is, it performs field
- splitting).
- [*Throws]: `std::runtime_error` if the complexity of matching the expression against
- an N character string begins to exceed O(N[super 2]), or if the program runs
- out of stack space while matching the expression (if Boost.Regex is configured
- in recursive mode), or if the matcher exhausts its permitted memory
- allocation (if Boost.Regex is configured in non-recursive mode).
- [#boost_regex.regex_token_iterator.construct3]
- regex_token_iterator(BidirectionalIterator a,
- BidirectionalIterator b,
- const regex_type& re,
- const std::vector<int>& submatches,
- match_flag_type m = match_default);
- [*Preconditions]: `submatches.size() && !re.empty()`. Object /re/ shall
- exist for the lifetime of the iterator constructed from it.
- [*Effects]: constructs a [regex_token_iterator] that will enumerate
- `submatches.size()` strings for each regular expression match of
- the expression /re/ found within the sequence \[a,b), using match flags /m/
- (see [match_flag_type]). For each match found one string will be enumerated
- for each sub-expression index contained within the /submatches/ vector; if
- `submatches[0]` is -1, then the first string enumerated for each match will be
- all of the text from end of the last match to the start of the current match,
- in addition there will be one extra string enumerated when no more matches can
- be found: from the end of the last match found, to the end of the underlying sequence.
- [*Throws]: `std::runtime_error` if the complexity of matching the expression
- against an N character string begins to exceed O(N[super 2]), or if the
- program runs out of stack space while matching the expression (if Boost.Regex is
- configured in recursive mode), or if the matcher exhausts its permitted memory
- allocation (if Boost.Regex is configured in non-recursive mode).
- [#boost_regex.regex_token_iterator.construct4]
- template <std::size_t N>
- regex_token_iterator(BidirectionalIterator a,
- BidirectionalIterator b,
- const regex_type& re,
- const int (&submatches)[N],
- match_flag_type m = match_default);
- [*Preconditions]: `!re.empty()`. Object /re/ shall exist for the lifetime of the iterator constructed from it.
- [*Effects]: constructs a [regex_token_iterator] that will enumerate /N/ strings
- for each regular expression match of the expression /re/ found within the sequence
- \[a,b), using match flags /m/ (see [match_flag_type]). For each match found one
- string will be enumerated for each sub-expression index contained within the
- /submatches/ array; if `submatches[0]` is -1, then the first string enumerated for
- each match will be all of the text from end of the last match to the start
- of the current match, in addition there will be one extra string enumerated when
- no more matches can be found: from the end of the last match found, to
- the end of the underlying sequence.
- [*Throws]: `std::runtime_error` if the complexity of matching the expression
- against an N character string begins to exceed O(N[super 2]), or if the
- program runs out of stack space while matching the expression (if Boost.Regex
- is configured in recursive mode), or if the matcher exhausts its
- permitted memory allocation (if Boost.Regex is configured in non-recursive mode).
- [#boost_regex.regex_token_iterator.construct5]
- regex_token_iterator(const regex_token_iterator& that);
- [*Effects]: constructs a copy of `that`.
- [*Postconditions]: `*this == that`.
- [#boost_regex.regex_token_iterator.assign]
- regex_token_iterator& operator=(const regex_token_iterator& that);
- [*Effects]: sets `*this` to be equal to `that`.
- [*Postconditions]: `*this == that`.
- [#boost_regex.regex_token_iterator.op_eq]
- bool operator==(const regex_token_iterator& that)const;
- [*Effects]: returns true if `*this` is the same position as `that`.
- [#boost_regex.regex_token_iterator.op_ne]
- bool operator!=(const regex_token_iterator& that)const;
- [*Effects]: returns `!(*this == that)`.
- [#boost_regex.regex_token_iterator.op_deref]
- const value_type& operator*()const;
- [*Effects]: returns the current character sequence being enumerated.
- [#boost_regex.regex_token_iterator.op_arrow]
- const value_type* operator->()const;
- [*Effects]: returns `&(**this)`.
- [#boost_regex.regex_token_iterator.op_inc1]
- regex_token_iterator& operator++();
- [*Effects]: Moves on to the next character sequence to be enumerated.
- [*Throws]: `std::runtime_error` if the complexity of matching the expression
- against an N character string begins to exceed O(N[super 2]), or if the program
- runs out of stack space while matching the expression (if Boost.Regex is
- configured in recursive mode), or if the matcher exhausts its permitted
- memory allocation (if Boost.Regex is configured in non-recursive mode).
- [*Returns]: `*this`.
- [#boost_regex.regex_token_iterator.op_inc2]
- regex_token_iterator operator++(int);
- [*Effects]: constructs a copy `result` of `*this`, then calls `++(*this)`.
- [*Returns]: `result`.
- [#boost_regex.regex_token_iterator.make]
- template <class charT, class traits>
- regex_token_iterator<const charT*, charT, traits>
- make_regex_token_iterator(
- const charT* p,
- const basic_regex<charT, traits>& e,
- int submatch = 0,
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits, class ST, class SA>
- regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
- make_regex_token_iterator(
- const std::basic_string<charT, ST, SA>& p,
- const basic_regex<charT, traits>& e,
- int submatch = 0,
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits, std::size_t N>
- regex_token_iterator<const charT*, charT, traits>
- make_regex_token_iterator(
- const charT* p,
- const basic_regex<charT, traits>& e,
- const int (&submatch)[N],
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits, class ST, class SA, std::size_t N>
- regex_token_iterator<
- typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
- make_regex_token_iterator(
- const std::basic_string<charT, ST, SA>& p,
- const basic_regex<charT, traits>& e,
- const int (&submatch)[N],
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits>
- regex_token_iterator<const charT*, charT, traits>
- make_regex_token_iterator(
- const charT* p,
- const basic_regex<charT, traits>& e,
- const std::vector<int>& submatch,
- regex_constants::match_flag_type m = regex_constants::match_default);
-
- template <class charT, class traits, class ST, class SA>
- regex_token_iterator<
- typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits>
- make_regex_token_iterator(
- const std::basic_string<charT, ST, SA>& p,
- const basic_regex<charT, traits>& e,
- const std::vector<int>& submatch,
- regex_constants::match_flag_type m = regex_constants::match_default);
- [*Effects]: returns a [regex_token_iterator] that enumerates one [sub_match]
- for each value in /submatch/ for each occurrence of regular expression /e/
- in string /p/, matched using [match_flag_type] /m/.
- [h4 Examples]
- The following example takes a string and splits it into a series of tokens:
- #include <iostream>
- #include <boost/regex.hpp>
- using namespace std;
- int main(int argc, char**)
- {
- string s;
- do{
- if(argc == 1)
- {
- cout << "Enter text to split (or \"quit\" to exit): ";
- getline(cin, s);
- if(s == "quit") break;
- }
- else
- s = "This is a string of tokens";
- boost::regex re("\\s+");
- boost::sregex_token_iterator i(s.begin(), s.end(), re, -1);
- boost::sregex_token_iterator j;
- unsigned count = 0;
- while(i != j)
- {
- cout << *i++ << endl;
- count++;
- }
- cout << "There were " << count << " tokens found." << endl;
- }while(argc == 1);
- return 0;
- }
- The following example takes an HTML file and outputs a list of all the linked files:
- #include <fstream>
- #include <iostream>
- #include <iterator>
- #include <boost/regex.hpp>
- boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
- boost::regex::normal | boost::regbase::icase);
- void load_file(std::string& s, std::istream& is)
- {
- s.erase();
- //
- // attempt to grow string buffer to match file size,
- // this doesn't always work...
- s.reserve(is.rdbuf()->in_avail());
- char c;
- while(is.get(c))
- {
- // use logarithmic growth strategy, in case
- // in_avail (above) returned zero:
- if(s.capacity() == s.size())
- s.reserve(s.capacity() * 3);
- s.append(1, c);
- }
- }
- int main(int argc, char** argv)
- {
- std::string s;
- int i;
- for(i = 1; i < argc; ++i)
- {
- std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
- s.erase();
- std::ifstream is(argv[i]);
- load_file(s, is);
- boost::sregex_token_iterator i(s.begin(), s.end(), e, 1);
- boost::sregex_token_iterator j;
- while(i != j)
- {
- std::cout << *i++ << std::endl;
- }
- }
- //
- // alternative method:
- // test the array-literal constructor, and split out the whole
- // match as well as $1....
- //
- for(i = 1; i < argc; ++i)
- {
- std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
- s.erase();
- std::ifstream is(argv[i]);
- load_file(s, is);
- const int subs[] = {1, 0,};
- boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
- boost::sregex_token_iterator j;
- while(i != j)
- {
- std::cout << *i++ << std::endl;
- }
- }
- return 0;
- }
-
- [endsect]
|