/*
 *
 * Copyright (c) 1998-2002
 * John Maddock
 *
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
- #ifdef _MSC_VER
- #pragma warning(disable: 4996 4127)
- #endif
- #include <boost/config.hpp>
- #include <boost/regex.hpp>
- #include <boost/cregex.hpp>
- #include <boost/timer.hpp>
- #include <boost/smart_ptr.hpp>
- #include <string>
- #include <algorithm>
- #include <deque>
- #include <iterator>
- #ifdef BOOST_RE_OLD_IOSTREAM
- #include <iostream.h>
- #include <fstream.h>
- #else
- #include <iostream>
- #include <fstream>
- using std::cout;
- using std::cin;
- using std::cerr;
- using std::istream;
- using std::ostream;
- using std::endl;
- using std::ifstream;
- using std::streambuf;
- using std::getline;
- #endif
- #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
- #include <windows.h>
- #endif
- #if (defined(_MSC_VER) && (_MSC_VER <= 1300)) || defined(__sgi)
- // maybe no Koenig lookup, use using declaration instead:
- using namespace boost;
- #endif
- #ifndef BOOST_NO_WREGEX
//
// Stream a wide string to a narrow stream, one character at a time.
// Each wide character is cut down to a char with static_cast; no
// locale-aware conversion is attempted.  Returns the stream passed in.
std::ostream& operator << (std::ostream& os, const std::wstring& s)
{
   for(std::wstring::const_iterator pos = s.begin(), last = s.end(); pos != last; ++pos)
      os.put(static_cast<char>(*pos));
   return os;
}
- #endif
//
// Minimal output iterator that appends every value assigned through it
// to the container it was constructed with.  S has to provide value_type
// and append(count, value), as std::basic_string does.  Only a pointer to
// the target is stored, so the target must outlive the iterator and any
// copies of it.
template <class S>
class string_out_iterator
{
   S* m_target;   // destination container, not owned

public:
   typedef std::output_iterator_tag iterator_category;
   typedef void                     value_type;
   typedef void                     difference_type;
   typedef void                     pointer;
   typedef void                     reference;

   string_out_iterator(S& s)
      : m_target(&s)
   {}

   // dereference and both increments do nothing but hand back the
   // iterator itself, so "*it++ = v" ends up in operator= below:
   string_out_iterator& operator*() { return *this; }
   string_out_iterator& operator++() { return *this; }
   string_out_iterator& operator++(int) { return *this; }

   // append a single element to the target:
   string_out_iterator& operator=(typename S::value_type v)
   {
      m_target->append(1, v);
      return *this;
   }
};
namespace boost{
//
// Line reader used by this example in place of a direct std::getline call.
//
// Reads one line from is into s (the newline is consumed but not stored)
// and strips one trailing '\r', so files with CR/LF line ends give the
// same text as files with plain LF line ends.  s is always cleared first,
// so a read that extracts nothing leaves s empty rather than holding the
// previous line.  Returns is; the stream reports failure when no
// characters at all could be extracted.
//
#if defined(BOOST_MSVC) || (defined(__BORLANDC__) && (__BORLANDC__ == 0x550)) || defined(__SGI_STL_PORT)
//
// problem with std::getline under MSVC6sp3
// and C++ Builder 5.5, is this really that hard?
std::istream& getline(std::istream& is, std::string& s)
{
   s.erase();
   // keep the result of get() as an int: once it has been narrowed to
   // char the end-of-file value can no longer be told apart from data,
   // and the loop would never terminate at end of input.
   int c = is.get();
   while(is.good() && (c != '\n'))
   {
      s.append(1, static_cast<char>(c));
      c = is.get();
   }
   // get() raises failbit when it runs into end of input; when that
   // happens after some text was read, report success for this line the
   // way std::getline does and leave only eofbit (and badbit) standing:
   if(!s.empty() && is.eof())
      is.clear(is.rdstate() & ~std::ios::failbit);
   if(s.size() && (s[s.size() - 1] == '\r'))
      s.erase(s.size() - 1);
   return is;
}
#else
std::istream& getline(std::istream& is, std::string& s)
{
   // std::getline leaves its argument untouched when the stream is
   // already in a failed state, so clear the old contents ourselves:
   s.erase();
   std::getline(is, s);
   if(s.size() && (s[s.size() - 1] == '\r'))
      s.erase(s.size() - 1);
   return is;
}
#endif
}
- int main(int argc, char**argv)
- {
- ifstream ifs;
- std::istream* p_in = &std::cin;
- if(argc == 2)
- {
- ifs.open(argv[1]);
- ifs.peek();
- if(!ifs.good())
- {
- cout << "Bad filename: " << argv[1] << endl;
- return -1;
- }
- p_in = &ifs;
- }
-
- boost::regex ex;
- boost::match_results<std::string::const_iterator> sm;
- #ifndef BOOST_NO_WREGEX
- std::wstring ws1, ws2;
- boost::wregex wex;
- boost::match_results<std::wstring::const_iterator> wsm;
- #endif
- boost::match_results<std::deque<char>::iterator> dm;
- std::string s1, s2, ts;
- std::deque<char> ds;
- boost::regex_tA r;
- boost::scoped_array<boost::regmatch_t> matches;
- std::size_t nsubs;
- boost::timer t;
- double tim;
- int result = 0;
- unsigned iters = 100;
- double wait_time = (std::min)(t.elapsed_min() * 1000, 0.5);
- while(true)
- {
- cout << "Enter expression (or \"quit\" to exit): ";
- boost::getline(*p_in, s1);
- if(argc == 2)
- cout << endl << s1 << endl;
- if(s1 == "quit")
- break;
- #ifndef BOOST_NO_WREGEX
- ws1.erase();
- std::copy(s1.begin(), s1.end(), string_out_iterator<std::wstring>(ws1));
- #endif
- try{
- ex.assign(s1);
- #ifndef BOOST_NO_WREGEX
- wex.assign(ws1);
- #endif
- }
- catch(std::exception& e)
- {
- cout << "Error in expression: \"" << e.what() << "\"" << endl;
- continue;
- }
- int code = regcompA(&r, s1.c_str(), boost::REG_PERL);
- if(code != 0)
- {
- char buf[256];
- regerrorA(code, &r, buf, 256);
- cout << "regcompA error: \"" << buf << "\"" << endl;
- continue;
- }
- nsubs = r.re_nsub + 1;
- matches.reset(new boost::regmatch_t[nsubs]);
- while(true)
- {
- cout << "Enter string to search (or \"quit\" to exit): ";
- boost::getline(*p_in, s2);
- if(argc == 2)
- cout << endl << s2 << endl;
- if(s2 == "quit")
- break;
- #ifndef BOOST_NO_WREGEX
- ws2.erase();
- std::copy(s2.begin(), s2.end(), string_out_iterator<std::wstring>(ws2));
- #endif
- ds.erase(ds.begin(), ds.end());
- std::copy(s2.begin(), s2.end(), std::back_inserter(ds));
- unsigned i;
- iters = 10;
- tim = 1.1;
- #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
- MSG msg;
- PeekMessage(&msg, 0, 0, 0, 0);
- Sleep(0);
- #endif
- // cache load:
- regex_search(s2, sm, ex);
- // measure time interval for basic_regex<char>
- do{
- iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
- t.restart();
- for(i =0; i < iters; ++i)
- {
- result = regex_search(s2, sm, ex);
- }
- tim = t.elapsed();
- }while(tim < wait_time);
- cout << "regex time: " << (tim * 1000000 / iters) << "us" << endl;
- if(result)
- {
- for(i = 0; i < sm.size(); ++i)
- {
- ts = sm[i];
- cout << "\tmatch " << i << ": \"";
- cout << ts;
- cout << "\" (matched=" << sm[i].matched << ")" << endl;
- }
- cout << "\tmatch $`: \"";
- cout << std::string(sm[-1]);
- cout << "\" (matched=" << sm[-1].matched << ")" << endl;
- cout << "\tmatch $': \"";
- cout << std::string(sm[-2]);
- cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
- }
- #ifndef BOOST_NO_WREGEX
- // measure time interval for boost::wregex
- iters = 10;
- tim = 1.1;
- // cache load:
- regex_search(ws2, wsm, wex);
- do{
- iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
- t.restart();
- for(i = 0; i < iters; ++i)
- {
- result = regex_search(ws2, wsm, wex);
- }
- tim = t.elapsed();
- }while(tim < wait_time);
- cout << "wregex time: " << (tim * 1000000 / iters) << "us" << endl;
- if(result)
- {
- std::wstring tw;
- for(i = 0; i < wsm.size(); ++i)
- {
- tw.erase();
- std::copy(wsm[i].first, wsm[i].second, string_out_iterator<std::wstring>(tw));
- cout << "\tmatch " << i << ": \"" << tw;
- cout << "\" (matched=" << sm[i].matched << ")" << endl;
- }
- cout << "\tmatch $`: \"";
- tw.erase();
- std::copy(wsm[-1].first, wsm[-1].second, string_out_iterator<std::wstring>(tw));
- cout << tw;
- cout << "\" (matched=" << sm[-1].matched << ")" << endl;
- cout << "\tmatch $': \"";
- tw.erase();
- std::copy(wsm[-2].first, wsm[-2].second, string_out_iterator<std::wstring>(tw));
- cout << tw;
- cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
- }
- #endif
-
- // measure time interval for basic_regex<char> using a deque
- iters = 10;
- tim = 1.1;
- // cache load:
- regex_search(ds.begin(), ds.end(), dm, ex);
- do{
- iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
- t.restart();
- for(i = 0; i < iters; ++i)
- {
- result = regex_search(ds.begin(), ds.end(), dm, ex);
- }
- tim = t.elapsed();
- }while(tim < wait_time);
- cout << "regex time (search over std::deque<char>): " << (tim * 1000000 / iters) << "us" << endl;
- if(result)
- {
- for(i = 0; i < dm.size(); ++i)
- {
- ts.erase();
- std::copy(dm[i].first, dm[i].second, string_out_iterator<std::string>(ts));
- cout << "\tmatch " << i << ": \"" << ts;
- cout << "\" (matched=" << sm[i].matched << ")" << endl;
- }
- cout << "\tmatch $`: \"";
- ts.erase();
- std::copy(dm[-1].first, dm[-1].second, string_out_iterator<std::string>(ts));
- cout << ts;
- cout << "\" (matched=" << sm[-1].matched << ")" << endl;
- cout << "\tmatch $': \"";
- ts.erase();
- std::copy(dm[-2].first, dm[-2].second, string_out_iterator<std::string>(ts));
- cout << ts;
- cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
- }
-
- // measure time interval for POSIX matcher:
- iters = 10;
- tim = 1.1;
- // cache load:
- regexecA(&r, s2.c_str(), nsubs, matches.get(), 0);
- do{
- iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
- t.restart();
- for(i = 0; i < iters; ++i)
- {
- result = regexecA(&r, s2.c_str(), nsubs, matches.get(), 0);
- }
- tim = t.elapsed();
- }while(tim < wait_time);
- cout << "POSIX regexecA time: " << (tim * 1000000 / iters) << "us" << endl;
- if(result == 0)
- {
- for(i = 0; i < nsubs; ++i)
- {
- if(matches[i].rm_so >= 0)
- {
- ts.assign(s2.begin() + matches[i].rm_so, s2.begin() + matches[i].rm_eo);
- cout << "\tmatch " << i << ": \"" << ts << "\" (matched=" << (matches[i].rm_so != -1) << ")"<< endl;
- }
- else
- cout << "\tmatch " << i << ": \"\" (matched=" << (matches[i].rm_so != -1) << ")" << endl; // no match
- }
- cout << "\tmatch $`: \"";
- ts.erase();
- ts.assign(s2.begin(), s2.begin() + matches[0].rm_so);
- cout << ts;
- cout << "\" (matched=" << (matches[0].rm_so != 0) << ")" << endl;
- cout << "\tmatch $': \"";
- ts.erase();
- ts.assign(s2.begin() + matches[0].rm_eo, s2.end());
- cout << ts;
- cout << "\" (matched=" << (matches[0].rm_eo != (int)s2.size()) << ")" << endl << endl;
- }
- }
- regfreeA(&r);
- }
- return 0;
- }
- #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(UNDER_CE)
- #pragma comment(lib, "user32.lib")
- #endif
|