regex_timer.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. /*
  2. *
  3. * Copyright (c) 1998-2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. #ifdef _MSC_VER
  12. #pragma warning(disable: 4996 4127)
  13. #endif
  14. #include <boost/config.hpp>
  15. #include <boost/regex.hpp>
  16. #include <boost/cregex.hpp>
  17. #include <boost/timer.hpp>
  18. #include <boost/smart_ptr.hpp>
  19. #include <string>
  20. #include <algorithm>
  21. #include <deque>
  22. #include <iterator>
  23. #ifdef BOOST_RE_OLD_IOSTREAM
  24. #include <iostream.h>
  25. #include <fstream.h>
  26. #else
  27. #include <iostream>
  28. #include <fstream>
  29. using std::cout;
  30. using std::cin;
  31. using std::cerr;
  32. using std::istream;
  33. using std::ostream;
  34. using std::endl;
  35. using std::ifstream;
  36. using std::streambuf;
  37. using std::getline;
  38. #endif
  39. #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
  40. #include <windows.h>
  41. #endif
  42. #if (defined(_MSC_VER) && (_MSC_VER <= 1300)) || defined(__sgi)
  43. // maybe no Koenig lookup, use a using directive instead:
  44. using namespace boost;
  45. #endif
  46. #ifndef BOOST_NO_WREGEX
  // Writes a wide string to a narrow stream, one character at a time.
  // Each wide character is converted with static_cast<char>, so any
  // character that does not fit in a char is truncated rather than
  // transcoded.
  ostream& operator << (ostream& os, const std::wstring& s)
  {
     for(std::wstring::const_iterator pos = s.begin(), last = s.end(); pos != last; ++pos)
        os.put(static_cast<char>(*pos));
     return os;
  }
  59. #endif
  // Output iterator that appends every value assigned through it to the
  // container passed to the constructor. S must provide value_type and
  // append(count, value), as std::string and std::wstring do.
  // The iterator stores a pointer to the container and does not own it.
  template <class S>
  class string_out_iterator
  {
     S* target; // destination container
  public:
     // iterator traits: output iterators expose void for the value-related types
     typedef std::output_iterator_tag iterator_category;
     typedef void value_type;
     typedef void difference_type;
     typedef void pointer;
     typedef void reference;

     string_out_iterator(S& s) : target(&s) {}

     // Dereference and both increments are no-ops that hand back the
     // iterator itself, so that `*it++ = v` ends up in operator= below.
     string_out_iterator& operator*() { return *this; }
     string_out_iterator& operator++() { return *this; }
     string_out_iterator& operator++(int) { return *this; }

     // Appends a single element to the destination container.
     string_out_iterator& operator=(typename S::value_type v)
     {
        target->append(1, v);
        return *this;
     }
  };
  namespace boost{
  //
  // getline: reads one line from `is` into `s` (replacing its previous
  // contents), discarding the terminating '\n' and a trailing '\r' if present.
  // Returns `is`. When the input ends with no characters read, the stream
  // is left in a failed state so callers can detect end of input.
  //
  #if defined(BOOST_MSVC) || (defined(__BORLANDC__) && (__BORLANDC__ == 0x550)) || defined(__SGI_STL_PORT)
  //
  // problem with std::getline under MSVC6sp3
  // and C++ Builder 5.5, so read character by character instead.
  istream& getline(istream& is, std::string& s)
  {
     s.erase();
     // keep the result of get() as an int so that end-of-file is not
     // confused with a real character, and stop as soon as the stream
     // stops being good (previously this looped forever at end of input):
     int c = is.get();
     while(is.good() && (c != '\n'))
     {
        s.append(1, static_cast<char>(c));
        c = is.get();
     }
     // a final line without a trailing newline is still a successful read:
     // keep eofbit but drop the failbit that get() raised at end of file.
     if(is.eof() && !is.bad() && !s.empty())
        is.clear(is.rdstate() & ~std::ios_base::failbit);
     // strip the '\r' of a CRLF line ending, as the other branch does:
     if(s.size() && (s[s.size() - 1] == '\r'))
        s.erase(s.size() - 1);
     return is;
  }
  #else
  istream& getline(istream& is, std::string& s)
  {
     std::getline(is, s);
     // strip the '\r' of a CRLF line ending:
     if(s.size() && (s[s.size() - 1] == '\r'))
        s.erase(s.size() - 1);
     return is;
  }
  #endif
  }
  109. int main(int argc, char**argv)
  110. {
  111. ifstream ifs;
  112. std::istream* p_in = &std::cin;
  113. if(argc == 2)
  114. {
  115. ifs.open(argv[1]);
  116. ifs.peek();
  117. if(!ifs.good())
  118. {
  119. cout << "Bad filename: " << argv[1] << endl;
  120. return -1;
  121. }
  122. p_in = &ifs;
  123. }
  124. boost::regex ex;
  125. boost::match_results<std::string::const_iterator> sm;
  126. #ifndef BOOST_NO_WREGEX
  127. std::wstring ws1, ws2;
  128. boost::wregex wex;
  129. boost::match_results<std::wstring::const_iterator> wsm;
  130. #endif
  131. boost::match_results<std::deque<char>::iterator> dm;
  132. std::string s1, s2, ts;
  133. std::deque<char> ds;
  134. boost::regex_tA r;
  135. boost::scoped_array<boost::regmatch_t> matches;
  136. std::size_t nsubs;
  137. boost::timer t;
  138. double tim;
  139. int result = 0;
  140. unsigned iters = 100;
  141. double wait_time = (std::min)(t.elapsed_min() * 1000, 0.5);
  142. while(true)
  143. {
  144. cout << "Enter expression (or \"quit\" to exit): ";
  145. boost::getline(*p_in, s1);
  146. if(argc == 2)
  147. cout << endl << s1 << endl;
  148. if(s1 == "quit")
  149. break;
  150. #ifndef BOOST_NO_WREGEX
  151. ws1.erase();
  152. std::copy(s1.begin(), s1.end(), string_out_iterator<std::wstring>(ws1));
  153. #endif
  154. try{
  155. ex.assign(s1);
  156. #ifndef BOOST_NO_WREGEX
  157. wex.assign(ws1);
  158. #endif
  159. }
  160. catch(std::exception& e)
  161. {
  162. cout << "Error in expression: \"" << e.what() << "\"" << endl;
  163. continue;
  164. }
  165. int code = regcompA(&r, s1.c_str(), boost::REG_PERL);
  166. if(code != 0)
  167. {
  168. char buf[256];
  169. regerrorA(code, &r, buf, 256);
  170. cout << "regcompA error: \"" << buf << "\"" << endl;
  171. continue;
  172. }
  173. nsubs = r.re_nsub + 1;
  174. matches.reset(new boost::regmatch_t[nsubs]);
  175. while(true)
  176. {
  177. cout << "Enter string to search (or \"quit\" to exit): ";
  178. boost::getline(*p_in, s2);
  179. if(argc == 2)
  180. cout << endl << s2 << endl;
  181. if(s2 == "quit")
  182. break;
  183. #ifndef BOOST_NO_WREGEX
  184. ws2.erase();
  185. std::copy(s2.begin(), s2.end(), string_out_iterator<std::wstring>(ws2));
  186. #endif
  187. ds.erase(ds.begin(), ds.end());
  188. std::copy(s2.begin(), s2.end(), std::back_inserter(ds));
  189. unsigned i;
  190. iters = 10;
  191. tim = 1.1;
  192. #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
  193. MSG msg;
  194. PeekMessage(&msg, 0, 0, 0, 0);
  195. Sleep(0);
  196. #endif
  197. // cache load:
  198. regex_search(s2, sm, ex);
  199. // measure time interval for basic_regex<char>
  200. do{
  201. iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
  202. t.restart();
  203. for(i =0; i < iters; ++i)
  204. {
  205. result = regex_search(s2, sm, ex);
  206. }
  207. tim = t.elapsed();
  208. }while(tim < wait_time);
  209. cout << "regex time: " << (tim * 1000000 / iters) << "us" << endl;
  210. if(result)
  211. {
  212. for(i = 0; i < sm.size(); ++i)
  213. {
  214. ts = sm[i];
  215. cout << "\tmatch " << i << ": \"";
  216. cout << ts;
  217. cout << "\" (matched=" << sm[i].matched << ")" << endl;
  218. }
  219. cout << "\tmatch $`: \"";
  220. cout << std::string(sm[-1]);
  221. cout << "\" (matched=" << sm[-1].matched << ")" << endl;
  222. cout << "\tmatch $': \"";
  223. cout << std::string(sm[-2]);
  224. cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
  225. }
  226. #ifndef BOOST_NO_WREGEX
  227. // measure time interval for boost::wregex
  228. iters = 10;
  229. tim = 1.1;
  230. // cache load:
  231. regex_search(ws2, wsm, wex);
  232. do{
  233. iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
  234. t.restart();
  235. for(i = 0; i < iters; ++i)
  236. {
  237. result = regex_search(ws2, wsm, wex);
  238. }
  239. tim = t.elapsed();
  240. }while(tim < wait_time);
  241. cout << "wregex time: " << (tim * 1000000 / iters) << "us" << endl;
  242. if(result)
  243. {
  244. std::wstring tw;
  245. for(i = 0; i < wsm.size(); ++i)
  246. {
  247. tw.erase();
  248. std::copy(wsm[i].first, wsm[i].second, string_out_iterator<std::wstring>(tw));
  249. cout << "\tmatch " << i << ": \"" << tw;
  250. cout << "\" (matched=" << sm[i].matched << ")" << endl;
  251. }
  252. cout << "\tmatch $`: \"";
  253. tw.erase();
  254. std::copy(wsm[-1].first, wsm[-1].second, string_out_iterator<std::wstring>(tw));
  255. cout << tw;
  256. cout << "\" (matched=" << sm[-1].matched << ")" << endl;
  257. cout << "\tmatch $': \"";
  258. tw.erase();
  259. std::copy(wsm[-2].first, wsm[-2].second, string_out_iterator<std::wstring>(tw));
  260. cout << tw;
  261. cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
  262. }
  263. #endif
  264. // measure time interval for basic_regex<char> using a deque
  265. iters = 10;
  266. tim = 1.1;
  267. // cache load:
  268. regex_search(ds.begin(), ds.end(), dm, ex);
  269. do{
  270. iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
  271. t.restart();
  272. for(i = 0; i < iters; ++i)
  273. {
  274. result = regex_search(ds.begin(), ds.end(), dm, ex);
  275. }
  276. tim = t.elapsed();
  277. }while(tim < wait_time);
  278. cout << "regex time (search over std::deque<char>): " << (tim * 1000000 / iters) << "us" << endl;
  279. if(result)
  280. {
  281. for(i = 0; i < dm.size(); ++i)
  282. {
  283. ts.erase();
  284. std::copy(dm[i].first, dm[i].second, string_out_iterator<std::string>(ts));
  285. cout << "\tmatch " << i << ": \"" << ts;
  286. cout << "\" (matched=" << sm[i].matched << ")" << endl;
  287. }
  288. cout << "\tmatch $`: \"";
  289. ts.erase();
  290. std::copy(dm[-1].first, dm[-1].second, string_out_iterator<std::string>(ts));
  291. cout << ts;
  292. cout << "\" (matched=" << sm[-1].matched << ")" << endl;
  293. cout << "\tmatch $': \"";
  294. ts.erase();
  295. std::copy(dm[-2].first, dm[-2].second, string_out_iterator<std::string>(ts));
  296. cout << ts;
  297. cout << "\" (matched=" << sm[-2].matched << ")" << endl << endl;
  298. }
  299. // measure time interval for POSIX matcher:
  300. iters = 10;
  301. tim = 1.1;
  302. // cache load:
  303. regexecA(&r, s2.c_str(), nsubs, matches.get(), 0);
  304. do{
  305. iters *= static_cast<unsigned>((tim > 0.001) ? (1.1/tim) : 100);
  306. t.restart();
  307. for(i = 0; i < iters; ++i)
  308. {
  309. result = regexecA(&r, s2.c_str(), nsubs, matches.get(), 0);
  310. }
  311. tim = t.elapsed();
  312. }while(tim < wait_time);
  313. cout << "POSIX regexecA time: " << (tim * 1000000 / iters) << "us" << endl;
  314. if(result == 0)
  315. {
  316. for(i = 0; i < nsubs; ++i)
  317. {
  318. if(matches[i].rm_so >= 0)
  319. {
  320. ts.assign(s2.begin() + matches[i].rm_so, s2.begin() + matches[i].rm_eo);
  321. cout << "\tmatch " << i << ": \"" << ts << "\" (matched=" << (matches[i].rm_so != -1) << ")"<< endl;
  322. }
  323. else
  324. cout << "\tmatch " << i << ": \"\" (matched=" << (matches[i].rm_so != -1) << ")" << endl; // no match
  325. }
  326. cout << "\tmatch $`: \"";
  327. ts.erase();
  328. ts.assign(s2.begin(), s2.begin() + matches[0].rm_so);
  329. cout << ts;
  330. cout << "\" (matched=" << (matches[0].rm_so != 0) << ")" << endl;
  331. cout << "\tmatch $': \"";
  332. ts.erase();
  333. ts.assign(s2.begin() + matches[0].rm_eo, s2.end());
  334. cout << ts;
  335. cout << "\" (matched=" << (matches[0].rm_eo != (int)s2.size()) << ")" << endl << endl;
  336. }
  337. }
  338. regfreeA(&r);
  339. }
  340. return 0;
  341. }
  342. #if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(UNDER_CE)
  343. #pragma comment(lib, "user32.lib")
  344. #endif