search_test3.cpp 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. /*
  2. Copyright (c) Marshall Clow 2010-2012.
  3. Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. For more information, see http://www.boost.org
  6. */
  7. #include <boost/algorithm/searching/boyer_moore.hpp>
  8. #include <boost/algorithm/searching/boyer_moore_horspool.hpp>
  9. #include <boost/algorithm/searching/knuth_morris_pratt.hpp>
  10. #define BOOST_TEST_MAIN
  11. #include <boost/test/unit_test.hpp>
  12. #include <ctime> // for clock_t
  13. #include <iostream>
  14. #include <fstream>
  15. #include <iomanip>
  16. #include <algorithm>
  17. #include <vector>
  18. #include <string>
  // Sequence type used for both the corpus and the patterns: one std::string per entry.
  typedef std::vector< std::string > vec;

  // Number of times every search is repeated inside its timed loop.
  #define NUM_TRIES 100
  /*
   * runOne(call, refDiff)
   *
   * Times NUM_TRIES invocations of the free function boost::algorithm::<call>
   * on (haystack, needle) and reports the elapsed clock ticks through printRes,
   * using refDiff as the reference duration.
   *
   * The macro is not self-contained: the expansion site must have these names
   * in scope -- `i` (loop counter), `res` (receives each result), `exp` (the
   * expected result), `haystack` and `needle`.
   *
   * Every result is compared with `exp`; on the first mismatch a diagnostic is
   * written to std::cout and std::runtime_error is thrown.
   *
   * The body is wrapped in do { ... } while ( 0 ) so that `runOne ( ... );`
   * is exactly one statement and stays correct inside an unbraced if/else.
   */
  #define runOne(call, refDiff) do { \
      const std::clock_t bTime = std::clock (); \
      for ( i = 0; i < NUM_TRIES; ++i ) { \
          res = boost::algorithm::call \
              ( haystack.begin (), haystack.end (), \
                needle.begin (), needle.end ()); \
          if ( res != exp ) { \
              std::cout << "On run # " << i << " expected " \
                  << exp.first - haystack.begin () << " got " \
                  << res.first - haystack.begin () << std::endl; \
              throw std::runtime_error \
                  ( "Unexpected result from " #call ); \
          } \
      } \
      const std::clock_t eTime = std::clock (); \
      printRes ( #call, eTime - bTime, refDiff ); \
  } while ( 0 )
  /*
   * runObject(obj, refDiff)
   *
   * Builds one boost::algorithm::<obj><vec::const_iterator> searcher from
   * `needle`, applies it to `haystack` NUM_TRIES times and reports the elapsed
   * clock ticks through printRes, using refDiff as the reference duration.
   * The clock is started before the searcher is constructed, so construction
   * is part of the measured time.
   *
   * The macro is not self-contained: the expansion site must have these names
   * in scope -- `i` (loop counter), `res` (receives each result), `exp` (the
   * expected result), `haystack` and `needle`.
   *
   * Every result is compared with `exp`; on the first mismatch a diagnostic is
   * written to std::cout and std::runtime_error is thrown.
   *
   * The body is wrapped in do { ... } while ( 0 ) so that `runObject ( ... );`
   * is exactly one statement and stays correct inside an unbraced if/else.
   */
  #define runObject(obj, refDiff) do { \
      const std::clock_t bTime = std::clock (); \
      boost::algorithm::obj <vec::const_iterator> \
          s_o ( needle.begin (), needle.end ()); \
      for ( i = 0; i < NUM_TRIES; ++i ) { \
          res = s_o ( haystack.begin (), haystack.end ()); \
          if ( res != exp ) { \
              std::cout << "On run # " << i << " expected " \
                  << exp.first - haystack.begin () << " got " \
                  << res.first - haystack.begin () << std::endl; \
              throw std::runtime_error \
                  ( "Unexpected result from " #obj " object" ); \
          } \
      } \
      const std::clock_t eTime = std::clock (); \
      printRes ( #obj " object", eTime - bTime, refDiff ); \
  } while ( 0 )
  55. namespace {
  56. vec ReadFromFile ( const char *name ) {
  57. std::ifstream in ( name, std::ios_base::binary | std::ios_base::in );
  58. std::string temp;
  59. vec retVal;
  60. while ( std::getline ( in, temp ))
  61. retVal.push_back ( temp );
  62. return retVal;
  63. }
  /**
   * Writes one row of the timing table to std::cout.
   *
   * @param prompt   label for the row (right-aligned in a 34-character field)
   * @param diff     measured duration in clock ticks
   * @param stdDiff  reference duration in clock ticks that `diff` is compared to
   *
   * The row shows `diff` converted to seconds (via CLOCKS_PER_SEC), `diff` as
   * a percentage of `stdDiff`, and the raw tick count.  " !!" is appended when
   * `diff` is larger than the reference.
   *
   * When `stdDiff` is 0 (the reference ran in less than one clock tick) the
   * percentage is undefined; "n/a" is printed instead of dividing by zero.
   */
  void printRes ( const char *prompt, unsigned long diff, unsigned long stdDiff ) {
      std::cout
          << std::setw(34) << prompt << " "
          << std::setw(6)  << ( 1.0 * diff ) / CLOCKS_PER_SEC << " seconds\t";

      if ( stdDiff != 0 )
          std::cout << std::setw(5) << ( 100.0 * diff ) / stdDiff << "% \t";
      else
          std::cout << std::setw(5) << "n/a" << "  \t";   // keep the column width

      std::cout << std::setw(12) << diff;
      if ( diff > stdDiff )
          std::cout << " !!";
      std::cout << std::endl;
  }
  74. void check_one ( const vec &haystack, const vec &needle, int expected ) {
  75. std::size_t i;
  76. std::clock_t sTime;
  77. unsigned long stdDiff;
  78. std::pair<vec::const_iterator, vec::const_iterator> res;
  79. std::pair<vec::const_iterator, vec::const_iterator> exp; // the expected result
  80. vec::const_iterator exp_start;
  81. if ( expected >= 0 )
  82. exp_start = haystack.begin () + expected;
  83. else if ( expected == -1 )
  84. exp_start = haystack.end (); // we didn't find it1
  85. else if ( expected == -2 )
  86. exp_start = std::search ( haystack.begin (), haystack.end (), needle.begin (), needle.end ());
  87. else
  88. throw std::logic_error ( "Expected must be -2, -1, or >= 0" );
  89. if ( expected == -1 )
  90. exp = std::make_pair(haystack.end(), haystack.end());
  91. else
  92. exp = std::make_pair(exp_start, exp_start + needle.size());
  93. std::cout << "Pattern is " << needle.size () << " entries long" << std::endl;
  94. std::cout << "Corpus is " << haystack.size () << " entries long" << std::endl;
  95. // First, the std library search
  96. sTime = std::clock ();
  97. for ( i = 0; i < NUM_TRIES; ++i ) {
  98. vec::const_iterator s_res = std::search ( haystack.begin (), haystack.end (), needle.begin (), needle.end ());
  99. if ( s_res != exp.first ) {
  100. std::cout << "On run # " << i << " expected " << exp.first - haystack.begin () << " got " << s_res - haystack.begin () << std::endl;
  101. throw std::runtime_error ( "Unexpected result from std::search" );
  102. }
  103. }
  104. stdDiff = std::clock () - sTime;
  105. printRes ( "std::search", stdDiff, stdDiff );
  106. runOne ( boyer_moore_search, stdDiff );
  107. runObject ( boyer_moore, stdDiff );
  108. runOne ( boyer_moore_horspool_search, stdDiff );
  109. runObject ( boyer_moore_horspool, stdDiff );
  110. runOne ( knuth_morris_pratt_search, stdDiff );
  111. runObject ( knuth_morris_pratt, stdDiff );
  112. }
  113. }
  114. BOOST_AUTO_TEST_CASE( test_main )
  115. {
  116. vec c1 = ReadFromFile ( "search_test_data/0001.corpus" );
  117. vec p1b = ReadFromFile ( "search_test_data/0002b.pat" );
  118. vec p1e = ReadFromFile ( "search_test_data/0002e.pat" );
  119. vec p1n = ReadFromFile ( "search_test_data/0002n.pat" );
  120. vec p1f = ReadFromFile ( "search_test_data/0002f.pat" );
  121. std::cout << std::ios::fixed << std::setprecision(4);
  122. // std::cout << "Corpus is " << c1.size () << " entries long\n";
  123. std::cout << "--- Beginning ---" << std::endl;
  124. check_one ( c1, p1b, 0 ); // Find it at position zero
  125. std::cout << "---- Middle -----" << std::endl;
  126. check_one ( c1, p1f, -2 ); // Don't know answer
  127. std::cout << "------ End ------" << std::endl;
  128. check_one ( c1, p1e, c1.size() - p1e.size ());
  129. std::cout << "--- Not found ---" << std::endl;
  130. check_one ( c1, p1n, -1 ); // Not found
  131. }