search_test2.cpp 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. /*
  2. Copyright (c) Marshall Clow 2010-2012.
  3. Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. For more information, see http://www.boost.org
  6. */
  #include <boost/algorithm/searching/boyer_moore.hpp>
  #include <boost/algorithm/searching/boyer_moore_horspool.hpp>
  #include <boost/algorithm/searching/knuth_morris_pratt.hpp>
  #define BOOST_TEST_MAIN
  #include <boost/test/unit_test.hpp>
  #include <algorithm>
  #include <cstddef> // for std::size_t
  #include <ctime> // for clock_t
  #include <fstream>
  #include <iomanip>
  #include <iostream>
  #include <iterator> // for istream_iterator, back_inserter
  #include <stdexcept> // for runtime_error, logic_error
  #include <string>
  #include <utility> // for pair, make_pair
  #include <vector>
  // Container type used for both the corpus (haystack) and the pattern (needle).
  typedef std::vector<char> vec;
  // Number of times each search is repeated inside one timing measurement.
  // A typed constant rather than a macro, so the `i < NUM_TRIES` loop
  // conditions compare two std::size_t values instead of mixing signed and
  // unsigned operands.
  const std::size_t NUM_TRIES = 100;
  // runOne(call, refDiff): times NUM_TRIES calls of the free function
  // boost::algorithm::<call> over the whole haystack and checks every result.
  //
  // The expansion refers to these names, which must be visible where the
  // macro is used:
  //   haystack, needle - the container searched and the pattern searched for
  //   exp              - the expected result; every call must return exactly this
  //   res, i           - scratch variables (result pair and loop counter)
  //   printRes         - reporting helper
  // `refDiff` is forwarded to printRes as its third argument.
  // Throws std::runtime_error as soon as one call returns something other
  // than `exp`.
  //
  // The body is wrapped in do { ... } while (0) so that `runOne(...);` is a
  // single statement and stays correct inside an unbraced if/else.
  #define runOne(call, refDiff) do { \
      const std::clock_t bTime = std::clock (); \
      for ( i = 0; i < NUM_TRIES; ++i ) { \
          res = boost::algorithm::call \
              ( haystack.begin (), haystack.end (), \
                needle.begin (), needle.end ()); \
          if ( res != exp ) { \
              std::cout << "On run # " << i << " expected " \
                  << exp.first - haystack.begin () << " got " \
                  << res.first - haystack.begin () << std::endl; \
              throw std::runtime_error \
                  ( "Unexpected result from " #call ); \
          } \
      } \
      const std::clock_t eTime = std::clock (); \
      printRes ( #call, eTime - bTime, (refDiff) ); \
  } while (0)
  // runObject(obj, refDiff): builds one boost::algorithm::<obj> searcher object
  // for the needle, then times NUM_TRIES applications of it to the whole
  // haystack and checks every result.
  //
  // The clock is started before the searcher is constructed, so the reported
  // time includes the one-off construction from the needle.
  //
  // The expansion refers to these names, which must be visible where the
  // macro is used:
  //   haystack, needle - the container searched and the pattern searched for
  //   exp              - the expected result; every call must return exactly this
  //   res, i           - scratch variables (result pair and loop counter)
  //   vec, printRes    - the container typedef and the reporting helper
  // `refDiff` is forwarded to printRes as its third argument.
  // Throws std::runtime_error as soon as one call returns something other
  // than `exp`.
  //
  // The body is wrapped in do { ... } while (0) so that `runObject(...);` is a
  // single statement and stays correct inside an unbraced if/else.
  #define runObject(obj, refDiff) do { \
      const std::clock_t bTime = std::clock (); \
      boost::algorithm::obj <vec::const_iterator> \
          s_o ( needle.begin (), needle.end ()); \
      for ( i = 0; i < NUM_TRIES; ++i ) { \
          res = s_o ( haystack.begin (), haystack.end ()); \
          if ( res != exp ) { \
              std::cout << "On run # " << i << " expected " \
                  << exp.first - haystack.begin () << " got " \
                  << res.first - haystack.begin () << std::endl; \
              throw std::runtime_error \
                  ( "Unexpected result from " #obj " object" ); \
          } \
      } \
      const std::clock_t eTime = std::clock (); \
      printRes ( #obj " object", eTime - bTime, (refDiff) ); \
  } while (0)
  54. namespace {
  55. vec ReadFromFile ( const char *name ) {
  56. std::ifstream in ( name, std::ios_base::binary | std::ios_base::in );
  57. vec retVal;
  58. std::istream_iterator<char, char> begin(in);
  59. std::istream_iterator<char, char> end;
  60. std::copy ( begin, end, std::back_inserter ( retVal ));
  61. return retVal;
  62. }
  // Prints one line of the timing report to std::cout.
  //
  //   prompt  - label for the line, right-aligned in a 34-character field
  //   diff    - measured time in clock ticks (it is divided by CLOCKS_PER_SEC
  //             to show seconds)
  //   stdDiff - reference time in clock ticks that `diff` is compared against
  //
  // The line holds the label, `diff` in seconds, `diff` as a percentage of
  // `stdDiff`, the raw `diff` value, and a trailing " !!" when diff > stdDiff.
  // When stdDiff is 0 no ratio can be formed (the division would print "nan"
  // or "inf"), so "n/a" is shown in the percentage column instead.
  void printRes ( const char *prompt, unsigned long diff, unsigned long stdDiff ) {
      std::cout
          << std::setw(34) << prompt << " "
          << std::setw(6) << ( 1.0 * diff) / CLOCKS_PER_SEC << " seconds\t";

      if ( stdDiff != 0 )
          std::cout << std::setw(5) << (100.0 * diff) / stdDiff << "% \t";
      else
          std::cout << std::setw(5) << "n/a" << "  \t"; // same width as "<pct>% \t"

      std::cout << std::setw(12) << diff;
      if ( diff > stdDiff )
          std::cout << " !!";
      std::cout << std::endl;
  }
  73. void check_one ( const vec &haystack, const vec &needle, int expected ) {
  74. std::size_t i;
  75. std::clock_t sTime;
  76. unsigned long stdDiff;
  77. std::pair<vec::const_iterator, vec::const_iterator> res;
  78. std::pair<vec::const_iterator, vec::const_iterator> exp; // the expected result
  79. vec::const_iterator exp_start;
  80. if ( expected >= 0 )
  81. exp_start = haystack.begin () + expected;
  82. else if ( expected == -1 )
  83. exp_start = haystack.end (); // we didn't find it!
  84. else if ( expected == -2 )
  85. exp_start = std::search ( haystack.begin (), haystack.end (), needle.begin (), needle.end ());
  86. else
  87. throw std::logic_error ( "Expected must be -2, -1, or >= 0" );
  88. if ( expected == -1 )
  89. exp = std::make_pair(haystack.end(), haystack.end());
  90. else
  91. exp = std::make_pair(exp_start, exp_start + needle.size());
  92. std::cout << "Pattern is " << needle.size () << " entries long" << std::endl;
  93. std::cout << "Corpus is " << haystack.size () << " entries long" << std::endl;
  94. // First, the std library search
  95. sTime = std::clock ();
  96. for ( i = 0; i < NUM_TRIES; ++i ) {
  97. vec::const_iterator s_res = std::search ( haystack.begin (), haystack.end (), needle.begin (), needle.end ());
  98. if ( s_res != exp.first ) {
  99. std::cout << "On run # " << i << " expected " << exp.first - haystack.begin () << " got " << s_res - haystack.begin () << std::endl;
  100. throw std::runtime_error ( "Unexpected result from std::search" );
  101. }
  102. }
  103. stdDiff = std::clock () - sTime;
  104. printRes ( "std::search", stdDiff, stdDiff );
  105. runOne ( boyer_moore_search, stdDiff );
  106. runObject ( boyer_moore, stdDiff );
  107. runOne ( boyer_moore_horspool_search, stdDiff );
  108. runObject ( boyer_moore_horspool, stdDiff );
  109. runOne ( knuth_morris_pratt_search, stdDiff );
  110. runObject ( knuth_morris_pratt, stdDiff );
  111. }
  112. }
  113. BOOST_AUTO_TEST_CASE( test_main )
  114. {
  115. vec c1 = ReadFromFile ( "search_test_data/0001.corpus" );
  116. vec p1b = ReadFromFile ( "search_test_data/0001b.pat" );
  117. vec p1e = ReadFromFile ( "search_test_data/0001e.pat" );
  118. vec p1n = ReadFromFile ( "search_test_data/0001n.pat" );
  119. vec p1f = ReadFromFile ( "search_test_data/0001f.pat" );
  120. std::cout << std::ios::fixed << std::setprecision(4);
  121. // std::cout << "Corpus is " << c1.size () << " entries long\n";
  122. std::cout << "--- Beginning ---" << std::endl;
  123. check_one ( c1, p1b, 0 ); // Find it at position zero
  124. std::cout << "---- Middle -----" << std::endl;
  125. check_one ( c1, p1f, -2 ); // Don't know answer
  126. std::cout << "------ End ------" << std::endl;
  127. check_one ( c1, p1e, c1.size() - p1e.size ());
  128. std::cout << "--- Not found ---" << std::endl;
  129. check_one ( c1, p1n, -1 ); // Not found
  130. }