boyer_moore_horspool.hpp 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*
  2. Copyright (c) Marshall Clow 2010-2012.
  3. Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. For more information, see http://www.boost.org
  6. */
  7. #ifndef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
  8. #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
  9. #include <iterator> // for std::iterator_traits
  10. #include <boost/config.hpp>
  11. #include <boost/assert.hpp>
  12. #include <boost/static_assert.hpp>
  13. #include <boost/range/begin.hpp>
  14. #include <boost/range/end.hpp>
  15. #include <boost/utility/enable_if.hpp>
  16. #include <boost/type_traits/is_same.hpp>
  17. #include <boost/algorithm/searching/detail/bm_traits.hpp>
  18. #include <boost/algorithm/searching/detail/debugging.hpp>
  19. // #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
  20. namespace boost { namespace algorithm {
  21. /*
  22. A templated version of the boyer-moore-horspool searching algorithm.
  23. Requirements:
  24. * Random access iterators
  25. * The two iterator types (patIter and corpusIter) must
  26. "point to" the same underlying type.
  27. * Additional requirements may be imposed by the skip table, such as:
  28. ** Numeric type (array-based skip table)
  29. ** Hashable type (map-based skip table)
  30. http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
  31. */
  32. template <typename patIter, typename traits = detail::BM_traits<patIter> >
  33. class boyer_moore_horspool {
  34. typedef typename std::iterator_traits<patIter>::difference_type difference_type;
  35. public:
  36. boyer_moore_horspool ( patIter first, patIter last )
  37. : pat_first ( first ), pat_last ( last ),
  38. k_pattern_length ( std::distance ( pat_first, pat_last )),
  39. skip_ ( k_pattern_length, k_pattern_length ) {
  40. // Build the skip table
  41. std::size_t i = 0;
  42. if ( first != last ) // empty pattern?
  43. for ( patIter iter = first; iter != last-1; ++iter, ++i )
  44. skip_.insert ( *iter, k_pattern_length - 1 - i );
  45. #ifdef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
  46. skip_.PrintSkipTable ();
  47. #endif
  48. }
  49. ~boyer_moore_horspool () {}
  50. /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last)
  51. /// \brief Searches the corpus for the pattern that was passed into the constructor
  52. ///
  53. /// \param corpus_first The start of the data to search (Random Access Iterator)
  54. /// \param corpus_last One past the end of the data to search
  55. ///
  56. template <typename corpusIter>
  57. std::pair<corpusIter, corpusIter>
  58. operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
  59. BOOST_STATIC_ASSERT (( boost::is_same<
  60. typename std::iterator_traits<patIter>::value_type,
  61. typename std::iterator_traits<corpusIter>::value_type>::value ));
  62. if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last); // if nothing to search, we didn't find it!
  63. if ( pat_first == pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start
  64. const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
  65. // If the pattern is larger than the corpus, we can't find it!
  66. if ( k_corpus_length < k_pattern_length )
  67. return std::make_pair(corpus_last, corpus_last);
  68. // Do the search
  69. return this->do_search ( corpus_first, corpus_last );
  70. }
  71. template <typename Range>
  72. std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type>
  73. operator () ( Range &r ) const {
  74. return (*this) (boost::begin(r), boost::end(r));
  75. }
  76. private:
  77. /// \cond DOXYGEN_HIDE
  78. patIter pat_first, pat_last;
  79. const difference_type k_pattern_length;
  80. typename traits::skip_table_t skip_;
  81. /// \fn do_search ( corpusIter corpus_first, corpusIter corpus_last )
  82. /// \brief Searches the corpus for the pattern that was passed into the constructor
  83. ///
  84. /// \param corpus_first The start of the data to search (Random Access Iterator)
  85. /// \param corpus_last One past the end of the data to search
  86. /// \param k_corpus_length The length of the corpus to search
  87. ///
  88. template <typename corpusIter>
  89. std::pair<corpusIter, corpusIter>
  90. do_search ( corpusIter corpus_first, corpusIter corpus_last ) const {
  91. corpusIter curPos = corpus_first;
  92. const corpusIter lastPos = corpus_last - k_pattern_length;
  93. while ( curPos <= lastPos ) {
  94. // Do we match right where we are?
  95. std::size_t j = k_pattern_length - 1;
  96. while ( pat_first [j] == curPos [j] ) {
  97. // We matched - we're done!
  98. if ( j == 0 )
  99. return std::make_pair(curPos, curPos + k_pattern_length);
  100. j--;
  101. }
  102. curPos += skip_ [ curPos [ k_pattern_length - 1 ]];
  103. }
  104. return std::make_pair(corpus_last, corpus_last);
  105. }
  106. // \endcond
  107. };
  108. /* Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
  109. Use a bit of TMP to disambiguate the 3-argument templates */
  110. /// \fn boyer_moore_horspool_search ( corpusIter corpus_first, corpusIter corpus_last,
  111. /// patIter pat_first, patIter pat_last )
  112. /// \brief Searches the corpus for the pattern.
  113. ///
  114. /// \param corpus_first The start of the data to search (Random Access Iterator)
  115. /// \param corpus_last One past the end of the data to search
  116. /// \param pat_first The start of the pattern to search for (Random Access Iterator)
  117. /// \param pat_last One past the end of the data to search for
  118. ///
  119. template <typename patIter, typename corpusIter>
  120. std::pair<corpusIter, corpusIter> boyer_moore_horspool_search (
  121. corpusIter corpus_first, corpusIter corpus_last,
  122. patIter pat_first, patIter pat_last )
  123. {
  124. boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
  125. return bmh ( corpus_first, corpus_last );
  126. }
  127. template <typename PatternRange, typename corpusIter>
  128. std::pair<corpusIter, corpusIter> boyer_moore_horspool_search (
  129. corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
  130. {
  131. typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
  132. boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
  133. return bmh ( corpus_first, corpus_last );
  134. }
  135. template <typename patIter, typename CorpusRange>
  136. typename boost::disable_if_c<
  137. boost::is_same<CorpusRange, patIter>::value,
  138. std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> >
  139. ::type
  140. boyer_moore_horspool_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
  141. {
  142. boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
  143. return bm (boost::begin (corpus), boost::end (corpus));
  144. }
  145. template <typename PatternRange, typename CorpusRange>
  146. std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type>
  147. boyer_moore_horspool_search ( CorpusRange &corpus, const PatternRange &pattern )
  148. {
  149. typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
  150. boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
  151. return bmh (boost::begin (corpus), boost::end (corpus));
  152. }
  153. // Creator functions -- take a pattern range, return an object
  154. template <typename Range>
  155. boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<const Range>::type>
  156. make_boyer_moore_horspool ( const Range &r ) {
  157. return boost::algorithm::boyer_moore_horspool
  158. <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
  159. }
  160. template <typename Range>
  161. boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<Range>::type>
  162. make_boyer_moore_horspool ( Range &r ) {
  163. return boost::algorithm::boyer_moore_horspool
  164. <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
  165. }
  166. }}
  167. #endif // BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP