rle_example.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. // Boost string_algo library example file ---------------------------------//
  2. // Copyright Pavol Droba 2002-2003. Use, modification and
  3. // distribution is subject to the Boost Software License, Version
  4. // 1.0. (See accompanying file LICENSE_1_0.txt or copy at
  5. // http://www.boost.org/LICENSE_1_0.txt)
  6. // See http://www.boost.org for updates, documentation, and revision history.
  7. /*
  8. RLE compression using replace framework. Goal is to compress a sequence of
  9. repeating characters into 3 bytes ( repeat mark, character and repetition count ).
  10. For simplification, it works only on numeric-value sequences.
  11. */
  12. #include <string>
  13. #include <iostream>
  14. #include <limits>
  15. #include <boost/detail/iterator.hpp>
  16. #include <boost/algorithm/string/find_format.hpp>
  17. #include <boost/algorithm/string/finder.hpp>
  18. using namespace std;
  19. using namespace boost;
  20. // Repeat mark specification; specialize it for a specific element type
  // Yields the element value used as the RLE repeat mark: the largest value
  // representable by T. Specialize this template to choose a different mark
  // for a particular element type.
  template< typename T >
  T repeat_mark()
  {
      typedef std::numeric_limits<T> limits_type;
      // The parentheses stop a function-like max() macro from expanding here.
      return (limits_type::max)();
  }
  22. // Compression -----------------------------------------------------------------------
  23. // compress finder -rle
  24. /*
  25. Find a sequence which can be compressed. It has to be at least 3-character long
  26. sequence of repetitive characters
  27. */
  28. struct find_compressF
  29. {
  30. // Construction
  31. find_compressF() {}
  32. // Operation
  33. template<typename ForwardIteratorT>
  34. iterator_range<ForwardIteratorT> operator()(
  35. ForwardIteratorT Begin,
  36. ForwardIteratorT End ) const
  37. {
  38. typedef ForwardIteratorT input_iterator_type;
  39. typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type;
  40. typedef iterator_range<input_iterator_type> result_type;
  41. // begin of the matching segment
  42. input_iterator_type MStart=End;
  43. // Repetition counter
  44. value_type Cnt=0;
  45. // Search for a sequence of repetitive characters
  46. for(input_iterator_type It=Begin; It!=End;)
  47. {
  48. input_iterator_type It2=It++;
  49. if ( It==End || Cnt>=(std::numeric_limits<value_type>::max)() )
  50. {
  51. return result_type( MStart, It );
  52. }
  53. if ( *It==*It2 )
  54. {
  55. if ( MStart==End )
  56. {
  57. // Mark the start
  58. MStart=It2;
  59. }
  60. // Increate repetition counter
  61. Cnt++;
  62. }
  63. else
  64. {
  65. if ( MStart!=End )
  66. {
  67. if ( Cnt>2 )
  68. return result_type( MStart, It );
  69. else
  70. {
  71. MStart=End;
  72. Cnt=0;
  73. }
  74. }
  75. }
  76. }
  77. return result_type( End, End );
  78. }
  79. };
  80. // rle compress format
  81. /*
  82. Transform a sequence into repeat mark, character and count
  83. */
  84. template<typename SeqT>
  85. struct format_compressF
  86. {
  87. private:
  88. typedef SeqT result_type;
  89. typedef typename SeqT::value_type value_type;
  90. public:
  91. // Construction
  92. format_compressF() {};
  93. // Operation
  94. template< typename ReplaceT >
  95. result_type operator()( const ReplaceT& Replace ) const
  96. {
  97. SeqT r;
  98. if(!Replace.empty())
  99. {
  100. r.push_back( repeat_mark<value_type>() );
  101. r.push_back( *(Replace.begin()) );
  102. r.push_back( value_type( Replace.size() ) );
  103. }
  104. return r;
  105. }
  106. };
  107. // Decompression -----------------------------------------------------------------------
  108. // find decompress-rle functor
  109. /*
  110. find a repetition block
  111. */
  112. struct find_decompressF
  113. {
  114. // Construction
  115. find_decompressF() {}
  116. // Operation
  117. template<typename ForwardIteratorT>
  118. iterator_range<ForwardIteratorT> operator()(
  119. ForwardIteratorT Begin,
  120. ForwardIteratorT End ) const
  121. {
  122. typedef ForwardIteratorT input_iterator_type;
  123. typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type;
  124. typedef iterator_range<input_iterator_type> result_type;
  125. for(input_iterator_type It=Begin; It!=End; It++)
  126. {
  127. if( *It==repeat_mark<value_type>() )
  128. {
  129. // Repeat mark found, extract body
  130. input_iterator_type It2=It++;
  131. if ( It==End ) break;
  132. It++;
  133. if ( It==End ) break;
  134. It++;
  135. return result_type( It2, It );
  136. }
  137. }
  138. return result_type( End, End );
  139. }
  140. };
  141. // rle decompress format
  142. /*
  143. transform a repetition block into a sequence of characters
  144. */
  // Formatter expanding one compressed block back into the run it stands for.
  // SeqT is the sequence type of the result.
  template< typename SeqT >
  struct format_decompressF
  {
  private:
      typedef SeqT result_type;
      typedef typename SeqT::value_type value_type;

  public:
      // Construction
      format_decompressF() {}

      // Operation
      /*
          Replace holds a block laid out as: mark, element, count.
          The result is the element repeated count times; an empty Replace
          gives an empty result. A non-empty Replace is expected to hold all
          three elements, the second and third are read without a bounds check.
      */
      template< typename ReplaceT >
      result_type operator()( const ReplaceT& Replace ) const
      {
          result_type Expanded;
          if ( Replace.empty() )
              return Expanded;

          // Skip the mark, then read the element and its repetition count
          typename ReplaceT::const_iterator Cursor=Replace.begin();
          ++Cursor;
          value_type Symbol=*Cursor;
          ++Cursor;
          value_type Count=*Cursor;

          value_type Emitted=0;
          while ( Emitted<Count )
          {
              Expanded.push_back( Symbol );
              Emitted++;
          }
          return Expanded;
      }
  };
  170. int main()
  171. {
  172. cout << "* RLE Compression Example *" << endl << endl;
  173. string original("123_AA_*ZZZZZZZZZZZZZZ*34");
  174. // copy compression
  175. string compress=find_format_all_copy(
  176. original,
  177. find_compressF(),
  178. format_compressF<string>() );
  179. cout << "Compressed string: " << compress << endl;
  180. // Copy decompression
  181. string decompress=find_format_all_copy(
  182. compress,
  183. find_decompressF(),
  184. format_decompressF<string>() );
  185. cout << "Decompressed string: " << decompress << endl;
  186. // in-place compression
  187. find_format_all(
  188. original,
  189. find_compressF(),
  190. format_compressF<string>() );
  191. cout << "Compressed string: " << original << endl;
  192. // in-place decompression
  193. find_format_all(
  194. original,
  195. find_decompressF(),
  196. format_decompressF<string>() );
  197. cout << "Decompressed string: " << original << endl;
  198. cout << endl;
  199. return 0;
  200. }