123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248 |
- // Boost string_algo library example file ---------------------------------//
- // Copyright Pavol Droba 2002-2003. Use, modification and
- // distribution is subject to the Boost Software License, Version
- // 1.0. (See accompanying file LICENSE_1_0.txt or copy at
- // http://www.boost.org/LICENSE_1_0.txt)
- // See http://www.boost.org for updates, documentation, and revision history.
- /*
- RLE compression using replace framework. Goal is to compress a sequence of
- repeating characters into 3 bytes ( repeat mark, character and repetition count ).
- For simplification, it works only on numeric-value sequences.
- */
- #include <string>
- #include <iostream>
- #include <limits>
- #include <boost/detail/iterator.hpp>
- #include <boost/algorithm/string/find_format.hpp>
- #include <boost/algorithm/string/finder.hpp>
- using namespace std;
- using namespace boost;
// Repeat mark specification.
// Yields the element value that introduces an encoded run; by default this is
// the largest value representable by T. Specialize for a specific element
// type to choose a different mark.
template< typename T >
T repeat_mark()
{
    typedef std::numeric_limits<T> limits_type;
    // The parenthesised name keeps a function-like macro called `max`
    // from being expanded here.
    return (limits_type::max)();
}
- // Compression -----------------------------------------------------------------------
- // compress finder -rle
- /*
- Find a sequence which can be compressed. It has to be at least 3-character long
- sequence of repetitive characters
- */
- struct find_compressF
- {
- // Construction
- find_compressF() {}
- // Operation
- template<typename ForwardIteratorT>
- iterator_range<ForwardIteratorT> operator()(
- ForwardIteratorT Begin,
- ForwardIteratorT End ) const
- {
- typedef ForwardIteratorT input_iterator_type;
- typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type;
- typedef iterator_range<input_iterator_type> result_type;
- // begin of the matching segment
- input_iterator_type MStart=End;
- // Repetition counter
- value_type Cnt=0;
- // Search for a sequence of repetitive characters
- for(input_iterator_type It=Begin; It!=End;)
- {
- input_iterator_type It2=It++;
- if ( It==End || Cnt>=(std::numeric_limits<value_type>::max)() )
- {
- return result_type( MStart, It );
- }
- if ( *It==*It2 )
- {
- if ( MStart==End )
- {
- // Mark the start
- MStart=It2;
- }
- // Increate repetition counter
- Cnt++;
- }
- else
- {
- if ( MStart!=End )
- {
- if ( Cnt>2 )
- return result_type( MStart, It );
- else
- {
- MStart=End;
- Cnt=0;
- }
- }
- }
- }
- return result_type( End, End );
- }
- };
- // rle compress format
- /*
- Transform a sequence into repeat mark, character and count
- */
- template<typename SeqT>
- struct format_compressF
- {
- private:
- typedef SeqT result_type;
- typedef typename SeqT::value_type value_type;
- public:
- // Construction
- format_compressF() {};
- // Operation
- template< typename ReplaceT >
- result_type operator()( const ReplaceT& Replace ) const
- {
- SeqT r;
- if(!Replace.empty())
- {
- r.push_back( repeat_mark<value_type>() );
- r.push_back( *(Replace.begin()) );
- r.push_back( value_type( Replace.size() ) );
- }
- return r;
- }
- };
- // Decompression -----------------------------------------------------------------------
- // find decompress-rle functor
- /*
- find a repetition block
- */
- struct find_decompressF
- {
- // Construction
- find_decompressF() {}
- // Operation
- template<typename ForwardIteratorT>
- iterator_range<ForwardIteratorT> operator()(
- ForwardIteratorT Begin,
- ForwardIteratorT End ) const
- {
- typedef ForwardIteratorT input_iterator_type;
- typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type;
- typedef iterator_range<input_iterator_type> result_type;
- for(input_iterator_type It=Begin; It!=End; It++)
- {
- if( *It==repeat_mark<value_type>() )
- {
- // Repeat mark found, extract body
- input_iterator_type It2=It++;
-
- if ( It==End ) break;
- It++;
- if ( It==End ) break;
- It++;
-
- return result_type( It2, It );
- }
- }
- return result_type( End, End );
- }
- };
// rle decompress format
/*
    Expands a repetition block ( repeat mark, element, count ) back into
    `count` copies of the element. The mark itself is skipped without being
    inspected. An empty match produces an empty sequence.
*/
template< typename SeqT >
struct format_decompressF
{
private:
    typedef SeqT result_type;
    typedef typename SeqT::value_type value_type;

public:
    // Construction
    format_decompressF() {}

    // Operation
    template< typename ReplaceT >
    result_type operator()( const ReplaceT& Replace ) const
    {
        result_type Expanded;

        if ( Replace.empty() )
            return Expanded;

        // Walk over the block: mark, then element, then count
        typename ReplaceT::const_iterator Pos=Replace.begin();
        ++Pos;
        const value_type Element=*Pos;
        ++Pos;
        const value_type Count=*Pos;

        // A count that is zero or negative yields no output
        for ( value_type Remaining=Count; Remaining>0; --Remaining )
            Expanded.push_back( Element );

        return Expanded;
    }
};
- int main()
- {
- cout << "* RLE Compression Example *" << endl << endl;
- string original("123_AA_*ZZZZZZZZZZZZZZ*34");
- // copy compression
- string compress=find_format_all_copy(
- original,
- find_compressF(),
- format_compressF<string>() );
- cout << "Compressed string: " << compress << endl;
- // Copy decompression
- string decompress=find_format_all_copy(
- compress,
- find_decompressF(),
- format_decompressF<string>() );
- cout << "Decompressed string: " << decompress << endl;
- // in-place compression
- find_format_all(
- original,
- find_compressF(),
- format_compressF<string>() );
-
- cout << "Compressed string: " << original << endl;
- // in-place decompression
- find_format_all(
- original,
- find_decompressF(),
- format_decompressF<string>() );
- cout << "Decompressed string: " << original << endl;
- cout << endl;
- return 0;
- }
|