mi_hashed_indices.cpp 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. // Boost.Bimap
  2. //
  3. // Copyright (c) 2006-2007 Matias Capeletto
  4. //
  5. // Distributed under the Boost Software License, Version 1.0.
  6. // (See accompanying file LICENSE_1_0.txt or copy at
  7. // http://www.boost.org/LICENSE_1_0.txt)
  8. /*****************************************************************************
  9. Boost.MultiIndex
  10. *****************************************************************************/
  11. #include <boost/config.hpp>
  12. //[ code_mi_to_b_path_mi_hashed_indices
  13. #include <iostream>
  14. #include <iomanip>
  15. #include <boost/tokenizer.hpp>
  16. #include <boost/multi_index_container.hpp>
  17. #include <boost/multi_index/key_extractors.hpp>
  18. #include <boost/multi_index/ordered_index.hpp>
  19. #include <boost/multi_index/hashed_index.hpp>
  20. #include <boost/lambda/lambda.hpp>
  21. using namespace boost::multi_index;
  22. namespace bl = boost::lambda;
  // word_counter keeps the occurrences of words inserted. A hashed
  // index allows for fast checking of preexisting entries.
  // One record of the word counter: a word together with the number of
  // times it has been counted.
  struct word_counter_entry
  {
    std::string  word;         // the word itself
    unsigned int occurrences;  // how many times the word was counted; starts at 0

    // Builds an entry for word_ with a zero count.
    //
    // The constructor is intentionally not explicit: main() hands a
    // std::string token straight to the container's insert(), which relies
    // on the implicit std::string -> word_counter_entry conversion.
    //
    // The argument is taken by const reference so the string is copied
    // exactly once (into the member) instead of once into a by-value
    // parameter and a second time into the member.
    word_counter_entry( const std::string& word_ )
      : word(word_), occurrences(0) {}
  };
  31. typedef multi_index_container
  32. <
  33. word_counter_entry,
  34. indexed_by
  35. <
  36. ordered_non_unique
  37. <
  38. BOOST_MULTI_INDEX_MEMBER(
  39. word_counter_entry,unsigned int,occurrences),
  40. std::greater<unsigned int>
  41. >,
  42. hashed_unique
  43. <
  44. BOOST_MULTI_INDEX_MEMBER(word_counter_entry,std::string,word)
  45. >
  46. >
  47. > word_counter;
  48. typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer;
  49. int main()
  50. {
  51. std::string text=
  52. "En un lugar de la Mancha, de cuyo nombre no quiero acordarme... "
  53. "...snip..."
  54. "...no se salga un punto de la verdad.";
  55. // feed the text into the container
  56. word_counter wc;
  57. text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-"));
  58. unsigned int total_occurrences = 0;
  59. for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end();
  60. it != it_end ; ++it )
  61. {
  62. ++total_occurrences;
  63. word_counter::iterator wit = wc.insert(*it).first;
  64. wc.modify_key( wit, ++ bl::_1 );
  65. }
  66. // list words by frequency of appearance
  67. std::cout << std::fixed << std::setprecision(2);
  68. for( word_counter::iterator wit = wc.begin(), wit_end=wc.end();
  69. wit != wit_end; ++wit )
  70. {
  71. std::cout << std::setw(11) << wit->word << ": "
  72. << std::setw(5)
  73. << 100.0 * wit->occurrences / total_occurrences << "%"
  74. << std::endl;
  75. }
  76. return 0;
  77. }
  78. //]