hashed_indices.cpp 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. // Boost.Bimap
  2. //
  3. // Copyright (c) 2006-2007 Matias Capeletto
  4. //
  5. // Distributed under the Boost Software License, Version 1.0.
  6. // (See accompanying file LICENSE_1_0.txt or copy at
  7. // http://www.boost.org/LICENSE_1_0.txt)
  8. // Boost.Bimap Example
  9. //-----------------------------------------------------------------------------
  10. // Hashed indices can be used as an alternative to ordered indices when fast
  11. // lookup is needed and sorting information is of no interest. The example
  12. // features a word counter where duplicate entries are checked by means of a
  13. // hashed index.
  14. #include <boost/config.hpp>
  15. //[ code_mi_to_b_path_hashed_indices
  16. #include <iostream>
  17. #include <iomanip>
  18. #include <boost/tokenizer.hpp>
  19. #include <boost/bimap/bimap.hpp>
  20. #include <boost/bimap/unordered_set_of.hpp>
  21. #include <boost/bimap/multiset_of.hpp>
  22. #include <boost/bimap/support/lambda.hpp>
  23. using namespace boost::bimaps;
  24. struct word {};
  25. struct occurrences {};
  26. typedef bimap
  27. <
  28. multiset_of< tagged<unsigned int,occurrences>, std::greater<unsigned int> >,
  29. unordered_set_of< tagged< std::string, word> >
  30. > word_counter;
  31. typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer;
  32. int main()
  33. {
  34. std::string text=
  35. "Relations between data in the STL are represented with maps."
  36. "A map is a directed relation, by using it you are representing "
  37. "a mapping. In this directed relation, the first type is related to "
  38. "the second type but it is not true that the inverse relationship "
  39. "holds. This is useful in a lot of situations, but there are some "
  40. "relationships that are bidirectional by nature.";
  41. // feed the text into the container
  42. word_counter wc;
  43. text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-"));
  44. unsigned int total_occurrences = 0;
  45. for( text_tokenizer::const_iterator it = tok.begin(), it_end = tok.end();
  46. it != it_end ; ++it )
  47. {
  48. ++total_occurrences;
  49. word_counter::map_by<occurrences>::iterator wit =
  50. wc.by<occurrences>().insert(
  51. word_counter::map_by<occurrences>::value_type(0,*it)
  52. ).first;
  53. wc.by<occurrences>().modify_key( wit, ++_key);
  54. }
  55. // list words by frequency of appearance
  56. std::cout << std::fixed << std::setprecision(2);
  57. for( word_counter::map_by<occurrences>::const_iterator
  58. wit = wc.by<occurrences>().begin(),
  59. wit_end = wc.by<occurrences>().end();
  60. wit != wit_end; ++wit )
  61. {
  62. std::cout << std::setw(15) << wit->get<word>() << ": "
  63. << std::setw(5)
  64. << 100.0 * wit->get<occurrences>() / total_occurrences << "%"
  65. << std::endl;
  66. }
  67. return 0;
  68. }
  69. //]