weighted_density.hpp 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // weighted_density.hpp
  3. //
  4. // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
  8. #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
  9. #include <vector>
  10. #include <limits>
  11. #include <functional>
  12. #include <boost/range.hpp>
  13. #include <boost/parameter/keyword.hpp>
  14. #include <boost/mpl/placeholders.hpp>
  15. #include <boost/accumulators/framework/accumulator_base.hpp>
  16. #include <boost/accumulators/framework/extractor.hpp>
  17. #include <boost/accumulators/numeric/functional.hpp>
  18. #include <boost/accumulators/framework/parameters/sample.hpp>
  19. #include <boost/accumulators/statistics_fwd.hpp>
  20. #include <boost/accumulators/statistics/sum.hpp>
  21. #include <boost/accumulators/statistics/max.hpp>
  22. #include <boost/accumulators/statistics/min.hpp>
  23. #include <boost/accumulators/statistics/density.hpp> // for named parameters density_cache_size and density_num_bins
  24. #include <boost/serialization/vector.hpp>
  25. #include <boost/serialization/utility.hpp>
  26. namespace boost { namespace accumulators
  27. {
  28. namespace impl
  29. {
  30. ///////////////////////////////////////////////////////////////////////////////
  31. // weighted_density_impl
  32. // density histogram for weighted samples
  33. /**
  34. @brief Histogram density estimator for weighted samples
  35. The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins
  36. are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the
  37. maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally,
  38. an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined,
  39. the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is
  40. returned, where each pair contains the position of the bin (lower bound) and the sum of the weights (normalized with the
  41. sum of all weights).
  42. @param density_cache_size Number of first samples used to determine min and max.
  43. @param density_num_bins Number of bins (two additional bins collect under- and overflow samples).
  44. */
  45. template<typename Sample, typename Weight>
  46. struct weighted_density_impl
  47. : accumulator_base
  48. {
  49. typedef typename numeric::functional::fdiv<Weight, std::size_t>::result_type float_type;
  50. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  51. typedef std::vector<float_type> array_type;
  52. // for boost::result_of
  53. typedef iterator_range<typename histogram_type::iterator> result_type;
  54. template<typename Args>
  55. weighted_density_impl(Args const &args)
  56. : cache_size(args[density_cache_size])
  57. , cache(cache_size)
  58. , num_bins(args[density_num_bins])
  59. , samples_in_bin(num_bins + 2, 0.)
  60. , bin_positions(num_bins + 2)
  61. , histogram(
  62. num_bins + 2
  63. , std::make_pair(
  64. numeric::fdiv(args[sample | Sample()],(std::size_t)1)
  65. , numeric::fdiv(args[sample | Sample()],(std::size_t)1)
  66. )
  67. )
  68. , is_dirty(true)
  69. {
  70. }
  71. template<typename Args>
  72. void operator ()(Args const &args)
  73. {
  74. this->is_dirty = true;
  75. std::size_t cnt = count(args);
  76. // Fill up cache with cache_size first samples
  77. if (cnt <= this->cache_size)
  78. {
  79. this->cache[cnt - 1] = std::make_pair(args[sample], args[weight]);
  80. }
  81. // Once cache_size samples have been accumulated, create num_bins bins of same size between
  82. // the minimum and maximum of the cached samples as well as an under- and an overflow bin.
  83. // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin).
  84. if (cnt == this->cache_size)
  85. {
  86. float_type minimum = numeric::fdiv((min)(args),(std::size_t)1);
  87. float_type maximum = numeric::fdiv((max)(args),(std::size_t)1);
  88. float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins);
  89. // determine bin positions (their lower bounds)
  90. for (std::size_t i = 0; i < this->num_bins + 2; ++i)
  91. {
  92. this->bin_positions[i] = minimum + (i - 1.) * bin_size;
  93. }
  94. for (typename histogram_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter)
  95. {
  96. if (iter->first < this->bin_positions[1])
  97. {
  98. this->samples_in_bin[0] += iter->second;
  99. }
  100. else if (iter->first >= this->bin_positions[this->num_bins + 1])
  101. {
  102. this->samples_in_bin[this->num_bins + 1] += iter->second;
  103. }
  104. else
  105. {
  106. typename array_type::iterator it = std::upper_bound(
  107. this->bin_positions.begin()
  108. , this->bin_positions.end()
  109. , iter->first
  110. );
  111. std::size_t d = std::distance(this->bin_positions.begin(), it);
  112. this->samples_in_bin[d - 1] += iter->second;
  113. }
  114. }
  115. }
  116. // Add each subsequent sample to the correct bin
  117. else if (cnt > this->cache_size)
  118. {
  119. if (args[sample] < this->bin_positions[1])
  120. {
  121. this->samples_in_bin[0] += args[weight];
  122. }
  123. else if (args[sample] >= this->bin_positions[this->num_bins + 1])
  124. {
  125. this->samples_in_bin[this->num_bins + 1] += args[weight];
  126. }
  127. else
  128. {
  129. typename array_type::iterator it = std::upper_bound(
  130. this->bin_positions.begin()
  131. , this->bin_positions.end()
  132. , args[sample]
  133. );
  134. std::size_t d = std::distance(this->bin_positions.begin(), it);
  135. this->samples_in_bin[d - 1] += args[weight];
  136. }
  137. }
  138. }
  139. template<typename Args>
  140. result_type result(Args const &args) const
  141. {
  142. if (this->is_dirty)
  143. {
  144. this->is_dirty = false;
  145. // creates a vector of std::pair where each pair i holds
  146. // the values bin_positions[i] (x-axis of histogram) and
  147. // samples_in_bin[i] / cnt (y-axis of histogram).
  148. for (std::size_t i = 0; i < this->num_bins + 2; ++i)
  149. {
  150. this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], sum_of_weights(args)));
  151. }
  152. }
  153. // returns a range of pairs
  154. return make_iterator_range(this->histogram);
  155. }
  156. // make this accumulator serializeable
  157. // TODO split to save/load and check on parameters provided in ctor
  158. template<class Archive>
  159. void serialize(Archive & ar, const unsigned int file_version)
  160. {
  161. ar & cache_size;
  162. ar & cache;
  163. ar & num_bins;
  164. ar & samples_in_bin;
  165. ar & bin_positions;
  166. ar & histogram;
  167. ar & is_dirty;
  168. }
  169. private:
  170. std::size_t cache_size; // number of cached samples
  171. histogram_type cache; // cache to store the first cache_size samples with their weights as std::pair
  172. std::size_t num_bins; // number of bins
  173. array_type samples_in_bin; // number of samples in each bin
  174. array_type bin_positions; // lower bounds of bins
  175. mutable histogram_type histogram; // histogram
  176. mutable bool is_dirty;
  177. };
  178. } // namespace impl
  179. ///////////////////////////////////////////////////////////////////////////////
  180. // tag::weighted_density
  181. //
  182. namespace tag
  183. {
  184. struct weighted_density
  185. : depends_on<count, sum_of_weights, min, max>
  186. , density_cache_size
  187. , density_num_bins
  188. {
  189. /// INTERNAL ONLY
  190. ///
  191. typedef accumulators::impl::weighted_density_impl<mpl::_1, mpl::_2> impl;
  192. #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
  193. static boost::parameter::keyword<density_cache_size> const cache_size;
  194. static boost::parameter::keyword<density_num_bins> const num_bins;
  195. #endif
  196. };
  197. }
  198. ///////////////////////////////////////////////////////////////////////////////
  199. // extract::weighted_density
  200. //
  201. namespace extract
  202. {
  203. extractor<tag::density> const weighted_density = {};
  204. BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_density)
  205. }
  206. using extract::weighted_density;
  207. }} // namespace boost::accumulators
  208. #endif