weighted_median.hpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // weighted_median.hpp
  3. //
  4. // Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005
  8. #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005
  9. #include <boost/mpl/placeholders.hpp>
  10. #include <boost/range/iterator_range.hpp>
  11. #include <boost/accumulators/framework/accumulator_base.hpp>
  12. #include <boost/accumulators/framework/extractor.hpp>
  13. #include <boost/accumulators/numeric/functional.hpp>
  14. #include <boost/accumulators/framework/parameters/sample.hpp>
  15. #include <boost/accumulators/framework/depends_on.hpp>
  16. #include <boost/accumulators/statistics_fwd.hpp>
  17. #include <boost/accumulators/statistics/count.hpp>
  18. #include <boost/accumulators/statistics/median.hpp>
  19. #include <boost/accumulators/statistics/weighted_p_square_quantile.hpp>
  20. #include <boost/accumulators/statistics/weighted_density.hpp>
  21. #include <boost/accumulators/statistics/weighted_p_square_cumul_dist.hpp>
  22. namespace boost { namespace accumulators
  23. {
  24. namespace impl
  25. {
  26. ///////////////////////////////////////////////////////////////////////////////
  27. // weighted_median_impl
  28. //
  29. /**
  30. @brief Median estimation for weighted samples based on the \f$P^2\f$ quantile estimator
  31. The \f$P^2\f$ algorithm for weighted samples is invoked with a quantile probability of 0.5.
  32. */
  33. template<typename Sample>
  34. struct weighted_median_impl
  35. : accumulator_base
  36. {
  37. // for boost::result_of
  38. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type;
  39. weighted_median_impl(dont_care) {}
  40. template<typename Args>
  41. result_type result(Args const &args) const
  42. {
  43. return weighted_p_square_quantile_for_median(args);
  44. }
  45. };
  46. ///////////////////////////////////////////////////////////////////////////////
  47. // with_density_weighted_median_impl
  48. //
  49. /**
  50. @brief Median estimation for weighted samples based on the density estimator
  51. The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being
  52. the total number of samples. It returns the approximate horizontal position of this sample,
  53. based on a linear interpolation inside the bin.
  54. */
  55. template<typename Sample>
  56. struct with_density_weighted_median_impl
  57. : accumulator_base
  58. {
  59. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
  60. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  61. typedef iterator_range<typename histogram_type::iterator> range_type;
  62. // for boost::result_of
  63. typedef float_type result_type;
  64. template<typename Args>
  65. with_density_weighted_median_impl(Args const &args)
  66. : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
  67. , is_dirty(true)
  68. {
  69. }
  70. void operator ()(dont_care)
  71. {
  72. this->is_dirty = true;
  73. }
  74. template<typename Args>
  75. result_type result(Args const &args) const
  76. {
  77. if (this->is_dirty)
  78. {
  79. this->is_dirty = false;
  80. std::size_t cnt = count(args);
  81. range_type histogram = weighted_density(args);
  82. typename range_type::iterator it = histogram.begin();
  83. while (this->sum < 0.5 * cnt)
  84. {
  85. this->sum += it->second * cnt;
  86. ++it;
  87. }
  88. --it;
  89. float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt);
  90. this->median = it->first * over + (it + 1)->first * ( 1. - over );
  91. }
  92. return this->median;
  93. }
  94. // make this accumulator serializeable
  95. template<class Archive>
  96. void serialize(Archive & ar, const unsigned int file_version)
  97. {
  98. ar & sum;
  99. ar & is_dirty;
  100. ar & median;
  101. }
  102. private:
  103. mutable float_type sum;
  104. mutable bool is_dirty;
  105. mutable float_type median;
  106. };
  107. ///////////////////////////////////////////////////////////////////////////////
  108. // with_p_square_cumulative_distribution_weighted_median_impl
  109. //
  110. /**
  111. @brief Median estimation for weighted samples based on the \f$P^2\f$ cumulative distribution estimator
  112. The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It
  113. returns the approximate horizontal position of where the cumulative distribution
  114. equals 0.5, based on a linear interpolation inside the bin.
  115. */
  116. template<typename Sample, typename Weight>
  117. struct with_p_square_cumulative_distribution_weighted_median_impl
  118. : accumulator_base
  119. {
  120. typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
  121. typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
  122. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  123. typedef iterator_range<typename histogram_type::iterator> range_type;
  124. // for boost::result_of
  125. typedef float_type result_type;
  126. with_p_square_cumulative_distribution_weighted_median_impl(dont_care)
  127. : is_dirty(true)
  128. {
  129. }
  130. void operator ()(dont_care)
  131. {
  132. this->is_dirty = true;
  133. }
  134. template<typename Args>
  135. result_type result(Args const &args) const
  136. {
  137. if (this->is_dirty)
  138. {
  139. this->is_dirty = false;
  140. range_type histogram = weighted_p_square_cumulative_distribution(args);
  141. typename range_type::iterator it = histogram.begin();
  142. while (it->second < 0.5)
  143. {
  144. ++it;
  145. }
  146. float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second);
  147. this->median = it->first * over + (it + 1)->first * ( 1. - over );
  148. }
  149. return this->median;
  150. }
  151. // make this accumulator serializeable
  152. template<class Archive>
  153. void serialize(Archive & ar, const unsigned int file_version)
  154. {
  155. ar & is_dirty;
  156. ar & median;
  157. }
  158. private:
  159. mutable bool is_dirty;
  160. mutable float_type median;
  161. };
  162. } // namespace impl
  163. ///////////////////////////////////////////////////////////////////////////////
  164. // tag::weighted_median
  165. // tag::with_density_weighted_median
  166. // tag::with_p_square_cumulative_distribution_weighted_median
  167. //
  168. namespace tag
  169. {
  170. struct weighted_median
  171. : depends_on<weighted_p_square_quantile_for_median>
  172. {
  173. /// INTERNAL ONLY
  174. ///
  175. typedef accumulators::impl::weighted_median_impl<mpl::_1> impl;
  176. };
  177. struct with_density_weighted_median
  178. : depends_on<count, weighted_density>
  179. {
  180. /// INTERNAL ONLY
  181. ///
  182. typedef accumulators::impl::with_density_weighted_median_impl<mpl::_1> impl;
  183. };
  184. struct with_p_square_cumulative_distribution_weighted_median
  185. : depends_on<weighted_p_square_cumulative_distribution>
  186. {
  187. /// INTERNAL ONLY
  188. ///
  189. typedef accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl<mpl::_1, mpl::_2> impl;
  190. };
  191. }
  192. ///////////////////////////////////////////////////////////////////////////////
  193. // extract::weighted_median
  194. //
  195. namespace extract
  196. {
  197. extractor<tag::median> const weighted_median = {};
  198. BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_median)
  199. }
  200. using extract::weighted_median;
  201. // weighted_median(with_p_square_quantile) -> weighted_median
  202. template<>
  203. struct as_feature<tag::weighted_median(with_p_square_quantile)>
  204. {
  205. typedef tag::weighted_median type;
  206. };
  207. // weighted_median(with_density) -> with_density_weighted_median
  208. template<>
  209. struct as_feature<tag::weighted_median(with_density)>
  210. {
  211. typedef tag::with_density_weighted_median type;
  212. };
  213. // weighted_median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_weighted_median
  214. template<>
  215. struct as_feature<tag::weighted_median(with_p_square_cumulative_distribution)>
  216. {
  217. typedef tag::with_p_square_cumulative_distribution_weighted_median type;
  218. };
  219. }} // namespace boost::accumulators
  220. #endif