median.hpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // median.hpp
  3. //
  4. // Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005
  8. #define BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005
  9. #include <boost/mpl/placeholders.hpp>
  10. #include <boost/range/iterator_range.hpp>
  11. #include <boost/accumulators/framework/accumulator_base.hpp>
  12. #include <boost/accumulators/framework/extractor.hpp>
  13. #include <boost/accumulators/numeric/functional.hpp>
  14. #include <boost/accumulators/framework/parameters/sample.hpp>
  15. #include <boost/accumulators/framework/depends_on.hpp>
  16. #include <boost/accumulators/statistics_fwd.hpp>
  17. #include <boost/accumulators/statistics/count.hpp>
  18. #include <boost/accumulators/statistics/p_square_quantile.hpp>
  19. #include <boost/accumulators/statistics/density.hpp>
  20. #include <boost/accumulators/statistics/p_square_cumul_dist.hpp>
  21. namespace boost { namespace accumulators
  22. {
  23. namespace impl
  24. {
  25. ///////////////////////////////////////////////////////////////////////////////
  26. // median_impl
  27. //
  28. /**
  29. @brief Median estimation based on the \f$P^2\f$ quantile estimator
  30. The \f$P^2\f$ algorithm is invoked with a quantile probability of 0.5.
  31. */
  32. template<typename Sample>
  33. struct median_impl
  34. : accumulator_base
  35. {
  36. // for boost::result_of
  37. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type;
  38. median_impl(dont_care) {}
  39. template<typename Args>
  40. result_type result(Args const &args) const
  41. {
  42. return p_square_quantile_for_median(args);
  43. }
  44. // serialization is done by accumulators it depends on
  45. template<class Archive>
  46. void serialize(Archive & ar, const unsigned int file_version) {}
  47. };
  48. ///////////////////////////////////////////////////////////////////////////////
  49. // with_density_median_impl
  50. //
  51. /**
  52. @brief Median estimation based on the density estimator
  53. The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being
  54. the total number of samples. It returns the approximate horizontal position of this sample,
  55. based on a linear interpolation inside the bin.
  56. */
  57. template<typename Sample>
  58. struct with_density_median_impl
  59. : accumulator_base
  60. {
  61. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
  62. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  63. typedef iterator_range<typename histogram_type::iterator> range_type;
  64. // for boost::result_of
  65. typedef float_type result_type;
  66. template<typename Args>
  67. with_density_median_impl(Args const &args)
  68. : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
  69. , is_dirty(true)
  70. {
  71. }
  72. void operator ()(dont_care)
  73. {
  74. this->is_dirty = true;
  75. }
  76. template<typename Args>
  77. result_type result(Args const &args) const
  78. {
  79. if (this->is_dirty)
  80. {
  81. this->is_dirty = false;
  82. std::size_t cnt = count(args);
  83. range_type histogram = density(args);
  84. typename range_type::iterator it = histogram.begin();
  85. while (this->sum < 0.5 * cnt)
  86. {
  87. this->sum += it->second * cnt;
  88. ++it;
  89. }
  90. --it;
  91. float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt);
  92. this->median = it->first * over + (it + 1)->first * (1. - over);
  93. }
  94. return this->median;
  95. }
  96. // make this accumulator serializeable
  97. template<class Archive>
  98. void serialize(Archive & ar, const unsigned int file_version)
  99. {
  100. ar & sum;
  101. ar & is_dirty;
  102. ar & median;
  103. }
  104. private:
  105. mutable float_type sum;
  106. mutable bool is_dirty;
  107. mutable float_type median;
  108. };
  109. ///////////////////////////////////////////////////////////////////////////////
  110. // with_p_square_cumulative_distribution_median_impl
  111. //
  112. /**
  113. @brief Median estimation based on the \f$P^2\f$ cumulative distribution estimator
  114. The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It
  115. returns the approximate horizontal position of where the cumulative distribution
  116. equals 0.5, based on a linear interpolation inside the bin.
  117. */
  118. template<typename Sample>
  119. struct with_p_square_cumulative_distribution_median_impl
  120. : accumulator_base
  121. {
  122. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
  123. typedef std::vector<std::pair<float_type, float_type> > histogram_type;
  124. typedef iterator_range<typename histogram_type::iterator> range_type;
  125. // for boost::result_of
  126. typedef float_type result_type;
  127. with_p_square_cumulative_distribution_median_impl(dont_care)
  128. : is_dirty(true)
  129. {
  130. }
  131. void operator ()(dont_care)
  132. {
  133. this->is_dirty = true;
  134. }
  135. template<typename Args>
  136. result_type result(Args const &args) const
  137. {
  138. if (this->is_dirty)
  139. {
  140. this->is_dirty = false;
  141. range_type histogram = p_square_cumulative_distribution(args);
  142. typename range_type::iterator it = histogram.begin();
  143. while (it->second < 0.5)
  144. {
  145. ++it;
  146. }
  147. float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second);
  148. this->median = it->first * over + (it + 1)->first * ( 1. - over );
  149. }
  150. return this->median;
  151. }
  152. // make this accumulator serializeable
  153. template<class Archive>
  154. void serialize(Archive & ar, const unsigned int file_version)
  155. {
  156. ar & is_dirty;
  157. ar & median;
  158. }
  159. private:
  160. mutable bool is_dirty;
  161. mutable float_type median;
  162. };
  163. } // namespace impl
  164. ///////////////////////////////////////////////////////////////////////////////
  165. // tag::median
  166. // tag::with_densisty_median
  167. // tag::with_p_square_cumulative_distribution_median
  168. //
  169. namespace tag
  170. {
  171. struct median
  172. : depends_on<p_square_quantile_for_median>
  173. {
  174. /// INTERNAL ONLY
  175. ///
  176. typedef accumulators::impl::median_impl<mpl::_1> impl;
  177. };
  178. struct with_density_median
  179. : depends_on<count, density>
  180. {
  181. /// INTERNAL ONLY
  182. ///
  183. typedef accumulators::impl::with_density_median_impl<mpl::_1> impl;
  184. };
  185. struct with_p_square_cumulative_distribution_median
  186. : depends_on<p_square_cumulative_distribution>
  187. {
  188. /// INTERNAL ONLY
  189. ///
  190. typedef accumulators::impl::with_p_square_cumulative_distribution_median_impl<mpl::_1> impl;
  191. };
  192. }
  193. ///////////////////////////////////////////////////////////////////////////////
  194. // extract::median
  195. // extract::with_density_median
  196. // extract::with_p_square_cumulative_distribution_median
  197. //
  198. namespace extract
  199. {
  200. extractor<tag::median> const median = {};
  201. extractor<tag::with_density_median> const with_density_median = {};
  202. extractor<tag::with_p_square_cumulative_distribution_median> const with_p_square_cumulative_distribution_median = {};
  203. BOOST_ACCUMULATORS_IGNORE_GLOBAL(median)
  204. BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_density_median)
  205. BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_p_square_cumulative_distribution_median)
  206. }
  207. using extract::median;
  208. using extract::with_density_median;
  209. using extract::with_p_square_cumulative_distribution_median;
  210. // median(with_p_square_quantile) -> median
  211. template<>
  212. struct as_feature<tag::median(with_p_square_quantile)>
  213. {
  214. typedef tag::median type;
  215. };
  216. // median(with_density) -> with_density_median
  217. template<>
  218. struct as_feature<tag::median(with_density)>
  219. {
  220. typedef tag::with_density_median type;
  221. };
  222. // median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_median
  223. template<>
  224. struct as_feature<tag::median(with_p_square_cumulative_distribution)>
  225. {
  226. typedef tag::with_p_square_cumulative_distribution_median type;
  227. };
  228. // for the purposes of feature-based dependency resolution,
  229. // with_density_median and with_p_square_cumulative_distribution_median
  230. // provide the same feature as median
  231. template<>
  232. struct feature_of<tag::with_density_median>
  233. : feature_of<tag::median>
  234. {
  235. };
  236. template<>
  237. struct feature_of<tag::with_p_square_cumulative_distribution_median>
  238. : feature_of<tag::median>
  239. {
  240. };
  241. // So that median can be automatically substituted with
  242. // weighted_median when the weight parameter is non-void.
  243. template<>
  244. struct as_weighted_feature<tag::median>
  245. {
  246. typedef tag::weighted_median type;
  247. };
  248. template<>
  249. struct feature_of<tag::weighted_median>
  250. : feature_of<tag::median>
  251. {
  252. };
  253. // So that with_density_median can be automatically substituted with
  254. // with_density_weighted_median when the weight parameter is non-void.
  255. template<>
  256. struct as_weighted_feature<tag::with_density_median>
  257. {
  258. typedef tag::with_density_weighted_median type;
  259. };
  260. template<>
  261. struct feature_of<tag::with_density_weighted_median>
  262. : feature_of<tag::with_density_median>
  263. {
  264. };
  265. // So that with_p_square_cumulative_distribution_median can be automatically substituted with
  266. // with_p_square_cumulative_distribution_weighted_median when the weight parameter is non-void.
  267. template<>
  268. struct as_weighted_feature<tag::with_p_square_cumulative_distribution_median>
  269. {
  270. typedef tag::with_p_square_cumulative_distribution_weighted_median type;
  271. };
  272. template<>
  273. struct feature_of<tag::with_p_square_cumulative_distribution_weighted_median>
  274. : feature_of<tag::with_p_square_cumulative_distribution_median>
  275. {
  276. };
  277. }} // namespace boost::accumulators
  278. #endif