extended_p_square.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // extended_p_square.hpp
  3. //
  4. // Copyright 2005 Daniel Egloff. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006
  8. #define BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006
  9. #include <vector>
  10. #include <functional>
  11. #include <boost/range/begin.hpp>
  12. #include <boost/range/end.hpp>
  13. #include <boost/range/iterator_range.hpp>
  14. #include <boost/iterator/transform_iterator.hpp>
  15. #include <boost/iterator/counting_iterator.hpp>
  16. #include <boost/iterator/permutation_iterator.hpp>
  17. #include <boost/parameter/keyword.hpp>
  18. #include <boost/mpl/placeholders.hpp>
  19. #include <boost/accumulators/accumulators_fwd.hpp>
  20. #include <boost/accumulators/framework/extractor.hpp>
  21. #include <boost/accumulators/numeric/functional.hpp>
  22. #include <boost/accumulators/framework/parameters/sample.hpp>
  23. #include <boost/accumulators/framework/depends_on.hpp>
  24. #include <boost/accumulators/statistics_fwd.hpp>
  25. #include <boost/accumulators/statistics/count.hpp>
  26. #include <boost/accumulators/statistics/times2_iterator.hpp>
  27. #include <boost/serialization/vector.hpp>
  28. namespace boost { namespace accumulators
  29. {
  30. ///////////////////////////////////////////////////////////////////////////////
  31. // probabilities named parameter
  32. //
  33. BOOST_PARAMETER_NESTED_KEYWORD(tag, extended_p_square_probabilities, probabilities)
  34. BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square_probabilities)
  35. namespace impl
  36. {
  37. ///////////////////////////////////////////////////////////////////////////////
  38. // extended_p_square_impl
  39. // multiple quantile estimation
  40. /**
  41. @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm
  42. Extended \f$P^2\f$ algorithm for estimation of several quantiles without storing samples.
  43. Assume that \f$m\f$ quantiles \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated.
  44. Instead of storing the whole sample cumulative distribution, the algorithm maintains only
  45. \f$m+2\f$ principal markers and \f$m+1\f$ middle markers, whose positions are updated
  46. with each sample and whose heights are adjusted (if necessary) using a piecewise-parablic
  47. formula. The heights of these central markers are the current estimates of the quantiles
  48. and returned as an iterator range.
  49. For further details, see
  50. K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49,
  51. Number 4 (October), 1986, p. 159-164.
  52. The extended \f$ P^2 \f$ algorithm generalizes the \f$ P^2 \f$ algorithm of
  53. R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
  54. histograms without storing observations, Communications of the ACM,
  55. Volume 28 (October), Number 10, 1985, p. 1076-1085.
  56. @param extended_p_square_probabilities A vector of quantile probabilities.
  57. */
  58. template<typename Sample>
  59. struct extended_p_square_impl
  60. : accumulator_base
  61. {
  62. typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
  63. typedef std::vector<float_type> array_type;
  64. // for boost::result_of
  65. typedef iterator_range<
  66. detail::lvalue_index_iterator<
  67. permutation_iterator<
  68. typename array_type::const_iterator
  69. , detail::times2_iterator
  70. >
  71. >
  72. > result_type;
  73. template<typename Args>
  74. extended_p_square_impl(Args const &args)
  75. : probabilities(
  76. boost::begin(args[extended_p_square_probabilities])
  77. , boost::end(args[extended_p_square_probabilities])
  78. )
  79. , heights(2 * probabilities.size() + 3)
  80. , actual_positions(heights.size())
  81. , desired_positions(heights.size())
  82. , positions_increments(heights.size())
  83. {
  84. std::size_t num_quantiles = this->probabilities.size();
  85. std::size_t num_markers = this->heights.size();
  86. for(std::size_t i = 0; i < num_markers; ++i)
  87. {
  88. this->actual_positions[i] = i + 1;
  89. }
  90. this->positions_increments[0] = 0.;
  91. this->positions_increments[num_markers - 1] = 1.;
  92. for(std::size_t i = 0; i < num_quantiles; ++i)
  93. {
  94. this->positions_increments[2 * i + 2] = probabilities[i];
  95. }
  96. for(std::size_t i = 0; i <= num_quantiles; ++i)
  97. {
  98. this->positions_increments[2 * i + 1] =
  99. 0.5 * (this->positions_increments[2 * i] + this->positions_increments[2 * i + 2]);
  100. }
  101. for(std::size_t i = 0; i < num_markers; ++i)
  102. {
  103. this->desired_positions[i] = 1. + 2. * (num_quantiles + 1.) * this->positions_increments[i];
  104. }
  105. }
  106. template<typename Args>
  107. void operator ()(Args const &args)
  108. {
  109. std::size_t cnt = count(args);
  110. // m+2 principal markers and m+1 middle markers
  111. std::size_t num_markers = 2 * this->probabilities.size() + 3;
  112. // first accumulate num_markers samples
  113. if(cnt <= num_markers)
  114. {
  115. this->heights[cnt - 1] = args[sample];
  116. // complete the initialization of heights by sorting
  117. if(cnt == num_markers)
  118. {
  119. std::sort(this->heights.begin(), this->heights.end());
  120. }
  121. }
  122. else
  123. {
  124. std::size_t sample_cell = 1;
  125. // find cell k = sample_cell such that heights[k-1] <= sample < heights[k]
  126. if(args[sample] < this->heights[0])
  127. {
  128. this->heights[0] = args[sample];
  129. sample_cell = 1;
  130. }
  131. else if(args[sample] >= this->heights[num_markers - 1])
  132. {
  133. this->heights[num_markers - 1] = args[sample];
  134. sample_cell = num_markers - 1;
  135. }
  136. else
  137. {
  138. typedef typename array_type::iterator iterator;
  139. iterator it = std::upper_bound(
  140. this->heights.begin()
  141. , this->heights.end()
  142. , args[sample]
  143. );
  144. sample_cell = std::distance(this->heights.begin(), it);
  145. }
  146. // update actual positions of all markers above sample_cell index
  147. for(std::size_t i = sample_cell; i < num_markers; ++i)
  148. {
  149. ++this->actual_positions[i];
  150. }
  151. // update desired positions of all markers
  152. for(std::size_t i = 0; i < num_markers; ++i)
  153. {
  154. this->desired_positions[i] += this->positions_increments[i];
  155. }
  156. // adjust heights and actual positions of markers 1 to num_markers-2 if necessary
  157. for(std::size_t i = 1; i <= num_markers - 2; ++i)
  158. {
  159. // offset to desired position
  160. float_type d = this->desired_positions[i] - this->actual_positions[i];
  161. // offset to next position
  162. float_type dp = this->actual_positions[i+1] - this->actual_positions[i];
  163. // offset to previous position
  164. float_type dm = this->actual_positions[i-1] - this->actual_positions[i];
  165. // height ds
  166. float_type hp = (this->heights[i+1] - this->heights[i]) / dp;
  167. float_type hm = (this->heights[i-1] - this->heights[i]) / dm;
  168. if((d >= 1 && dp > 1) || (d <= -1 && dm < -1))
  169. {
  170. short sign_d = static_cast<short>(d / std::abs(d));
  171. float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp
  172. + (dp - sign_d) * hm);
  173. // try adjusting heights[i] using p-squared formula
  174. if(this->heights[i - 1] < h && h < this->heights[i + 1])
  175. {
  176. this->heights[i] = h;
  177. }
  178. else
  179. {
  180. // use linear formula
  181. if(d > 0)
  182. {
  183. this->heights[i] += hp;
  184. }
  185. if(d < 0)
  186. {
  187. this->heights[i] -= hm;
  188. }
  189. }
  190. this->actual_positions[i] += sign_d;
  191. }
  192. }
  193. }
  194. }
  195. result_type result(dont_care) const
  196. {
  197. // for i in [1,probabilities.size()], return heights[i * 2]
  198. detail::times2_iterator idx_begin = detail::make_times2_iterator(1);
  199. detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1);
  200. return result_type(
  201. make_permutation_iterator(this->heights.begin(), idx_begin)
  202. , make_permutation_iterator(this->heights.begin(), idx_end)
  203. );
  204. }
  205. public:
  206. // make this accumulator serializeable
  207. // TODO: do we need to split to load/save and verify that the parameters did not change?
  208. template<class Archive>
  209. void serialize(Archive & ar, const unsigned int file_version)
  210. {
  211. ar & probabilities;
  212. ar & heights;
  213. ar & actual_positions;
  214. ar & desired_positions;
  215. ar & positions_increments;
  216. }
  217. private:
  218. array_type probabilities; // the quantile probabilities
  219. array_type heights; // q_i
  220. array_type actual_positions; // n_i
  221. array_type desired_positions; // d_i
  222. array_type positions_increments; // f_i
  223. };
  224. } // namespace impl
  225. ///////////////////////////////////////////////////////////////////////////////
  226. // tag::extended_p_square
  227. //
  228. namespace tag
  229. {
  230. struct extended_p_square
  231. : depends_on<count>
  232. , extended_p_square_probabilities
  233. {
  234. typedef accumulators::impl::extended_p_square_impl<mpl::_1> impl;
  235. #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
  236. /// tag::extended_p_square::probabilities named parameter
  237. static boost::parameter::keyword<tag::probabilities> const probabilities;
  238. #endif
  239. };
  240. }
  241. ///////////////////////////////////////////////////////////////////////////////
  242. // extract::extended_p_square
  243. //
  244. namespace extract
  245. {
  246. extractor<tag::extended_p_square> const extended_p_square = {};
  247. BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square)
  248. }
  249. using extract::extended_p_square;
  250. // So that extended_p_square can be automatically substituted with
  251. // weighted_extended_p_square when the weight parameter is non-void
  252. template<>
  253. struct as_weighted_feature<tag::extended_p_square>
  254. {
  255. typedef tag::weighted_extended_p_square type;
  256. };
  257. template<>
  258. struct feature_of<tag::weighted_extended_p_square>
  259. : feature_of<tag::extended_p_square>
  260. {
  261. };
  262. }} // namespace boost::accumulators
  263. #endif