weighted_peaks_over_threshold.hpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. ///////////////////////////////////////////////////////////////////////////////
  2. // weighted_peaks_over_threshold.hpp
  3. //
  4. // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
  8. #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
  9. #include <vector>
  10. #include <limits>
  11. #include <numeric>
  12. #include <functional>
  13. #include <boost/throw_exception.hpp>
  14. #include <boost/range.hpp>
  15. #include <boost/mpl/if.hpp>
  16. #include <boost/mpl/placeholders.hpp>
  17. #include <boost/parameter/keyword.hpp>
  18. #include <boost/tuple/tuple.hpp>
  19. #include <boost/accumulators/numeric/functional.hpp>
  20. #include <boost/accumulators/framework/accumulator_base.hpp>
  21. #include <boost/accumulators/framework/extractor.hpp>
  22. #include <boost/accumulators/framework/parameters/sample.hpp>
  23. #include <boost/accumulators/framework/depends_on.hpp>
  24. #include <boost/accumulators/statistics_fwd.hpp>
  25. #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
  26. #include <boost/accumulators/statistics/peaks_over_threshold.hpp> // for named parameters pot_threshold_value and pot_threshold_probability
  27. #include <boost/accumulators/statistics/sum.hpp>
  28. #include <boost/accumulators/statistics/tail_variate.hpp>
  29. #ifdef _MSC_VER
  30. # pragma warning(push)
  31. # pragma warning(disable: 4127) // conditional expression is constant
  32. #endif
  33. namespace boost { namespace accumulators
  34. {
  35. namespace impl
  36. {
  37. ///////////////////////////////////////////////////////////////////////////////
  38. // weighted_peaks_over_threshold_impl
  39. // works with an explicit threshold value and does not depend on order statistics of weighted samples
  40. /**
  41. @brief Weighted Peaks over Threshold Method for Weighted Quantile and Weighted Tail Mean Estimation
  42. @sa peaks_over_threshold_impl
  43. @param quantile_probability
  44. @param pot_threshold_value
  45. */
  46. template<typename Sample, typename Weight, typename LeftRight>
  47. struct weighted_peaks_over_threshold_impl
  48. : accumulator_base
  49. {
  50. typedef typename numeric::functional::multiplies<Weight, Sample>::result_type weighted_sample;
  51. typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
  52. // for boost::result_of
  53. typedef boost::tuple<float_type, float_type, float_type> result_type;
  54. template<typename Args>
  55. weighted_peaks_over_threshold_impl(Args const &args)
  56. : sign_((is_same<LeftRight, left>::value) ? -1 : 1)
  57. , mu_(sign_ * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
  58. , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
  59. , w_sum_(numeric::fdiv(args[weight | Weight()], (std::size_t)1))
  60. , threshold_(sign_ * args[pot_threshold_value])
  61. , fit_parameters_(boost::make_tuple(0., 0., 0.))
  62. , is_dirty_(true)
  63. {
  64. }
  65. template<typename Args>
  66. void operator ()(Args const &args)
  67. {
  68. this->is_dirty_ = true;
  69. if (this->sign_ * args[sample] > this->threshold_)
  70. {
  71. this->mu_ += args[weight] * args[sample];
  72. this->sigma2_ += args[weight] * args[sample] * args[sample];
  73. this->w_sum_ += args[weight];
  74. }
  75. }
  76. template<typename Args>
  77. result_type result(Args const &args) const
  78. {
  79. if (this->is_dirty_)
  80. {
  81. this->is_dirty_ = false;
  82. this->mu_ = this->sign_ * numeric::fdiv(this->mu_, this->w_sum_);
  83. this->sigma2_ = numeric::fdiv(this->sigma2_, this->w_sum_);
  84. this->sigma2_ -= this->mu_ * this->mu_;
  85. float_type threshold_probability = numeric::fdiv(sum_of_weights(args) - this->w_sum_, sum_of_weights(args));
  86. float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_);
  87. float_type xi_hat = 0.5 * ( 1. - tmp );
  88. float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp );
  89. float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
  90. float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
  91. this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
  92. }
  93. return this->fit_parameters_;
  94. }
  95. // make this accumulator serializeable
  96. // TODO: do we need to split to load/save and verify that threshold did not change?
  97. template<class Archive>
  98. void serialize(Archive & ar, const unsigned int file_version)
  99. {
  100. ar & sign_;
  101. ar & mu_;
  102. ar & sigma2_;
  103. ar & threshold_;
  104. ar & fit_parameters_;
  105. ar & is_dirty_;
  106. }
  107. private:
  108. short sign_; // for left tail fitting, mirror the extreme values
  109. mutable float_type mu_; // mean of samples above threshold
  110. mutable float_type sigma2_; // variance of samples above threshold
  111. mutable float_type w_sum_; // sum of weights of samples above threshold
  112. float_type threshold_;
  113. mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
  114. mutable bool is_dirty_;
  115. };
  116. ///////////////////////////////////////////////////////////////////////////////
  117. // weighted_peaks_over_threshold_prob_impl
  118. // determines threshold from a given threshold probability using order statistics
  119. /**
  120. @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
  121. @sa weighted_peaks_over_threshold_impl
  122. @param quantile_probability
  123. @param pot_threshold_probability
  124. */
  125. template<typename Sample, typename Weight, typename LeftRight>
  126. struct weighted_peaks_over_threshold_prob_impl
  127. : accumulator_base
  128. {
  129. typedef typename numeric::functional::multiplies<Weight, Sample>::result_type weighted_sample;
  130. typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
  131. // for boost::result_of
  132. typedef boost::tuple<float_type, float_type, float_type> result_type;
  133. template<typename Args>
  134. weighted_peaks_over_threshold_prob_impl(Args const &args)
  135. : sign_((is_same<LeftRight, left>::value) ? -1 : 1)
  136. , mu_(sign_ * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
  137. , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
  138. , threshold_probability_(args[pot_threshold_probability])
  139. , fit_parameters_(boost::make_tuple(0., 0., 0.))
  140. , is_dirty_(true)
  141. {
  142. }
  143. void operator ()(dont_care)
  144. {
  145. this->is_dirty_ = true;
  146. }
  147. template<typename Args>
  148. result_type result(Args const &args) const
  149. {
  150. if (this->is_dirty_)
  151. {
  152. this->is_dirty_ = false;
  153. float_type threshold = sum_of_weights(args)
  154. * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ );
  155. std::size_t n = 0;
  156. Weight sum = Weight(0);
  157. while (sum < threshold)
  158. {
  159. if (n < static_cast<std::size_t>(tail_weights(args).size()))
  160. {
  161. mu_ += *(tail_weights(args).begin() + n) * *(tail(args).begin() + n);
  162. sigma2_ += *(tail_weights(args).begin() + n) * *(tail(args).begin() + n) * (*(tail(args).begin() + n));
  163. sum += *(tail_weights(args).begin() + n);
  164. n++;
  165. }
  166. else
  167. {
  168. if (std::numeric_limits<float_type>::has_quiet_NaN)
  169. {
  170. return boost::make_tuple(
  171. std::numeric_limits<float_type>::quiet_NaN()
  172. , std::numeric_limits<float_type>::quiet_NaN()
  173. , std::numeric_limits<float_type>::quiet_NaN()
  174. );
  175. }
  176. else
  177. {
  178. std::ostringstream msg;
  179. msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
  180. boost::throw_exception(std::runtime_error(msg.str()));
  181. return boost::make_tuple(Sample(0), Sample(0), Sample(0));
  182. }
  183. }
  184. }
  185. float_type u = *(tail(args).begin() + n - 1) * this->sign_;
  186. this->mu_ = this->sign_ * numeric::fdiv(this->mu_, sum);
  187. this->sigma2_ = numeric::fdiv(this->sigma2_, sum);
  188. this->sigma2_ -= this->mu_ * this->mu_;
  189. if (is_same<LeftRight, left>::value)
  190. this->threshold_probability_ = 1. - this->threshold_probability_;
  191. float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_);
  192. float_type xi_hat = 0.5 * ( 1. - tmp );
  193. float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp );
  194. float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat);
  195. float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat;
  196. this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
  197. }
  198. return this->fit_parameters_;
  199. }
  200. private:
  201. short sign_; // for left tail fitting, mirror the extreme values
  202. mutable float_type mu_; // mean of samples above threshold u
  203. mutable float_type sigma2_; // variance of samples above threshold u
  204. mutable float_type threshold_probability_;
  205. mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
  206. mutable bool is_dirty_;
  207. };
  208. } // namespace impl
  209. ///////////////////////////////////////////////////////////////////////////////
  210. // tag::weighted_peaks_over_threshold
  211. //
  212. namespace tag
  213. {
  214. template<typename LeftRight>
  215. struct weighted_peaks_over_threshold
  216. : depends_on<sum_of_weights>
  217. , pot_threshold_value
  218. {
  219. /// INTERNAL ONLY
  220. typedef accumulators::impl::weighted_peaks_over_threshold_impl<mpl::_1, mpl::_2, LeftRight> impl;
  221. };
  222. template<typename LeftRight>
  223. struct weighted_peaks_over_threshold_prob
  224. : depends_on<sum_of_weights, tail_weights<LeftRight> >
  225. , pot_threshold_probability
  226. {
  227. /// INTERNAL ONLY
  228. typedef accumulators::impl::weighted_peaks_over_threshold_prob_impl<mpl::_1, mpl::_2, LeftRight> impl;
  229. };
  230. }
  231. ///////////////////////////////////////////////////////////////////////////////
  232. // extract::weighted_peaks_over_threshold
  233. //
  234. namespace extract
  235. {
  236. extractor<tag::abstract_peaks_over_threshold> const weighted_peaks_over_threshold = {};
  237. BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_peaks_over_threshold)
  238. }
  239. using extract::weighted_peaks_over_threshold;
  240. // weighted_peaks_over_threshold<LeftRight>(with_threshold_value) -> weighted_peaks_over_threshold<LeftRight>
  241. template<typename LeftRight>
  242. struct as_feature<tag::weighted_peaks_over_threshold<LeftRight>(with_threshold_value)>
  243. {
  244. typedef tag::weighted_peaks_over_threshold<LeftRight> type;
  245. };
  246. // weighted_peaks_over_threshold<LeftRight>(with_threshold_probability) -> weighted_peaks_over_threshold_prob<LeftRight>
  247. template<typename LeftRight>
  248. struct as_feature<tag::weighted_peaks_over_threshold<LeftRight>(with_threshold_probability)>
  249. {
  250. typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
  251. };
  252. }} // namespace boost::accumulators
  253. #ifdef _MSC_VER
  254. # pragma warning(pop)
  255. #endif
  256. #endif