runs_test.hpp 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. /*
  2. * Copyright Nick Thompson, 2019
  3. * Use, modification and distribution are subject to the
  4. * Boost Software License, Version 1.0. (See accompanying file
  5. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. */
  7. #ifndef BOOST_MATH_STATISTICS_RUNS_TEST_HPP
  8. #define BOOST_MATH_STATISTICS_RUNS_TEST_HPP
  9. #include <cmath>
  10. #include <algorithm>
  11. #include <utility>
  12. #include <boost/math/statistics/univariate_statistics.hpp>
  13. #include <boost/math/distributions/normal.hpp>
  14. namespace boost::math::statistics {
  15. template<class RandomAccessContainer>
  16. auto runs_above_and_below_threshold(RandomAccessContainer const & v,
  17. typename RandomAccessContainer::value_type threshold)
  18. {
  19. using Real = typename RandomAccessContainer::value_type;
  20. using std::sqrt;
  21. using std::abs;
  22. if (v.size() <= 1)
  23. {
  24. throw std::domain_error("At least 2 samples are required to get number of runs.");
  25. }
  26. typedef boost::math::policies::policy<
  27. boost::math::policies::promote_float<false>,
  28. boost::math::policies::promote_double<false> >
  29. no_promote_policy;
  30. decltype(v.size()) nabove = 0;
  31. decltype(v.size()) nbelow = 0;
  32. decltype(v.size()) imin = 0;
  33. // Take care of the case that v[0] == threshold:
  34. while (imin < v.size() && v[imin] == threshold) {
  35. ++imin;
  36. }
  37. // Take care of the constant vector case:
  38. if (imin == v.size()) {
  39. return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0));
  40. }
  41. bool run_up = (v[imin] > threshold);
  42. if (run_up) {
  43. ++nabove;
  44. } else {
  45. ++nbelow;
  46. }
  47. decltype(v.size()) runs = 1;
  48. for (decltype(v.size()) i = imin + 1; i < v.size(); ++i) {
  49. if (v[i] == threshold) {
  50. // skip values precisely equal to threshold (following R's randtests package)
  51. continue;
  52. }
  53. bool above = (v[i] > threshold);
  54. if (above) {
  55. ++nabove;
  56. } else {
  57. ++nbelow;
  58. }
  59. if (run_up == above) {
  60. continue;
  61. }
  62. else {
  63. run_up = above;
  64. runs++;
  65. }
  66. }
  67. // If you make n an int, the subtraction is gonna be bad in the variance:
  68. Real n = nabove + nbelow;
  69. Real expected_runs = Real(1) + Real(2*nabove*nbelow)/Real(n);
  70. Real variance = 2*nabove*nbelow*(2*nabove*nbelow-n)/Real(n*n*(n-1));
  71. // Bizarre, pathological limits:
  72. if (variance == 0)
  73. {
  74. if (runs == expected_runs)
  75. {
  76. Real statistic = 0;
  77. Real pvalue = 1;
  78. return std::make_pair(statistic, pvalue);
  79. }
  80. else
  81. {
  82. return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0));
  83. }
  84. }
  85. Real sd = sqrt(variance);
  86. Real statistic = (runs - expected_runs)/sd;
  87. auto normal = boost::math::normal_distribution<Real, no_promote_policy>(0,1);
  88. Real pvalue = 2*boost::math::cdf(normal, -abs(statistic));
  89. return std::make_pair(statistic, pvalue);
  90. }
  91. template<class RandomAccessContainer>
  92. auto runs_above_and_below_median(RandomAccessContainer const & v)
  93. {
  94. using Real = typename RandomAccessContainer::value_type;
  95. using std::log;
  96. using std::sqrt;
  97. // We have to memcpy v because the median does a partial sort,
  98. // and that would be catastrophic for the runs test.
  99. auto w = v;
  100. Real median = boost::math::statistics::median(w);
  101. return runs_above_and_below_threshold(v, median);
  102. }
  103. }
  104. #endif