/* * Copyright Nick Thompson, 2019 * Use, modification and distribution are subject to the * Boost Software License, Version 1.0. (See accompanying file * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */ #ifndef BOOST_MATH_STATISTICS_RUNS_TEST_HPP #define BOOST_MATH_STATISTICS_RUNS_TEST_HPP #include #include #include #include #include namespace boost::math::statistics { template auto runs_above_and_below_threshold(RandomAccessContainer const & v, typename RandomAccessContainer::value_type threshold) { using Real = typename RandomAccessContainer::value_type; using std::sqrt; using std::abs; if (v.size() <= 1) { throw std::domain_error("At least 2 samples are required to get number of runs."); } typedef boost::math::policies::policy< boost::math::policies::promote_float, boost::math::policies::promote_double > no_promote_policy; decltype(v.size()) nabove = 0; decltype(v.size()) nbelow = 0; decltype(v.size()) imin = 0; // Take care of the case that v[0] == threshold: while (imin < v.size() && v[imin] == threshold) { ++imin; } // Take care of the constant vector case: if (imin == v.size()) { return std::make_pair(std::numeric_limits::quiet_NaN(), Real(0)); } bool run_up = (v[imin] > threshold); if (run_up) { ++nabove; } else { ++nbelow; } decltype(v.size()) runs = 1; for (decltype(v.size()) i = imin + 1; i < v.size(); ++i) { if (v[i] == threshold) { // skip values precisely equal to threshold (following R's randtests package) continue; } bool above = (v[i] > threshold); if (above) { ++nabove; } else { ++nbelow; } if (run_up == above) { continue; } else { run_up = above; runs++; } } // If you make n an int, the subtraction is gonna be bad in the variance: Real n = nabove + nbelow; Real expected_runs = Real(1) + Real(2*nabove*nbelow)/Real(n); Real variance = 2*nabove*nbelow*(2*nabove*nbelow-n)/Real(n*n*(n-1)); // Bizarre, pathological limits: if (variance == 0) { if (runs == expected_runs) { Real statistic = 0; Real pvalue = 1; return std::make_pair(statistic, pvalue); } else { return std::make_pair(std::numeric_limits::quiet_NaN(), Real(0)); } } Real sd = sqrt(variance); Real statistic = (runs - expected_runs)/sd; auto normal = boost::math::normal_distribution(0,1); Real pvalue = 2*boost::math::cdf(normal, -abs(statistic)); return std::make_pair(statistic, pvalue); } template auto runs_above_and_below_median(RandomAccessContainer const & v) { using Real = typename RandomAccessContainer::value_type; using std::log; using std::sqrt; // We have to memcpy v because the median does a partial sort, // and that would be catastrophic for the runs test. auto w = v; Real median = boost::math::statistics::median(w); return runs_above_and_below_threshold(v, median); } } #endif