bivariate_statistics.hpp 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. // (C) Copyright Nick Thompson 2018.
  2. // Use, modification and distribution are subject to the
  3. // Boost Software License, Version 1.0. (See accompanying file
  4. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
  6. #define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
  7. #include <iterator>
  8. #include <tuple>
  9. #include <boost/assert.hpp>
  10. #include <boost/config/header_deprecated.hpp>
  11. BOOST_HEADER_DEPRECATED("<boost/math/statistics/bivariate_statistics.hpp>");
  12. namespace boost{ namespace math{ namespace tools {
  13. template<class Container>
  14. auto means_and_covariance(Container const & u, Container const & v)
  15. {
  16. using Real = typename Container::value_type;
  17. using std::size;
  18. BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
  19. BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample.");
  20. // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
  21. Real cov = 0;
  22. Real mu_u = u[0];
  23. Real mu_v = v[0];
  24. for(size_t i = 1; i < size(u); ++i)
  25. {
  26. Real u_tmp = (u[i] - mu_u)/(i+1);
  27. Real v_tmp = v[i] - mu_v;
  28. cov += i*u_tmp*v_tmp;
  29. mu_u = mu_u + u_tmp;
  30. mu_v = mu_v + v_tmp/(i+1);
  31. }
  32. return std::make_tuple(mu_u, mu_v, cov/size(u));
  33. }
  34. template<class Container>
  35. auto covariance(Container const & u, Container const & v)
  36. {
  37. auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v);
  38. return cov;
  39. }
  40. template<class Container>
  41. auto correlation_coefficient(Container const & u, Container const & v)
  42. {
  43. using Real = typename Container::value_type;
  44. using std::size;
  45. BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
  46. BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples.");
  47. Real cov = 0;
  48. Real mu_u = u[0];
  49. Real mu_v = v[0];
  50. Real Qu = 0;
  51. Real Qv = 0;
  52. for(size_t i = 1; i < size(u); ++i)
  53. {
  54. Real u_tmp = u[i] - mu_u;
  55. Real v_tmp = v[i] - mu_v;
  56. Qu = Qu + (i*u_tmp*u_tmp)/(i+1);
  57. Qv = Qv + (i*v_tmp*v_tmp)/(i+1);
  58. cov += i*u_tmp*v_tmp/(i+1);
  59. mu_u = mu_u + u_tmp/(i+1);
  60. mu_v = mu_v + v_tmp/(i+1);
  61. }
  62. // If both datasets are constant, then they are perfectly correlated.
  63. if (Qu == 0 && Qv == 0)
  64. {
  65. return Real(1);
  66. }
  67. // If one dataset is constant and the other isn't, then they have no correlation:
  68. if (Qu == 0 || Qv == 0)
  69. {
  70. return Real(0);
  71. }
  72. // Make sure rho in [-1, 1], even in the presence of numerical noise.
  73. Real rho = cov/sqrt(Qu*Qv);
  74. if (rho > 1) {
  75. rho = 1;
  76. }
  77. if (rho < -1) {
  78. rho = -1;
  79. }
  80. return rho;
  81. }
  82. }}}
  83. #endif