univariate_statistics.html 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. <html>
  2. <head>
  3. <meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">
  4. <title>Univariate Statistics</title>
  5. <link rel="stylesheet" href="../math.css" type="text/css">
  6. <meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
  7. <link rel="home" href="../index.html" title="Math Toolkit 2.11.0">
  8. <link rel="up" href="../statistics.html" title="Chapter&#160;6.&#160;Statistics">
  9. <link rel="prev" href="../statistics.html" title="Chapter&#160;6.&#160;Statistics">
  10. <link rel="next" href="bivariate_statistics.html" title="Bivariate Statistics">
  11. </head>
  12. <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
  13. <table cellpadding="2" width="100%"><tr>
  14. <td valign="top"><img alt="Boost C++ Libraries" width="277" height="86" src="../../../../../boost.png"></td>
  15. <td align="center"><a href="../../../../../index.html">Home</a></td>
  16. <td align="center"><a href="../../../../../libs/libraries.htm">Libraries</a></td>
  17. <td align="center"><a href="http://www.boost.org/users/people.html">People</a></td>
  18. <td align="center"><a href="http://www.boost.org/users/faq.html">FAQ</a></td>
  19. <td align="center"><a href="../../../../../more/index.htm">More</a></td>
  20. </tr></table>
  21. <hr>
  22. <div class="spirit-nav">
  23. <a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
  24. </div>
  25. <div class="section">
  26. <div class="titlepage"><div><div><h2 class="title" style="clear: both">
  27. <a name="math_toolkit.univariate_statistics"></a><a class="link" href="univariate_statistics.html" title="Univariate Statistics">Univariate Statistics</a>
  28. </h2></div></div></div>
  29. <h4>
  30. <a name="math_toolkit.univariate_statistics.h0"></a>
  31. <span class="phrase"><a name="math_toolkit.univariate_statistics.synopsis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.synopsis">Synopsis</a>
  32. </h4>
  33. <pre class="programlisting"><span class="preprocessor">#include</span> <span class="special">&lt;</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span>
  34. <span class="keyword">namespace</span> <span class="identifier">boost</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">math</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">statistics</span> <span class="special">{</span>
  35. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  36. <span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  37. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  38. <span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  39. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  40. <span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  41. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  42. <span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  43. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  44. <span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  45. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  46. <span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  47. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  48. <span class="keyword">auto</span> <span class="identifier">mean_and_sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  49. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  50. <span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  51. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  52. <span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  53. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  54. <span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  55. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  56. <span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  57. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  58. <span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  59. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  60. <span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  61. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  62. <span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  63. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  64. <span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  65. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  66. <span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  67. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  68. <span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  69. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">&gt;</span>
  70. <span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">iterator_traits</span><span class="special">&lt;</span><span class="identifier">RandomAccessIterator</span><span class="special">&gt;::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special">&lt;</span><span class="identifier">Real</span><span class="special">&gt;::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
  71. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessContainer</span><span class="special">&gt;</span>
  72. <span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessContainer</span> <span class="identifier">v</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">RandomAccessContainer</span><span class="special">::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special">&lt;</span><span class="identifier">Real</span><span class="special">&gt;::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
  73. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  74. <span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  75. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  76. <span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  77. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
  78. <span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
  79. <span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
  80. <span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
  81. <span class="special">}}}</span>
  82. </pre>
  83. <h4>
  84. <a name="math_toolkit.univariate_statistics.h1"></a>
  85. <span class="phrase"><a name="math_toolkit.univariate_statistics.description"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.description">Description</a>
  86. </h4>
  87. <p>
  88. The file <code class="computeroutput"><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> is a
  89. set of facilities for computing scalar values from vectors.
  90. </p>
  91. <p>
  92. Many of these functionals have trivial naive implementations, but experienced
  93. programmers will recognize that even trivial algorithms are easy to screw up,
  94. and that numerical instabilities often lurk in corner cases. We have attempted
  95. to do our "due diligence" to root out these problems-scouring the
  96. literature for numerically stable algorithms for even the simplest of functionals.
  97. </p>
  98. <p>
  99. <span class="emphasis"><em>Nota bene</em></span>: Some similar functionality is provided in
  100. <a href="https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html" target="_top">Boost
  101. Accumulators Framework</a>. These accumulators should be used in real-time
  102. applications; <code class="computeroutput"><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> should
  103. be used when CPU vectorization is needed. As a reminder, remember that to actually
  104. <span class="emphasis"><em>get</em></span> vectorization, compile with <code class="computeroutput"><span class="special">-</span><span class="identifier">march</span><span class="special">=</span><span class="identifier">native</span>
  105. <span class="special">-</span><span class="identifier">O3</span></code>
  106. flags.
  107. </p>
  108. <p>
  109. We now describe each functional in detail. Our examples use <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span></code>
  110. to hold the data, but this not required. In general, you can store your data
  111. in an Eigen array, and Armadillo vector, <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">array</span></code>,
  112. and for many of the routines, a <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">forward_list</span></code>.
  113. These routines are usable in float, double, long double, and Boost.Multiprecision
  114. precision, as well as their complex extensions whenever the computation is
  115. well-defined. For certain operations (total variation, for example) integer
  116. inputs are supported.
  117. </p>
  118. <h4>
  119. <a name="math_toolkit.univariate_statistics.h2"></a>
  120. <span class="phrase"><a name="math_toolkit.univariate_statistics.mean"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.mean">Mean</a>
  121. </h4>
  122. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  123. <span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
  124. <span class="comment">// Alternative syntax if you want to use entire container:</span>
  125. <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  126. </pre>
  127. <p>
  128. The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
  129. 1.6a</a>. The data is not modified and must be forward iterable. Works
  130. with real and integer data. If the input is an integer type, the output is
  131. a double precision float.
  132. </p>
  133. <h4>
  134. <a name="math_toolkit.univariate_statistics.h3"></a>
  135. <span class="phrase"><a name="math_toolkit.univariate_statistics.variance"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.variance">Variance</a>
  136. </h4>
  137. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  138. <span class="identifier">Real</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
  139. </pre>
  140. <p>
  141. If you don't need to calculate on a subset of the input, then the range call
  142. is more terse:
  143. </p>
  144. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  145. <span class="identifier">Real</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  146. </pre>
  147. <p>
  148. The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
  149. 1.6b</a>. The input data must be forward iterable and the range <code class="computeroutput"><span class="special">[</span><span class="identifier">first</span><span class="special">,</span>
  150. <span class="identifier">last</span><span class="special">)</span></code>
  151. must contain at least two elements. It is <span class="emphasis"><em>not</em></span> in general
  152. sensible to pass complex numbers to this routine. If integers are passed as
  153. input, then the output is a double precision float.
  154. </p>
  155. <p>
  156. <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span></code>
  157. returns the population variance. If you want a sample variance, use
  158. </p>
  159. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  160. <span class="identifier">Real</span> <span class="identifier">sn_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  161. </pre>
  162. <h4>
  163. <a name="math_toolkit.univariate_statistics.h4"></a>
  164. <span class="phrase"><a name="math_toolkit.univariate_statistics.skewness"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.skewness">Skewness</a>
  165. </h4>
  166. <p>
  167. Computes the skewness of a dataset:
  168. </p>
  169. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  170. <span class="keyword">double</span> <span class="identifier">skewness</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">skewness</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  171. <span class="comment">// skewness = 0.</span>
  172. </pre>
  173. <p>
  174. The input vector is not modified, works with integral and real data. If the
  175. input data is integral, the output is a double precision float.
  176. </p>
  177. <p>
  178. For a dataset consisting of a single constant value, we take the skewness to
  179. be zero by definition.
  180. </p>
  181. <p>
  182. The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
  183. </p>
  184. <h4>
  185. <a name="math_toolkit.univariate_statistics.h5"></a>
  186. <span class="phrase"><a name="math_toolkit.univariate_statistics.kurtosis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.kurtosis">Kurtosis</a>
  187. </h4>
  188. <p>
  189. Computes the kurtosis of a dataset:
  190. </p>
  191. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  192. <span class="keyword">double</span> <span class="identifier">kurtosis</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  193. <span class="comment">// kurtosis = 17/10</span>
  194. </pre>
  195. <p>
  196. The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
  197. The input data must be forward iterable and must consist of real or integral
  198. values. If the input data is integral, the output is a double precision float.
  199. Note that this is <span class="emphasis"><em>not</em></span> the excess kurtosis. If you require
  200. the excess kurtosis, use <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">excess_kurtosis</span></code>. This function simply subtracts
  201. 3 from the kurtosis, but it makes eminently clear our definition of kurtosis.
  202. </p>
  203. <h4>
  204. <a name="math_toolkit.univariate_statistics.h6"></a>
  205. <span class="phrase"><a name="math_toolkit.univariate_statistics.first_four_moments"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.first_four_moments">First
  206. four moments</a>
  207. </h4>
  208. <p>
  209. Simultaneously computes the first four <a href="https://en.wikipedia.org/wiki/Central_moment" target="_top">central
  210. moments</a> in a single pass through the data:
  211. </p>
  212. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  213. <span class="keyword">auto</span> <span class="special">[</span><span class="identifier">M1</span><span class="special">,</span> <span class="identifier">M2</span><span class="special">,</span> <span class="identifier">M3</span><span class="special">,</span> <span class="identifier">M4</span><span class="special">]</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  214. </pre>
  215. <h4>
  216. <a name="math_toolkit.univariate_statistics.h7"></a>
  217. <span class="phrase"><a name="math_toolkit.univariate_statistics.median"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median">Median</a>
  218. </h4>
  219. <p>
  220. Computes the median of a dataset:
  221. </p>
  222. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  223. <span class="keyword">double</span> <span class="identifier">m</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
  224. </pre>
  225. <p>
  226. <span class="emphasis"><em>Nota bene: The input vector is modified.</em></span> The calculation
  227. of the median is a thin wrapper around the C++11 <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>. Therefore, all requirements
  228. of <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">nth_element</span></code> are inherited by the median calculation.
  229. In particular, the container must allow random access.
  230. </p>
  231. <h4>
  232. <a name="math_toolkit.univariate_statistics.h8"></a>
  233. <span class="phrase"><a name="math_toolkit.univariate_statistics.median_absolute_deviation"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median_absolute_deviation">Median
  234. Absolute Deviation</a>
  235. </h4>
  236. <p>
  237. Computes the <a href="https://en.wikipedia.org/wiki/Median_absolute_deviation" target="_top">median
  238. absolute deviation</a> of a dataset:
  239. </p>
  240. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
  241. <span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  242. </pre>
  243. <p>
  244. By default, the deviation from the median is used. If you have some prior that
  245. the median is zero, or wish to compute the median absolute deviation from the
  246. mean, use the following:
  247. </p>
  248. <pre class="programlisting"><span class="comment">// prior is that center is zero:</span>
  249. <span class="keyword">double</span> <span class="identifier">center</span> <span class="special">=</span> <span class="number">0</span><span class="special">;</span>
  250. <span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">center</span><span class="special">);</span>
  251. <span class="comment">// compute median absolute deviation from the mean:</span>
  252. <span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  253. <span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">mu</span><span class="special">);</span>
  254. </pre>
  255. <p>
  256. <span class="emphasis"><em>Nota bene:</em></span> The input vector is modified. Again the vector
  257. is passed into a call to <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>.
  258. </p>
  259. <h4>
  260. <a name="math_toolkit.univariate_statistics.h9"></a>
  261. <span class="phrase"><a name="math_toolkit.univariate_statistics.gini_coefficient"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.gini_coefficient">Gini
  262. Coefficient</a>
  263. </h4>
  264. <p>
  265. Compute the Gini coefficient of a dataset:
  266. </p>
  267. <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">};</span>
  268. <span class="keyword">double</span> <span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  269. <span class="comment">// gini = 3/4</span>
  270. <span class="keyword">double</span> <span class="identifier">s_gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
  271. <span class="comment">// s_gini = 1.</span>
  272. <span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">w</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">};</span>
  273. <span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">w</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">w</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
  274. <span class="comment">// gini = 0, as all elements are now equal.</span>
  275. </pre>
  276. <p>
  277. <span class="emphasis"><em>Nota bene</em></span>: The input data is altered: in particular, it
  278. is sorted. Makes a call to <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">sort</span></code>, and
  279. as such requires random access iterators.
  280. </p>
  281. <p>
  282. The sample Gini coefficient lies in the range [0,1], whereas the population
  283. Gini coefficient is in the range [0, 1 - 1/ <span class="emphasis"><em>n</em></span>].
  284. </p>
  285. <p>
  286. <span class="emphasis"><em>Nota bene:</em></span> There is essentially no reason to pass negative
  287. values to the Gini coefficient function. However, a use case (measuring wealth
  288. inequality when some people have negative wealth) exists, so we do not throw
  289. an exception when negative values are encountered. You should have <span class="emphasis"><em>very</em></span>
  290. good cause to pass negative values to the Gini coefficient calculator. Another
  291. use case is found in signal processing, but the sorting is by magnitude and
  292. hence has a different implementation. See <code class="computeroutput"><span class="identifier">absolute_gini_coefficient</span></code>
  293. for details.
  294. </p>
  295. <h4>
  296. <a name="math_toolkit.univariate_statistics.h10"></a>
  297. <span class="phrase"><a name="math_toolkit.univariate_statistics.references"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.references">References</a>
  298. </h4>
  299. <div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">
  300. <li class="listitem">
  301. Higham, Nicholas J. <span class="emphasis"><em>Accuracy and stability of numerical algorithms.</em></span>
  302. Vol. 80. Siam, 2002.
  303. </li>
  304. <li class="listitem">
  305. Philippe P. P&#233;bay: <span class="quote">&#8220;<span class="quote">Formulas for Robust, One-Pass Parallel Computation
  306. of Covariances and Arbitrary-Order Statistical Moments.</span>&#8221;</span> Technical
  307. Report SAND2008-6212, Sandia National Laboratories, September 2008.
  308. </li>
  309. </ul></div>
  310. </div>
  311. <table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
  312. <td align="left"></td>
  313. <td align="right"><div class="copyright-footer">Copyright &#169; 2006-2019 Nikhar
  314. Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos,
  315. Hubert Holin, Bruno Lalande, John Maddock, Jeremy Murphy, Matthew Pulver, Johan
  316. R&#229;de, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg,
  317. Daryle Walker and Xiaogang Zhang<p>
  318. Distributed under the Boost Software License, Version 1.0. (See accompanying
  319. file LICENSE_1_0.txt or copy at <a href="http://www.boost.org/LICENSE_1_0.txt" target="_top">http://www.boost.org/LICENSE_1_0.txt</a>)
  320. </p>
  321. </div></td>
  322. </tr></table>
  323. <hr>
  324. <div class="spirit-nav">
  325. <a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
  326. </div>
  327. </body>
  328. </html>