123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354 |
- <html>
- <head>
- <meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">
- <title>Univariate Statistics</title>
- <link rel="stylesheet" href="../math.css" type="text/css">
- <meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
- <link rel="home" href="../index.html" title="Math Toolkit 2.11.0">
- <link rel="up" href="../statistics.html" title="Chapter 6. Statistics">
- <link rel="prev" href="../statistics.html" title="Chapter 6. Statistics">
- <link rel="next" href="bivariate_statistics.html" title="Bivariate Statistics">
- </head>
- <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
- <table cellpadding="2" width="100%"><tr>
- <td valign="top"><img alt="Boost C++ Libraries" width="277" height="86" src="../../../../../boost.png"></td>
- <td align="center"><a href="../../../../../index.html">Home</a></td>
- <td align="center"><a href="../../../../../libs/libraries.htm">Libraries</a></td>
- <td align="center"><a href="http://www.boost.org/users/people.html">People</a></td>
- <td align="center"><a href="http://www.boost.org/users/faq.html">FAQ</a></td>
- <td align="center"><a href="../../../../../more/index.htm">More</a></td>
- </tr></table>
- <hr>
- <div class="spirit-nav">
- <a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
- </div>
- <div class="section">
- <div class="titlepage"><div><div><h2 class="title" style="clear: both">
- <a name="math_toolkit.univariate_statistics"></a><a class="link" href="univariate_statistics.html" title="Univariate Statistics">Univariate Statistics</a>
- </h2></div></div></div>
- <h4>
- <a name="math_toolkit.univariate_statistics.h0"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.synopsis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.synopsis">Synopsis</a>
- </h4>
- <pre class="programlisting"><span class="preprocessor">#include</span> <span class="special"><</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">></span>
- <span class="keyword">namespace</span> <span class="identifier">boost</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">math</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">statistics</span> <span class="special">{</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">mean_and_sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">RandomAccessIterator</span> <span class="identifier">last</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">iterator_traits</span><span class="special"><</span><span class="identifier">RandomAccessIterator</span><span class="special">>::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special"><</span><span class="identifier">Real</span><span class="special">>::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessContainer</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessContainer</span> <span class="identifier">v</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">RandomAccessContainer</span><span class="special">::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special"><</span><span class="identifier">Real</span><span class="special">>::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
- <span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
- <span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
- <span class="special">}}}</span>
- </pre>
- <h4>
- <a name="math_toolkit.univariate_statistics.h1"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.description"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.description">Description</a>
- </h4>
- <p>
- The file <code class="computeroutput"><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> is a
- set of facilities for computing scalar values from vectors.
- </p>
- <p>
- Many of these functionals have trivial naive implementations, but experienced
- programmers will recognize that even trivial algorithms are easy to screw up,
- and that numerical instabilities often lurk in corner cases. We have attempted
- to do our "due diligence" to root out these problems&#8212;scouring the
- literature for numerically stable algorithms for even the simplest of functionals.
- </p>
- <p>
- <span class="emphasis"><em>Nota bene</em></span>: Some similar functionality is provided in
- <a href="https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html" target="_top">Boost
- Accumulators Framework</a>. These accumulators should be used in real-time
- applications; <code class="computeroutput"><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> should
- be used when CPU vectorization is needed. Remember that to actually
- <span class="emphasis"><em>get</em></span> vectorization, you must compile with the <code class="computeroutput"><span class="special">-</span><span class="identifier">march</span><span class="special">=</span><span class="identifier">native</span>
- <span class="special">-</span><span class="identifier">O3</span></code>
- flags.
- </p>
- <p>
- We now describe each functional in detail. Our examples use <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span></code>
- to hold the data, but this is not required. In general, you can store your data
- in an Eigen array, an Armadillo vector, <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">array</span></code>,
- and for many of the routines, a <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">forward_list</span></code>.
- These routines are usable in float, double, long double, and Boost.Multiprecision
- precision, as well as their complex extensions whenever the computation is
- well-defined. For certain operations (total variation, for example) integer
- inputs are supported.
- </p>
- <h4>
- <a name="math_toolkit.univariate_statistics.h2"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.mean"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.mean">Mean</a>
- </h4>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
- <span class="comment">// Alternative syntax if you want to use entire container:</span>
- <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- </pre>
- <p>
- The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
- 1.6a</a>. The data is not modified and must be forward iterable. Works
- with real and integer data. If the input is an integer type, the output is
- a double precision float.
- </p>
- <h4>
- <a name="math_toolkit.univariate_statistics.h3"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.variance"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.variance">Variance</a>
- </h4>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
- </pre>
- <p>
- If you don't need to calculate on a subset of the input, then passing the
- whole container is more terse:
- </p>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- </pre>
- <p>
- The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
- 1.6b</a>. The input data must be forward iterable and the range <code class="computeroutput"><span class="special">[</span><span class="identifier">first</span><span class="special">,</span>
- <span class="identifier">last</span><span class="special">)</span></code>
- must contain at least two elements. It is <span class="emphasis"><em>not</em></span> in general
- sensible to pass complex numbers to this routine. If integers are passed as
- input, then the output is a double precision float.
- </p>
- <p>
- <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span></code>
- returns the population variance. If you want a sample variance, use
- </p>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">sn_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- </pre>
- <h4>
- <a name="math_toolkit.univariate_statistics.h4"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.skewness"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.skewness">Skewness</a>
- </h4>
- <p>
- Computes the skewness of a dataset:
- </p>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">skewness</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">skewness</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- <span class="comment">// skewness = 0.</span>
- </pre>
- <p>
- The input vector is not modified. The routine works with integral and real data. If the
- input data is integral, the output is a double precision float.
- </p>
- <p>
- For a dataset consisting of a single constant value, we take the skewness to
- be zero by definition.
- </p>
- <p>
- The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
- </p>
- <h4>
- <a name="math_toolkit.univariate_statistics.h5"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.kurtosis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.kurtosis">Kurtosis</a>
- </h4>
- <p>
- Computes the kurtosis of a dataset:
- </p>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">kurtosis</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- <span class="comment">// kurtosis = 17/10</span>
- </pre>
- <p>
- The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
- The input data must be forward iterable and must consist of real or integral
- values. If the input data is integral, the output is a double precision float.
- Note that this is <span class="emphasis"><em>not</em></span> the excess kurtosis. If you require
- the excess kurtosis, use <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">excess_kurtosis</span></code>. This function simply subtracts
- 3 from the kurtosis, but it makes eminently clear our definition of kurtosis.
- </p>
- <h4>
- <a name="math_toolkit.univariate_statistics.h6"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.first_four_moments"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.first_four_moments">First
- four moments</a>
- </h4>
- <p>
- Simultaneously computes the first four <a href="https://en.wikipedia.org/wiki/Central_moment" target="_top">central
- moments</a> in a single pass through the data:
- </p>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">auto</span> <span class="special">[</span><span class="identifier">M1</span><span class="special">,</span> <span class="identifier">M2</span><span class="special">,</span> <span class="identifier">M3</span><span class="special">,</span> <span class="identifier">M4</span><span class="special">]</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- </pre>
- <h4>
- <a name="math_toolkit.univariate_statistics.h7"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.median"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median">Median</a>
- </h4>
- <p>
- Computes the median of a dataset:
- </p>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">m</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
- </pre>
- <p>
- <span class="emphasis"><em>Nota bene: The input vector is modified.</em></span> The calculation
- of the median is a thin wrapper around the C++11 <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>. Therefore, all requirements
- of <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">nth_element</span></code> are inherited by the median calculation.
- In particular, the container must allow random access.
- </p>
- <h4>
- <a name="math_toolkit.univariate_statistics.h8"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.median_absolute_deviation"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median_absolute_deviation">Median
- Absolute Deviation</a>
- </h4>
- <p>
- Computes the <a href="https://en.wikipedia.org/wiki/Median_absolute_deviation" target="_top">median
- absolute deviation</a> of a dataset:
- </p>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- </pre>
- <p>
- By default, the deviation from the median is used. If you have some prior that
- the median is zero, or wish to compute the median absolute deviation from the
- mean, use the following:
- </p>
- <pre class="programlisting"><span class="comment">// prior is that center is zero:</span>
- <span class="keyword">double</span> <span class="identifier">center</span> <span class="special">=</span> <span class="number">0</span><span class="special">;</span>
- <span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">center</span><span class="special">);</span>
- <span class="comment">// compute median absolute deviation from the mean:</span>
- <span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">mu</span><span class="special">);</span>
- </pre>
- <p>
- <span class="emphasis"><em>Nota bene:</em></span> The input vector is modified. Again, the vector
- is passed into a call to <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>.
- </p>
- <h4>
- <a name="math_toolkit.univariate_statistics.h9"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.gini_coefficient"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.gini_coefficient">Gini
- Coefficient</a>
- </h4>
- <p>
- Computes the Gini coefficient of a dataset:
- </p>
- <pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">};</span>
- <span class="keyword">double</span> <span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- <span class="comment">// gini = 3/4</span>
- <span class="keyword">double</span> <span class="identifier">s_gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
- <span class="comment">// s_gini = 1.</span>
- <span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">w</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">};</span>
- <span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">w</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">w</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
- <span class="comment">// gini = 0, as all elements are now equal.</span>
- </pre>
- <p>
- <span class="emphasis"><em>Nota bene</em></span>: The input data is altered: in particular, it
- is sorted. The function calls <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">sort</span></code>, and
- as such requires random access iterators.
- </p>
- <p>
- The sample Gini coefficient lies in the range [0, 1], whereas the population
- Gini coefficient lies in the range [0, 1 - 1/<span class="emphasis"><em>n</em></span>], where <span class="emphasis"><em>n</em></span> is the number of elements.
- </p>
- <p>
- <span class="emphasis"><em>Nota bene:</em></span> There is essentially no reason to pass negative
- values to the Gini coefficient function. However, a use case (measuring wealth
- inequality when some people have negative wealth) exists, so we do not throw
- an exception when negative values are encountered. You should have <span class="emphasis"><em>very</em></span>
- good cause to pass negative values to the Gini coefficient calculator. Another
- use case is found in signal processing, but there the data is sorted by magnitude,
- so it has a different implementation. See <code class="computeroutput"><span class="identifier">absolute_gini_coefficient</span></code>
- for details.
- </p>
- <h4>
- <a name="math_toolkit.univariate_statistics.h10"></a>
- <span class="phrase"><a name="math_toolkit.univariate_statistics.references"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.references">References</a>
- </h4>
- <div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">
- <li class="listitem">
- Higham, Nicholas J. <span class="emphasis"><em>Accuracy and stability of numerical algorithms.</em></span>
- Vol. 80. SIAM, 2002.
- </li>
- <li class="listitem">
- Philippe P. Pébay: <span class="quote">“<span class="quote">Formulas for Robust, One-Pass Parallel Computation
- of Covariances and Arbitrary-Order Statistical Moments.</span>”</span> Technical
- Report SAND2008-6212, Sandia National Laboratories, September 2008.
- </li>
- </ul></div>
- </div>
- <table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
- <td align="left"></td>
- <td align="right"><div class="copyright-footer">Copyright © 2006-2019 Nikhar
- Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos,
- Hubert Holin, Bruno Lalande, John Maddock, Jeremy Murphy, Matthew Pulver, Johan
- Råde, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg,
- Daryle Walker and Xiaogang Zhang<p>
- Distributed under the Boost Software License, Version 1.0. (See accompanying
- file LICENSE_1_0.txt or copy at <a href="http://www.boost.org/LICENSE_1_0.txt" target="_top">http://www.boost.org/LICENSE_1_0.txt</a>)
- </p>
- </div></td>
- </tr></table>
- <hr>
- <div class="spirit-nav">
- <a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
- </div>
- </body>
- </html>
|