summaryrefslogtreecommitdiff
path: root/boost/math/tools/univariate_statistics.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/math/tools/univariate_statistics.hpp')
-rw-r--r--boost/math/tools/univariate_statistics.hpp393
1 files changed, 393 insertions, 0 deletions
diff --git a/boost/math/tools/univariate_statistics.hpp b/boost/math/tools/univariate_statistics.hpp
new file mode 100644
index 0000000000..226fdf46d2
--- /dev/null
+++ b/boost/math/tools/univariate_statistics.hpp
@@ -0,0 +1,393 @@
+// (C) Copyright Nick Thompson 2018.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
+#define BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
+
+#include <algorithm>
+#include <iterator>
+#include <boost/type_traits/is_complex.hpp>
+#include <boost/assert.hpp>
+#include <boost/multiprecision/detail/number_base.hpp>
+
+
+namespace boost::math::tools {
+
+template<class ForwardIterator>
+auto mean(ForwardIterator first, ForwardIterator last)
+{
+ using Real = typename std::iterator_traits<ForwardIterator>::value_type;
+ BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
+ if constexpr (std::is_integral<Real>::value)
+ {
+ double mu = 0;
+ double i = 1;
+ for(auto it = first; it != last; ++it) {
+ mu = mu + (*it - mu)/i;
+ i += 1;
+ }
+ return mu;
+ }
+ else
+ {
+ Real mu = 0;
+ Real i = 1;
+ for(auto it = first; it != last; ++it) {
+ mu = mu + (*it - mu)/i;
+ i += 1;
+ }
+ return mu;
+ }
+}
+
+template<class Container>
+inline auto mean(Container const & v)
+{
+ return mean(v.cbegin(), v.cend());
+}
+
+template<class ForwardIterator>
+auto variance(ForwardIterator first, ForwardIterator last)
+{
+ using Real = typename std::iterator_traits<ForwardIterator>::value_type;
+ BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
+ // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
+ if constexpr (std::is_integral<Real>::value)
+ {
+ double M = *first;
+ double Q = 0;
+ double k = 2;
+ for (auto it = std::next(first); it != last; ++it)
+ {
+ double tmp = *it - M;
+ Q = Q + ((k-1)*tmp*tmp)/k;
+ M = M + tmp/k;
+ k += 1;
+ }
+ return Q/(k-1);
+ }
+ else
+ {
+ Real M = *first;
+ Real Q = 0;
+ Real k = 2;
+ for (auto it = std::next(first); it != last; ++it)
+ {
+ Real tmp = (*it - M)/k;
+ Q += k*(k-1)*tmp*tmp;
+ M += tmp;
+ k += 1;
+ }
+ return Q/(k-1);
+ }
+}
+
+template<class Container>
+inline auto variance(Container const & v)
+{
+ return variance(v.cbegin(), v.cend());
+}
+
+template<class ForwardIterator>
+auto sample_variance(ForwardIterator first, ForwardIterator last)
+{
+ size_t n = std::distance(first, last);
+ BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance.");
+ return n*variance(first, last)/(n-1);
+}
+
+template<class Container>
+inline auto sample_variance(Container const & v)
+{
+ return sample_variance(v.cbegin(), v.cend());
+}
+
+
+// Follows equation 1.5 of:
+// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
+template<class ForwardIterator>
+auto skewness(ForwardIterator first, ForwardIterator last)
+{
+ using Real = typename std::iterator_traits<ForwardIterator>::value_type;
+ BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness.");
+ if constexpr (std::is_integral<Real>::value)
+ {
+ double M1 = *first;
+ double M2 = 0;
+ double M3 = 0;
+ double n = 2;
+ for (auto it = std::next(first); it != last; ++it)
+ {
+ double delta21 = *it - M1;
+ double tmp = delta21/n;
+ M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
+ M2 = M2 + tmp*(n-1)*delta21;
+ M1 = M1 + tmp;
+ n += 1;
+ }
+
+ double var = M2/(n-1);
+ if (var == 0)
+ {
+ // The limit is technically undefined, but the interpretation here is clear:
+ // A constant dataset has no skewness.
+ return double(0);
+ }
+ double skew = M3/(M2*sqrt(var));
+ return skew;
+ }
+ else
+ {
+ Real M1 = *first;
+ Real M2 = 0;
+ Real M3 = 0;
+ Real n = 2;
+ for (auto it = std::next(first); it != last; ++it)
+ {
+ Real delta21 = *it - M1;
+ Real tmp = delta21/n;
+ M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
+ M2 += tmp*(n-1)*delta21;
+ M1 += tmp;
+ n += 1;
+ }
+
+ Real var = M2/(n-1);
+ if (var == 0)
+ {
+ // The limit is technically undefined, but the interpretation here is clear:
+ // A constant dataset has no skewness.
+ return Real(0);
+ }
+ Real skew = M3/(M2*sqrt(var));
+ return skew;
+ }
+}
+
+template<class Container>
+inline auto skewness(Container const & v)
+{
+ return skewness(v.cbegin(), v.cend());
+}
+
+// Follows equation 1.5/1.6 of:
+// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
+template<class ForwardIterator>
+auto first_four_moments(ForwardIterator first, ForwardIterator last)
+{
+ using Real = typename std::iterator_traits<ForwardIterator>::value_type;
+ BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments.");
+ if constexpr (std::is_integral<Real>::value)
+ {
+ double M1 = *first;
+ double M2 = 0;
+ double M3 = 0;
+ double M4 = 0;
+ double n = 2;
+ for (auto it = std::next(first); it != last; ++it)
+ {
+ double delta21 = *it - M1;
+ double tmp = delta21/n;
+ M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
+ M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
+ M2 = M2 + tmp*(n-1)*delta21;
+ M1 = M1 + tmp;
+ n += 1;
+ }
+
+ return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
+ }
+ else
+ {
+ Real M1 = *first;
+ Real M2 = 0;
+ Real M3 = 0;
+ Real M4 = 0;
+ Real n = 2;
+ for (auto it = std::next(first); it != last; ++it)
+ {
+ Real delta21 = *it - M1;
+ Real tmp = delta21/n;
+ M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
+ M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
+ M2 = M2 + tmp*(n-1)*delta21;
+ M1 = M1 + tmp;
+ n += 1;
+ }
+
+ return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
+ }
+}
+
+template<class Container>
+inline auto first_four_moments(Container const & v)
+{
+ return first_four_moments(v.cbegin(), v.cend());
+}
+
+
+// Follows equation 1.6 of:
+// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
+template<class ForwardIterator>
+auto kurtosis(ForwardIterator first, ForwardIterator last)
+{
+ auto [M1, M2, M3, M4] = first_four_moments(first, last);
+ if (M2 == 0)
+ {
+ return M2;
+ }
+ return M4/(M2*M2);
+}
+
+template<class Container>
+inline auto kurtosis(Container const & v)
+{
+ return kurtosis(v.cbegin(), v.cend());
+}
+
+template<class ForwardIterator>
+auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
+{
+ return kurtosis(first, last) - 3;
+}
+
+template<class Container>
+inline auto excess_kurtosis(Container const & v)
+{
+ return excess_kurtosis(v.cbegin(), v.cend());
+}
+
+
+template<class RandomAccessIterator>
+auto median(RandomAccessIterator first, RandomAccessIterator last)
+{
+ size_t num_elems = std::distance(first, last);
+ BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
+ if (num_elems & 1)
+ {
+ auto middle = first + (num_elems - 1)/2;
+ std::nth_element(first, middle, last);
+ return *middle;
+ }
+ else
+ {
+ auto middle = first + num_elems/2 - 1;
+ std::nth_element(first, middle, last);
+ std::nth_element(middle, middle+1, last);
+ return (*middle + *(middle+1))/2;
+ }
+}
+
+
+template<class RandomAccessContainer>
+inline auto median(RandomAccessContainer & v)
+{
+ return median(v.begin(), v.end());
+}
+
+template<class RandomAccessIterator>
+auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
+{
+ using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
+ BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
+
+ std::sort(first, last);
+ if constexpr (std::is_integral<Real>::value)
+ {
+ double i = 1;
+ double num = 0;
+ double denom = 0;
+ for (auto it = first; it != last; ++it)
+ {
+ num += *it*i;
+ denom += *it;
+ ++i;
+ }
+
+ // If the l1 norm is zero, all elements are zero, so every element is the same.
+ if (denom == 0)
+ {
+ return double(0);
+ }
+
+ return ((2*num)/denom - i)/(i-1);
+ }
+ else
+ {
+ Real i = 1;
+ Real num = 0;
+ Real denom = 0;
+ for (auto it = first; it != last; ++it)
+ {
+ num += *it*i;
+ denom += *it;
+ ++i;
+ }
+
+ // If the l1 norm is zero, all elements are zero, so every element is the same.
+ if (denom == 0)
+ {
+ return Real(0);
+ }
+
+ return ((2*num)/denom - i)/(i-1);
+ }
+}
+
+template<class RandomAccessContainer>
+inline auto gini_coefficient(RandomAccessContainer & v)
+{
+ return gini_coefficient(v.begin(), v.end());
+}
+
+template<class RandomAccessIterator>
+inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
+{
+ size_t n = std::distance(first, last);
+ return n*gini_coefficient(first, last)/(n-1);
+}
+
+template<class RandomAccessContainer>
+inline auto sample_gini_coefficient(RandomAccessContainer & v)
+{
+ return sample_gini_coefficient(v.begin(), v.end());
+}
+
+template<class RandomAccessIterator>
+auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN())
+{
+ using std::abs;
+ using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
+ using std::isnan;
+ if (isnan(center))
+ {
+ center = boost::math::tools::median(first, last);
+ }
+ size_t num_elems = std::distance(first, last);
+ BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined.");
+ auto comparator = [&center](Real a, Real b) { return abs(a-center) < abs(b-center);};
+ if (num_elems & 1)
+ {
+ auto middle = first + (num_elems - 1)/2;
+ std::nth_element(first, middle, last, comparator);
+ return abs(*middle);
+ }
+ else
+ {
+ auto middle = first + num_elems/2 - 1;
+ std::nth_element(first, middle, last, comparator);
+ std::nth_element(middle, middle+1, last, comparator);
+ return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2));
+ }
+}
+
+template<class RandomAccessContainer>
+inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN())
+{
+ return median_absolute_deviation(v.begin(), v.end(), center);
+}
+
+}
+#endif