summaryrefslogtreecommitdiff
path: root/boost/math/tools/bivariate_statistics.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/math/tools/bivariate_statistics.hpp')
-rw-r--r--boost/math/tools/bivariate_statistics.hpp97
1 files changed, 97 insertions, 0 deletions
diff --git a/boost/math/tools/bivariate_statistics.hpp b/boost/math/tools/bivariate_statistics.hpp
new file mode 100644
index 0000000000..20b7500ed3
--- /dev/null
+++ b/boost/math/tools/bivariate_statistics.hpp
@@ -0,0 +1,97 @@
+// (C) Copyright Nick Thompson 2018.
+// Use, modification and distribution are subject to the
+// Boost Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
+#define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
+
+#include <iterator>
+#include <tuple>
+#include <boost/assert.hpp>
+#include <boost/multiprecision/detail/number_base.hpp>
+
+
+namespace boost{ namespace math{ namespace tools {
+
+template<class Container>
+auto means_and_covariance(Container const & u, Container const & v)
+{
+ using Real = typename Container::value_type;
+ using std::size;
+ BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
+ BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample.");
+
+ // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
+ Real cov = 0;
+ Real mu_u = u[0];
+ Real mu_v = v[0];
+
+ for(size_t i = 1; i < size(u); ++i)
+ {
+ Real u_tmp = (u[i] - mu_u)/(i+1);
+ Real v_tmp = v[i] - mu_v;
+ cov += i*u_tmp*v_tmp;
+ mu_u = mu_u + u_tmp;
+ mu_v = mu_v + v_tmp/(i+1);
+ }
+
+ return std::make_tuple(mu_u, mu_v, cov/size(u));
+}
+
+template<class Container>
+auto covariance(Container const & u, Container const & v)
+{
+ auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v);
+ return cov;
+}
+
+template<class Container>
+auto correlation_coefficient(Container const & u, Container const & v)
+{
+ using Real = typename Container::value_type;
+ using std::size;
+ BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
+ BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples.");
+
+ Real cov = 0;
+ Real mu_u = u[0];
+ Real mu_v = v[0];
+ Real Qu = 0;
+ Real Qv = 0;
+
+ for(size_t i = 1; i < size(u); ++i)
+ {
+ Real u_tmp = u[i] - mu_u;
+ Real v_tmp = v[i] - mu_v;
+ Qu = Qu + (i*u_tmp*u_tmp)/(i+1);
+ Qv = Qv + (i*v_tmp*v_tmp)/(i+1);
+ cov += i*u_tmp*v_tmp/(i+1);
+ mu_u = mu_u + u_tmp/(i+1);
+ mu_v = mu_v + v_tmp/(i+1);
+ }
+
+ // If both datasets are constant, then they are perfectly correlated.
+ if (Qu == 0 && Qv == 0)
+ {
+ return Real(1);
+ }
+ // If one dataset is constant and the other isn't, then they have no correlation:
+ if (Qu == 0 || Qv == 0)
+ {
+ return Real(0);
+ }
+
+ // Make sure rho in [-1, 1], even in the presence of numerical noise.
+ Real rho = cov/sqrt(Qu*Qv);
+ if (rho > 1) {
+ rho = 1;
+ }
+ if (rho < -1) {
+ rho = -1;
+ }
+ return rho;
+}
+
+}}}
+#endif