diff options
Diffstat (limited to 'boost/accumulators/statistics')
53 files changed, 9255 insertions, 0 deletions
diff --git a/boost/accumulators/statistics/count.hpp b/boost/accumulators/statistics/count.hpp new file mode 100644 index 0000000000..6d30b41e26 --- /dev/null +++ b/boost/accumulators/statistics/count.hpp @@ -0,0 +1,80 @@ +/////////////////////////////////////////////////////////////////////////////// +// count.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_COUNT_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_COUNT_HPP_EAN_28_10_2005 + +#include <boost/mpl/always.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + + /////////////////////////////////////////////////////////////////////////////// + // count_impl + struct count_impl + : accumulator_base + { + // for boost::result_of + typedef std::size_t result_type; + + count_impl(dont_care) + : cnt(0) + { + } + + void operator ()(dont_care) + { + ++this->cnt; + } + + result_type result(dont_care) const + { + return this->cnt; + } + + private: + std::size_t cnt; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::count +// +namespace tag +{ + struct count + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef mpl::always<accumulators::impl::count_impl> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::count +// +namespace extract +{ + extractor<tag::count> const count = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(count) +} + +using extract::count; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/covariance.hpp b/boost/accumulators/statistics/covariance.hpp new file mode 100644 index 0000000000..b34284b046 --- /dev/null +++ b/boost/accumulators/statistics/covariance.hpp @@ -0,0 +1,220 @@ +/////////////////////////////////////////////////////////////////////////////// +// covariance.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_COVARIANCE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_COVARIANCE_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <numeric> +#include <functional> +#include <complex> +#include <boost/mpl/assert.hpp> +#include <boost/mpl/bool.hpp> +#include <boost/range.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/numeric/ublas/io.hpp> +#include <boost/numeric/ublas/matrix.hpp> +#include <boost/type_traits/is_scalar.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/mean.hpp> + +namespace boost { namespace numeric +{ + namespace functional + { + struct std_vector_tag; + + /////////////////////////////////////////////////////////////////////////////// + // functional::outer_product + template<typename Left, typename Right, typename EnableIf = void> + struct outer_product_base + : functional::multiplies<Left, Right> + {}; + + template<typename Left, typename Right, typename LeftTag = typename tag<Left>::type, typename RightTag = typename tag<Right>::type> + struct outer_product + : outer_product_base<Left, Right, void> + {}; + + template<typename Left, typename Right> + struct outer_product<Left, Right, std_vector_tag, std_vector_tag> + : std::binary_function< + Left + , Right + , ublas::matrix< + typename functional::multiplies< + typename Left::value_type + , typename Right::value_type + >::result_type + > + > + { + typedef + ublas::matrix< + typename functional::multiplies< + typename Left::value_type + , typename Right::value_type + >::result_type + > + result_type; + + result_type + operator ()(Left & left, Right & right) const + { + std::size_t left_size = left.size(); + std::size_t right_size = right.size(); + result_type result(left_size, right_size); + for (std::size_t i = 0; i < left_size; ++i) + for (std::size_t j = 0; j < right_size; ++j) + result(i,j) = numeric::multiplies(left[i], right[j]); + return result; + } + }; + } + + namespace op + { + struct outer_product + : boost::detail::function2<functional::outer_product<_1, _2, functional::tag<_1>, functional::tag<_2> > > + {}; + } + + namespace + { + op::outer_product const &outer_product = boost::detail::pod_singleton<op::outer_product>::instance; + } + +}} + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // covariance_impl + // + /** + @brief Covariance Estimator + + An iterative Monte Carlo estimator for the covariance \f$\mathrm{Cov}(X,X')\f$, where \f$X\f$ is a sample + and \f$X'\f$ is a variate, is given by: + + \f[ + \hat{c}_n = \frac{n-1}{n} \hat{c}_{n-1} + \frac{1}{n-1}(X_n - \hat{\mu}_n)(X_n' - \hat{\mu}_n'),\quad n\ge2,\quad\hat{c}_1 = 0, + \f] + + \f$\hat{\mu}_n\f$ and \f$\hat{\mu}_n'\f$ being the means of the samples and variates. + */ + template<typename Sample, typename VariateType, typename VariateTag> + struct covariance_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type sample_type; + typedef typename numeric::functional::average<VariateType, std::size_t>::result_type variate_type; + // for boost::result_of + typedef typename numeric::functional::outer_product<sample_type, variate_type>::result_type result_type; + + template<typename Args> + covariance_impl(Args const &args) + : cov_( + numeric::outer_product( + numeric::average(args[sample | Sample()], (std::size_t)1) + , numeric::average(args[parameter::keyword<VariateTag>::get() | VariateType()], (std::size_t)1) + ) + ) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + + if (cnt > 1) + { + extractor<tag::mean_of_variates<VariateType, VariateTag> > const some_mean_of_variates = {}; + + this->cov_ = this->cov_*(cnt-1.)/cnt + + numeric::outer_product( + some_mean_of_variates(args) - args[parameter::keyword<VariateTag>::get()] + , mean(args) - args[sample] + ) / (cnt-1.); + } + } + + result_type result(dont_care) const + { + return this->cov_; + } + + private: + result_type cov_; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::covariance +// +namespace tag +{ + template<typename VariateType, typename VariateTag> + struct covariance + : depends_on<count, mean, mean_of_variates<VariateType, VariateTag> > + { + typedef accumulators::impl::covariance_impl<mpl::_1, VariateType, VariateTag> impl; + }; + + struct abstract_covariance + : depends_on<> + { + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::covariance +// +namespace extract +{ + extractor<tag::abstract_covariance> const covariance = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(covariance) +} + +using extract::covariance; + +template<typename VariateType, typename VariateTag> +struct feature_of<tag::covariance<VariateType, VariateTag> > + : feature_of<tag::abstract_covariance> +{ +}; + +// So that covariance can be automatically substituted with +// weighted_covariance when the weight parameter is non-void. +template<typename VariateType, typename VariateTag> +struct as_weighted_feature<tag::covariance<VariateType, VariateTag> > +{ + typedef tag::weighted_covariance<VariateType, VariateTag> type; +}; + +template<typename VariateType, typename VariateTag> +struct feature_of<tag::weighted_covariance<VariateType, VariateTag> > + : feature_of<tag::covariance<VariateType, VariateTag> > +{}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/density.hpp b/boost/accumulators/statistics/density.hpp new file mode 100644 index 0000000000..8835f47b69 --- /dev/null +++ b/boost/accumulators/statistics/density.hpp @@ -0,0 +1,246 @@ + +/////////////////////////////////////////////////////////////////////////////// +// density.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <functional> +#include <boost/range.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/max.hpp> +#include <boost/accumulators/statistics/min.hpp> + +namespace boost { namespace accumulators +{ + +/////////////////////////////////////////////////////////////////////////////// +// cache_size and num_bins named parameters +// +BOOST_PARAMETER_NESTED_KEYWORD(tag, density_cache_size, cache_size) +BOOST_PARAMETER_NESTED_KEYWORD(tag, density_num_bins, num_bins) + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // density_impl + // density histogram + /** + @brief Histogram density estimator + + The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins + are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the + maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally, + an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined, + the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is + return, where each pair contains the position of the bin (lower bound) and the samples count (normalized with the + total number of samples). + + @param density_cache_size Number of first samples used to determine min and max. + @param density_num_bins Number of bins (two additional bins collect under- and overflow samples). + */ + template<typename Sample> + struct density_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef std::vector<std::pair<float_type, float_type> > histogram_type; + typedef std::vector<float_type> array_type; + // for boost::result_of + typedef iterator_range<typename histogram_type::iterator> result_type; + + template<typename Args> + density_impl(Args const &args) + : cache_size(args[density_cache_size]) + , cache(cache_size) + , num_bins(args[density_num_bins]) + , samples_in_bin(num_bins + 2, 0.) + , bin_positions(num_bins + 2) + , histogram( + num_bins + 2 + , std::make_pair( + numeric::average(args[sample | Sample()],(std::size_t)1) + , numeric::average(args[sample | Sample()],(std::size_t)1) + ) + ) + , is_dirty(true) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + this->is_dirty = true; + + std::size_t cnt = count(args); + + // Fill up cache with cache_size first samples + if (cnt <= this->cache_size) + { + this->cache[cnt - 1] = args[sample]; + } + + // Once cache_size samples have been accumulated, create num_bins bins of same size between + // the minimum and maximum of the cached samples as well as an under- and and an overflow bin. + // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin). + if (cnt == this->cache_size) + { + float_type minimum = numeric::average((min)(args), (std::size_t)1); + float_type maximum = numeric::average((max)(args), (std::size_t)1); + float_type bin_size = numeric::average(maximum - minimum, this->num_bins ); + + // determine bin positions (their lower bounds) + for (std::size_t i = 0; i < this->num_bins + 2; ++i) + { + this->bin_positions[i] = minimum + (i - 1.) * bin_size; + } + + for (typename array_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter) + { + if (*iter < this->bin_positions[1]) + { + ++(this->samples_in_bin[0]); + } + else if (*iter >= this->bin_positions[this->num_bins + 1]) + { + ++(this->samples_in_bin[this->num_bins + 1]); + } + else + { + typename array_type::iterator it = std::upper_bound( + this->bin_positions.begin() + , this->bin_positions.end() + , *iter + ); + + std::size_t d = std::distance(this->bin_positions.begin(), it); + ++(this->samples_in_bin[d - 1]); + } + } + } + // Add each subsequent sample to the correct bin + else if (cnt > this->cache_size) + { + if (args[sample] < this->bin_positions[1]) + { + ++(this->samples_in_bin[0]); + } + else if (args[sample] >= this->bin_positions[this->num_bins + 1]) + { + ++(this->samples_in_bin[this->num_bins + 1]); + } + else + { + typename array_type::iterator it = std::upper_bound( + this->bin_positions.begin() + , this->bin_positions.end() + , args[sample] + ); + + std::size_t d = std::distance(this->bin_positions.begin(), it); + ++(this->samples_in_bin[d - 1]); + } + } + } + + /** + @pre The number of samples must meet or exceed the cache size + */ + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + // creates a vector of std::pair where each pair i holds + // the values bin_positions[i] (x-axis of histogram) and + // samples_in_bin[i] / cnt (y-axis of histogram). + + for (std::size_t i = 0; i < this->num_bins + 2; ++i) + { + this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::average(this->samples_in_bin[i], count(args))); + } + } + // returns a range of pairs + return make_iterator_range(this->histogram); + } + + private: + std::size_t cache_size; // number of cached samples + array_type cache; // cache to store the first cache_size samples + std::size_t num_bins; // number of bins + array_type samples_in_bin; // number of samples in each bin + array_type bin_positions; // lower bounds of bins + mutable histogram_type histogram; // histogram + mutable bool is_dirty; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::density +// +namespace tag +{ + struct density + : depends_on<count, min, max> + , density_cache_size + , density_num_bins + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::density_impl<mpl::_1> impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + /// tag::density::cache_size named parameter + /// tag::density::num_bins named parameter + static boost::parameter::keyword<density_cache_size> const cache_size; + static boost::parameter::keyword<density_num_bins> const num_bins; + #endif + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::density +// +namespace extract +{ + extractor<tag::density> const density = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(density) +} + +using extract::density; + +// So that density can be automatically substituted +// with weighted_density when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::density> +{ + typedef tag::weighted_density type; +}; + +template<> +struct feature_of<tag::weighted_density> + : feature_of<tag::density> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/error_of.hpp b/boost/accumulators/statistics/error_of.hpp new file mode 100644 index 0000000000..a29da02f57 --- /dev/null +++ b/boost/accumulators/statistics/error_of.hpp @@ -0,0 +1,99 @@ +/////////////////////////////////////////////////////////////////////////////// +// error_of.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_ERROR_OF_HPP_EAN_29_11_2005 +#define BOOST_ACCUMULATORS_STATISTICS_ERROR_OF_HPP_EAN_29_11_2005 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /// INTERNAL ONLY + /// + template<typename Feature> + struct this_feature_has_no_error_calculation + : mpl::false_ + { + }; + + /////////////////////////////////////////////////////////////////////////////// + // error_of_impl + /// INTERNAL ONLY + /// + template<typename Sample, typename Feature> + struct error_of_impl + : accumulator_base + { + // TODO: specialize this on the specific features that have errors we're + // interested in. + BOOST_MPL_ASSERT((this_feature_has_no_error_calculation<Feature>)); + + // for boost::result_of + typedef int result_type; + + error_of_impl(dont_care) + { + } + + result_type result(dont_care) const + { + return 0; + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::error_of +// +namespace tag +{ + template<typename Feature> + struct error_of + : depends_on<Feature> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::error_of_impl<mpl::_1, Feature> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::error_of +// +namespace extract +{ + BOOST_ACCUMULATORS_DEFINE_EXTRACTOR(tag, error_of, (typename)) +} + +using extract::error_of; + +// make tag::error_of<tag::feature(modifier)> work +template<typename Feature> +struct as_feature<tag::error_of<Feature> > +{ + typedef tag::error_of<typename as_feature<Feature>::type> type; +}; + +// make error_of<tag::mean> work with non-void weights (should become +// error_of<tag::weighted_mean> +template<typename Feature> +struct as_weighted_feature<tag::error_of<Feature> > +{ + typedef tag::error_of<typename as_weighted_feature<Feature>::type> type; +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/error_of_mean.hpp b/boost/accumulators/statistics/error_of_mean.hpp new file mode 100644 index 0000000000..9451d40f73 --- /dev/null +++ b/boost/accumulators/statistics/error_of_mean.hpp @@ -0,0 +1,73 @@ +/////////////////////////////////////////////////////////////////////////////// +// error_of.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_ERROR_OF_MEAN_HPP_EAN_27_03_2006 +#define BOOST_ACCUMULATORS_STATISTICS_ERROR_OF_MEAN_HPP_EAN_27_03_2006 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/error_of.hpp> +#include <boost/accumulators/statistics/variance.hpp> +#include <boost/accumulators/statistics/count.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // error_of_mean_impl + template<typename Sample, typename Variance> + struct error_of_mean_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + error_of_mean_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + using namespace std; + extractor<Variance> const variance = {}; + return sqrt(numeric::average(variance(args), count(args) - 1)); + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::error_of +// +namespace tag +{ + template<> + struct error_of<mean> + : depends_on<lazy_variance, count> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::error_of_mean_impl<mpl::_1, lazy_variance> impl; + }; + + template<> + struct error_of<immediate_mean> + : depends_on<variance, count> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::error_of_mean_impl<mpl::_1, variance> impl; + }; +} + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/extended_p_square.hpp b/boost/accumulators/statistics/extended_p_square.hpp new file mode 100644 index 0000000000..e8d4c0456b --- /dev/null +++ b/boost/accumulators/statistics/extended_p_square.hpp @@ -0,0 +1,293 @@ +/////////////////////////////////////////////////////////////////////////////// +// extended_p_square.hpp +// +// Copyright 2005 Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006 + +#include <vector> +#include <functional> +#include <boost/range/begin.hpp> +#include <boost/range/end.hpp> +#include <boost/range/iterator_range.hpp> +#include <boost/iterator/transform_iterator.hpp> +#include <boost/iterator/counting_iterator.hpp> +#include <boost/iterator/permutation_iterator.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/times2_iterator.hpp> + +namespace boost { namespace accumulators +{ +/////////////////////////////////////////////////////////////////////////////// +// probabilities named parameter +// +BOOST_PARAMETER_NESTED_KEYWORD(tag, extended_p_square_probabilities, probabilities) + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // extended_p_square_impl + // multiple quantile estimation + /** + @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm + + Extended \f$P^2\f$ algorithm for estimation of several quantiles without storing samples. + Assume that \f$m\f$ quantiles \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated. + Instead of storing the whole sample cumulative distribution, the algorithm maintains only + \f$m+2\f$ principal markers and \f$m+1\f$ middle markers, whose positions are updated + with each sample and whose heights are adjusted (if necessary) using a piecewise-parablic + formula. The heights of these central markers are the current estimates of the quantiles + and returned as an iterator range. + + For further details, see + + K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49, + Number 4 (October), 1986, p. 159-164. + + The extended \f$ P^2 \f$ algorithm generalizess the \f$ P^2 \f$ algorithm of + + R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and + histograms without storing observations, Communications of the ACM, + Volume 28 (October), Number 10, 1985, p. 1076-1085. + + @param extended_p_square_probabilities A vector of quantile probabilities. + */ + template<typename Sample> + struct extended_p_square_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef std::vector<float_type> array_type; + // for boost::result_of + typedef iterator_range< + detail::lvalue_index_iterator< + permutation_iterator< + typename array_type::const_iterator + , detail::times2_iterator + > + > + > result_type; + + template<typename Args> + extended_p_square_impl(Args const &args) + : probabilities( + boost::begin(args[extended_p_square_probabilities]) + , boost::end(args[extended_p_square_probabilities]) + ) + , heights(2 * probabilities.size() + 3) + , actual_positions(heights.size()) + , desired_positions(heights.size()) + , positions_increments(heights.size()) + { + std::size_t num_quantiles = this->probabilities.size(); + std::size_t num_markers = this->heights.size(); + + for(std::size_t i = 0; i < num_markers; ++i) + { + this->actual_positions[i] = i + 1; + } + + this->positions_increments[0] = 0.; + this->positions_increments[num_markers - 1] = 1.; + + for(std::size_t i = 0; i < num_quantiles; ++i) + { + this->positions_increments[2 * i + 2] = probabilities[i]; + } + + for(std::size_t i = 0; i <= num_quantiles; ++i) + { + this->positions_increments[2 * i + 1] = + 0.5 * (this->positions_increments[2 * i] + this->positions_increments[2 * i + 2]); + } + + for(std::size_t i = 0; i < num_markers; ++i) + { + this->desired_positions[i] = 1. + 2. * (num_quantiles + 1.) * this->positions_increments[i]; + } + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + + // m+2 principal markers and m+1 middle markers + std::size_t num_markers = 2 * this->probabilities.size() + 3; + + // first accumulate num_markers samples + if(cnt <= num_markers) + { + this->heights[cnt - 1] = args[sample]; + + // complete the initialization of heights by sorting + if(cnt == num_markers) + { + std::sort(this->heights.begin(), this->heights.end()); + } + } + else + { + std::size_t sample_cell = 1; + + // find cell k = sample_cell such that heights[k-1] <= sample < heights[k] + if(args[sample] < this->heights[0]) + { + this->heights[0] = args[sample]; + sample_cell = 1; + } + else if(args[sample] >= this->heights[num_markers - 1]) + { + this->heights[num_markers - 1] = args[sample]; + sample_cell = num_markers - 1; + } + else + { + typedef typename array_type::iterator iterator; + iterator it = std::upper_bound( + this->heights.begin() + , this->heights.end() + , args[sample] + ); + + sample_cell = std::distance(this->heights.begin(), it); + } + + // update actual positions of all markers above sample_cell index + for(std::size_t i = sample_cell; i < num_markers; ++i) + { + ++this->actual_positions[i]; + } + + // update desired positions of all markers + for(std::size_t i = 0; i < num_markers; ++i) + { + this->desired_positions[i] += this->positions_increments[i]; + } + + // adjust heights and actual positions of markers 1 to num_markers-2 if necessary + for(std::size_t i = 1; i <= num_markers - 2; ++i) + { + // offset to desired position + float_type d = this->desired_positions[i] - this->actual_positions[i]; + + // offset to next position + float_type dp = this->actual_positions[i+1] - this->actual_positions[i]; + + // offset to previous position + float_type dm = this->actual_positions[i-1] - this->actual_positions[i]; + + // height ds + float_type hp = (this->heights[i+1] - this->heights[i]) / dp; + float_type hm = (this->heights[i-1] - this->heights[i]) / dm; + + if((d >= 1 && dp > 1) || (d <= -1 && dm < -1)) + { + short sign_d = static_cast<short>(d / std::abs(d)); + + float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp + + (dp - sign_d) * hm); + + // try adjusting heights[i] using p-squared formula + if(this->heights[i - 1] < h && h < this->heights[i + 1]) + { + this->heights[i] = h; + } + else + { + // use linear formula + if(d > 0) + { + this->heights[i] += hp; + } + if(d < 0) + { + this->heights[i] -= hm; + } + } + this->actual_positions[i] += sign_d; + } + } + } + } + + result_type result(dont_care) const + { + // for i in [1,probabilities.size()], return heights[i * 2] + detail::times2_iterator idx_begin = detail::make_times2_iterator(1); + detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1); + + return result_type( + make_permutation_iterator(this->heights.begin(), idx_begin) + , make_permutation_iterator(this->heights.begin(), idx_end) + ); + } + + private: + array_type probabilities; // the quantile probabilities + array_type heights; // q_i + array_type actual_positions; // n_i + array_type desired_positions; // d_i + array_type positions_increments; // f_i + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::extended_p_square +// +namespace tag +{ + struct extended_p_square + : depends_on<count> + , extended_p_square_probabilities + { + typedef accumulators::impl::extended_p_square_impl<mpl::_1> impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + /// tag::extended_p_square::probabilities named paramter + static boost::parameter::keyword<tag::probabilities> const probabilities; + #endif + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::extended_p_square +// +namespace extract +{ + extractor<tag::extended_p_square> const extended_p_square = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square) +} + +using extract::extended_p_square; + +// So that extended_p_square can be automatically substituted with +// weighted_extended_p_square when the weight parameter is non-void +template<> +struct as_weighted_feature<tag::extended_p_square> +{ + typedef tag::weighted_extended_p_square type; +}; + +template<> +struct feature_of<tag::weighted_extended_p_square> + : feature_of<tag::extended_p_square> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/extended_p_square_quantile.hpp b/boost/accumulators/statistics/extended_p_square_quantile.hpp new file mode 100644 index 0000000000..b6c008d712 --- /dev/null +++ b/boost/accumulators/statistics/extended_p_square_quantile.hpp @@ -0,0 +1,319 @@ +/////////////////////////////////////////////////////////////////////////////// +// extended_p_square_quantile.hpp +// +// Copyright 2005 Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_QUANTILE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_QUANTILE_HPP_DE_01_01_2006 + +#include <vector> +#include <functional> +#include <boost/range/begin.hpp> +#include <boost/range/end.hpp> +#include <boost/range/iterator_range.hpp> +#include <boost/iterator/transform_iterator.hpp> +#include <boost/iterator/counting_iterator.hpp> +#include <boost/iterator/permutation_iterator.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> +#include <boost/accumulators/statistics/extended_p_square.hpp> +#include <boost/accumulators/statistics/weighted_extended_p_square.hpp> +#include <boost/accumulators/statistics/times2_iterator.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // extended_p_square_quantile_impl + // single quantile estimation + /** + @brief Quantile estimation using the extended \f$P^2\f$ algorithm for weighted and unweighted samples + + Uses the quantile estimates calculated by the extended \f$P^2\f$ algorithm to compute + intermediate quantile estimates by means of quadratic interpolation. + + @param quantile_probability The probability of the quantile to be estimated. + */ + template<typename Sample, typename Impl1, typename Impl2> // Impl1: weighted/unweighted // Impl2: linear/quadratic + struct extended_p_square_quantile_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef std::vector<float_type> array_type; + typedef iterator_range< + detail::lvalue_index_iterator< + permutation_iterator< + typename array_type::const_iterator + , detail::times2_iterator + > + > + > range_type; + // for boost::result_of + typedef float_type result_type; + + template<typename Args> + extended_p_square_quantile_impl(Args const &args) + : probabilities( + boost::begin(args[extended_p_square_probabilities]) + , boost::end(args[extended_p_square_probabilities]) + ) + { + } + + template<typename Args> + result_type result(Args const &args) const + { + typedef + typename mpl::if_< + is_same<Impl1, weighted> + , tag::weighted_extended_p_square + , tag::extended_p_square + >::type + extended_p_square_tag; + + extractor<extended_p_square_tag> const some_extended_p_square = {}; + + array_type heights(some_extended_p_square(args).size()); + std::copy(some_extended_p_square(args).begin(), some_extended_p_square(args).end(), heights.begin()); + + this->probability = args[quantile_probability]; + + typename array_type::const_iterator iter_probs = std::lower_bound(this->probabilities.begin(), this->probabilities.end(), this->probability); + std::size_t dist = std::distance(this->probabilities.begin(), iter_probs); + typename array_type::const_iterator iter_heights = heights.begin() + dist; + + // If this->probability is not in a valid range return NaN or throw exception + if (this->probability < *this->probabilities.begin() || this->probability > *(this->probabilities.end() - 1)) + { + if (std::numeric_limits<result_type>::has_quiet_NaN) + { + return std::numeric_limits<result_type>::quiet_NaN(); + } + else + { + std::ostringstream msg; + msg << "probability = " << this->probability << " is not in valid range ("; + msg << *this->probabilities.begin() << ", " << *(this->probabilities.end() - 1) << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + return Sample(0); + } + + } + + if (*iter_probs == this->probability) + { + return heights[dist]; + } + else + { + result_type res; + + if (is_same<Impl2, linear>::value) + { + ///////////////////////////////////////////////////////////////////////////////// + // LINEAR INTERPOLATION + // + float_type p1 = *iter_probs; + float_type p0 = *(iter_probs - 1); + float_type h1 = *iter_heights; + float_type h0 = *(iter_heights - 1); + + float_type a = numeric::average(h1 - h0, p1 - p0); + float_type b = h1 - p1 * a; + + res = a * this->probability + b; + } + else + { + ///////////////////////////////////////////////////////////////////////////////// + // QUADRATIC INTERPOLATION + // + float_type p0, p1, p2; + float_type h0, h1, h2; + + if ( (dist == 1 || *iter_probs - this->probability <= this->probability - *(iter_probs - 1) ) && dist != this->probabilities.size() - 1 ) + { + p0 = *(iter_probs - 1); + p1 = *iter_probs; + p2 = *(iter_probs + 1); + h0 = *(iter_heights - 1); + h1 = *iter_heights; + h2 = *(iter_heights + 1); + } + else + { + p0 = *(iter_probs - 2); + p1 = *(iter_probs - 1); + p2 = *iter_probs; + h0 = *(iter_heights - 2); + h1 = *(iter_heights - 1); + h2 = *iter_heights; + } + + float_type hp21 = numeric::average(h2 - h1, p2 - p1); + float_type hp10 = numeric::average(h1 - h0, p1 - p0); + float_type p21 = numeric::average(p2 * p2 - p1 * p1, p2 - p1); + float_type p10 = numeric::average(p1 * p1 - p0 * p0, p1 - p0); + + float_type a = numeric::average(hp21 - hp10, p21 - p10); + float_type b = hp21 - a * p21; + float_type c = h2 - a * p2 * p2 - b * p2; + + res = a * this->probability * this-> probability + b * this->probability + c; + } + + return res; + } + + } + private: + + array_type probabilities; + mutable float_type probability; + + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::extended_p_square_quantile +// +namespace tag +{ + struct extended_p_square_quantile + : depends_on<extended_p_square> + { + typedef accumulators::impl::extended_p_square_quantile_impl<mpl::_1, unweighted, linear> impl; + }; + struct extended_p_square_quantile_quadratic + : depends_on<extended_p_square> + { + typedef accumulators::impl::extended_p_square_quantile_impl<mpl::_1, unweighted, quadratic> impl; + }; + struct weighted_extended_p_square_quantile + : depends_on<weighted_extended_p_square> + { + typedef accumulators::impl::extended_p_square_quantile_impl<mpl::_1, weighted, linear> impl; + }; + struct weighted_extended_p_square_quantile_quadratic + : depends_on<weighted_extended_p_square> + { + typedef accumulators::impl::extended_p_square_quantile_impl<mpl::_1, weighted, quadratic> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::extended_p_square_quantile +// extract::weighted_extended_p_square_quantile +// +namespace extract +{ + extractor<tag::extended_p_square_quantile> const extended_p_square_quantile = {}; + extractor<tag::extended_p_square_quantile_quadratic> const extended_p_square_quantile_quadratic = {}; + extractor<tag::weighted_extended_p_square_quantile> const weighted_extended_p_square_quantile = {}; + extractor<tag::weighted_extended_p_square_quantile_quadratic> const weighted_extended_p_square_quantile_quadratic = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square_quantile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square_quantile_quadratic) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_extended_p_square_quantile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_extended_p_square_quantile_quadratic) +} + +using extract::extended_p_square_quantile; +using extract::extended_p_square_quantile_quadratic; +using extract::weighted_extended_p_square_quantile; +using extract::weighted_extended_p_square_quantile_quadratic; + +// extended_p_square_quantile(linear) -> extended_p_square_quantile +template<> +struct as_feature<tag::extended_p_square_quantile(linear)> +{ + typedef tag::extended_p_square_quantile type; +}; + +// extended_p_square_quantile(quadratic) -> extended_p_square_quantile_quadratic +template<> +struct as_feature<tag::extended_p_square_quantile(quadratic)> +{ + typedef tag::extended_p_square_quantile_quadratic type; +}; + +// weighted_extended_p_square_quantile(linear) -> weighted_extended_p_square_quantile +template<> +struct as_feature<tag::weighted_extended_p_square_quantile(linear)> +{ + typedef tag::weighted_extended_p_square_quantile type; +}; + +// weighted_extended_p_square_quantile(quadratic) -> weighted_extended_p_square_quantile_quadratic +template<> +struct as_feature<tag::weighted_extended_p_square_quantile(quadratic)> +{ + typedef tag::weighted_extended_p_square_quantile_quadratic type; +}; + +// for the purposes of feature-based dependency resolution, +// extended_p_square_quantile and weighted_extended_p_square_quantile +// provide the same feature as quantile +template<> +struct feature_of<tag::extended_p_square_quantile> + : feature_of<tag::quantile> +{ +}; +template<> +struct feature_of<tag::extended_p_square_quantile_quadratic> + : feature_of<tag::quantile> +{ +}; +// So that extended_p_square_quantile can be automatically substituted with +// weighted_extended_p_square_quantile when the weight parameter is non-void +template<> +struct as_weighted_feature<tag::extended_p_square_quantile> +{ + typedef tag::weighted_extended_p_square_quantile type; +}; + +template<> +struct feature_of<tag::weighted_extended_p_square_quantile> + : feature_of<tag::extended_p_square_quantile> +{ +}; + +// So that extended_p_square_quantile_quadratic can be automatically substituted with +// weighted_extended_p_square_quantile_quadratic when the weight parameter is non-void +template<> +struct as_weighted_feature<tag::extended_p_square_quantile_quadratic> +{ + typedef tag::weighted_extended_p_square_quantile_quadratic type; +}; +template<> +struct feature_of<tag::weighted_extended_p_square_quantile_quadratic> + : feature_of<tag::extended_p_square_quantile_quadratic> +{ +}; + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/kurtosis.hpp b/boost/accumulators/statistics/kurtosis.hpp new file mode 100644 index 0000000000..4c5c23d5d7 --- /dev/null +++ b/boost/accumulators/statistics/kurtosis.hpp @@ -0,0 +1,112 @@ +/////////////////////////////////////////////////////////////////////////////// +// kurtosis.hpp +// +// Copyright 2006 Olivier Gygi, Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_KURTOSIS_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_KURTOSIS_HPP_EAN_28_10_2005 + +#include <limits> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics/mean.hpp> +#include <boost/accumulators/statistics/moment.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // kurtosis_impl + /** + @brief Kurtosis estimation + + The kurtosis of a sample distribution is defined as the ratio of the 4th central moment and the square of the 2nd central + moment (the variance) of the samples, minus 3. The term \f$ -3 \f$ is added in order to ensure that the normal distribution + has zero kurtosis. The kurtosis can also be expressed by the simple moments: + + \f[ + \hat{g}_2 = + \frac + {\widehat{m}_n^{(4)}-4\widehat{m}_n^{(3)}\hat{\mu}_n+6\widehat{m}_n^{(2)}\hat{\mu}_n^2-3\hat{\mu}_n^4} + {\left(\widehat{m}_n^{(2)} - \hat{\mu}_n^{2}\right)^2} - 3, + \f] + + where \f$ \widehat{m}_n^{(i)} \f$ are the \f$ i \f$-th moment and \f$ \hat{\mu}_n \f$ the mean (first moment) of the + \f$ n \f$ samples. + */ + template<typename Sample> + struct kurtosis_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, Sample>::result_type result_type; + + kurtosis_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + return numeric::average( + accumulators::moment<4>(args) + - 4. * accumulators::moment<3>(args) * mean(args) + + 6. * accumulators::moment<2>(args) * mean(args) * mean(args) + - 3. * mean(args) * mean(args) * mean(args) * mean(args) + , ( accumulators::moment<2>(args) - mean(args) * mean(args) ) + * ( accumulators::moment<2>(args) - mean(args) * mean(args) ) + ) - 3.; + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::kurtosis +// +namespace tag +{ + struct kurtosis + : depends_on<mean, moment<2>, moment<3>, moment<4> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::kurtosis_impl<mpl::_1> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::kurtosis +// +namespace extract +{ + extractor<tag::kurtosis> const kurtosis = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(kurtosis) +} + +using extract::kurtosis; + +// So that kurtosis can be automatically substituted with +// weighted_kurtosis when the weight parameter is non-void +template<> +struct as_weighted_feature<tag::kurtosis> +{ + typedef tag::weighted_kurtosis type; +}; + +template<> +struct feature_of<tag::weighted_kurtosis> + : feature_of<tag::kurtosis> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/max.hpp b/boost/accumulators/statistics/max.hpp new file mode 100644 index 0000000000..820f6593f0 --- /dev/null +++ b/boost/accumulators/statistics/max.hpp @@ -0,0 +1,85 @@ +/////////////////////////////////////////////////////////////////////////////// +// max.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_MAX_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_MAX_HPP_EAN_28_10_2005 + +#include <limits> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // max_impl + template<typename Sample> + struct max_impl + : accumulator_base + { + // for boost::result_of + typedef Sample result_type; + + template<typename Args> + max_impl(Args const &args) + : max_(numeric::as_min(args[sample | Sample()])) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + numeric::max_assign(this->max_, args[sample]); + } + + result_type result(dont_care) const + { + return this->max_; + } + + private: + Sample max_; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::max +// +namespace tag +{ + struct max + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::max_impl<mpl::_1> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::max +// +namespace extract +{ + extractor<tag::max> const max = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(max) +} + +using extract::max; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/mean.hpp b/boost/accumulators/statistics/mean.hpp new file mode 100644 index 0000000000..0b3ba5e7cd --- /dev/null +++ b/boost/accumulators/statistics/mean.hpp @@ -0,0 +1,298 @@ +/////////////////////////////////////////////////////////////////////////////// +// mean.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_MEAN_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_MEAN_HPP_EAN_28_10_2005 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/sum.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // mean_impl + // lazy, by default + template<typename Sample, typename SumFeature> + struct mean_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + mean_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + extractor<SumFeature> sum; + return numeric::average(sum(args), count(args)); + } + }; + + template<typename Sample, typename Tag> + struct immediate_mean_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + template<typename Args> + immediate_mean_impl(Args const &args) + : mean(numeric::average(args[sample | Sample()], numeric::one<std::size_t>::value)) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + this->mean = numeric::average( + (this->mean * (cnt - 1)) + args[parameter::keyword<Tag>::get()] + , cnt + ); + } + + result_type result(dont_care) const + { + return this->mean; + } + + private: + result_type mean; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::mean +// tag::immediate_mean +// tag::mean_of_weights +// tag::immediate_mean_of_weights +// tag::mean_of_variates +// tag::immediate_mean_of_variates +// +namespace tag +{ + struct mean + : depends_on<count, sum> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::mean_impl<mpl::_1, sum> impl; + }; + struct immediate_mean + : depends_on<count> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::immediate_mean_impl<mpl::_1, tag::sample> impl; + }; + struct mean_of_weights + : depends_on<count, sum_of_weights> + { + typedef mpl::true_ is_weight_accumulator; + /// INTERNAL ONLY + /// + typedef accumulators::impl::mean_impl<mpl::_2, sum_of_weights> impl; + }; + struct immediate_mean_of_weights + : depends_on<count> + { + typedef mpl::true_ is_weight_accumulator; + /// INTERNAL ONLY + /// + typedef accumulators::impl::immediate_mean_impl<mpl::_2, tag::weight> impl; + }; + template<typename VariateType, typename VariateTag> + struct mean_of_variates + : depends_on<count, sum_of_variates<VariateType, VariateTag> > + { + /// INTERNAL ONLY + /// + typedef mpl::always<accumulators::impl::mean_impl<VariateType, sum_of_variates<VariateType, VariateTag> > > impl; + }; + template<typename VariateType, typename VariateTag> + struct immediate_mean_of_variates + : depends_on<count> + { + /// INTERNAL ONLY + /// + typedef mpl::always<accumulators::impl::immediate_mean_impl<VariateType, VariateTag> > impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::mean +// extract::mean_of_weights +// extract::mean_of_variates +// +namespace extract +{ + extractor<tag::mean> const mean = {}; + extractor<tag::mean_of_weights> const mean_of_weights = {}; + BOOST_ACCUMULATORS_DEFINE_EXTRACTOR(tag, mean_of_variates, (typename)(typename)) + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(mean) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(mean_of_weights) +} + +using extract::mean; +using extract::mean_of_weights; +using extract::mean_of_variates; + +// mean(lazy) -> mean +template<> +struct as_feature<tag::mean(lazy)> +{ + typedef tag::mean type; +}; + +// mean(immediate) -> immediate_mean +template<> +struct as_feature<tag::mean(immediate)> +{ + typedef tag::immediate_mean type; +}; + +// mean_of_weights(lazy) -> mean_of_weights +template<> +struct as_feature<tag::mean_of_weights(lazy)> +{ + typedef tag::mean_of_weights type; +}; + +// mean_of_weights(immediate) -> immediate_mean_of_weights +template<> +struct as_feature<tag::mean_of_weights(immediate)> +{ + typedef tag::immediate_mean_of_weights type; +}; + +// mean_of_variates<VariateType, VariateTag>(lazy) -> mean_of_variates<VariateType, VariateTag> +template<typename VariateType, typename VariateTag> +struct as_feature<tag::mean_of_variates<VariateType, VariateTag>(lazy)> +{ + typedef tag::mean_of_variates<VariateType, VariateTag> type; +}; + +// mean_of_variates<VariateType, VariateTag>(immediate) -> immediate_mean_of_variates<VariateType, VariateTag> +template<typename VariateType, typename VariateTag> +struct as_feature<tag::mean_of_variates<VariateType, VariateTag>(immediate)> +{ + typedef tag::immediate_mean_of_variates<VariateType, VariateTag> type; +}; + +// for the purposes of feature-based dependency resolution, +// immediate_mean provides the same feature as mean +template<> +struct feature_of<tag::immediate_mean> + : feature_of<tag::mean> +{ +}; + +// for the purposes of feature-based dependency resolution, +// immediate_mean provides the same feature as mean +template<> +struct feature_of<tag::immediate_mean_of_weights> + : feature_of<tag::mean_of_weights> +{ +}; + +// for the purposes of feature-based dependency resolution, +// immediate_mean provides the same feature as mean +template<typename VariateType, typename VariateTag> +struct feature_of<tag::immediate_mean_of_variates<VariateType, VariateTag> > + : feature_of<tag::mean_of_variates<VariateType, VariateTag> > +{ +}; + +// So that mean can be automatically substituted with +// weighted_mean when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::mean> +{ + typedef tag::weighted_mean type; +}; + +template<> +struct feature_of<tag::weighted_mean> + : feature_of<tag::mean> +{}; + +// So that immediate_mean can be automatically substituted with +// immediate_weighted_mean when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::immediate_mean> +{ + typedef tag::immediate_weighted_mean type; +}; + +template<> +struct feature_of<tag::immediate_weighted_mean> + : feature_of<tag::immediate_mean> +{}; + +// So that mean_of_weights<> can be automatically substituted with +// weighted_mean_of_variates<> when the weight parameter is non-void. +template<typename VariateType, typename VariateTag> +struct as_weighted_feature<tag::mean_of_variates<VariateType, VariateTag> > +{ + typedef tag::weighted_mean_of_variates<VariateType, VariateTag> type; +}; + +template<typename VariateType, typename VariateTag> +struct feature_of<tag::weighted_mean_of_variates<VariateType, VariateTag> > + : feature_of<tag::mean_of_variates<VariateType, VariateTag> > +{ +}; + +// So that immediate_mean_of_weights<> can be automatically substituted with +// immediate_weighted_mean_of_variates<> when the weight parameter is non-void. +template<typename VariateType, typename VariateTag> +struct as_weighted_feature<tag::immediate_mean_of_variates<VariateType, VariateTag> > +{ + typedef tag::immediate_weighted_mean_of_variates<VariateType, VariateTag> type; +}; + +template<typename VariateType, typename VariateTag> +struct feature_of<tag::immediate_weighted_mean_of_variates<VariateType, VariateTag> > + : feature_of<tag::immediate_mean_of_variates<VariateType, VariateTag> > +{ +}; + +//////////////////////////////////////////////////////////////////////////// +//// droppable_accumulator<mean_impl> +//// need to specialize droppable lazy mean to cache the result at the +//// point the accumulator is dropped. +///// INTERNAL ONLY +///// +//template<typename Sample, typename SumFeature> +//struct droppable_accumulator<impl::mean_impl<Sample, SumFeature> > +// : droppable_accumulator_base< +// with_cached_result<impl::mean_impl<Sample, SumFeature> > +// > +//{ +// template<typename Args> +// droppable_accumulator(Args const &args) +// : droppable_accumulator::base(args) +// { +// } +//}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/median.hpp b/boost/accumulators/statistics/median.hpp new file mode 100644 index 0000000000..13ebb28621 --- /dev/null +++ b/boost/accumulators/statistics/median.hpp @@ -0,0 +1,301 @@ +/////////////////////////////////////////////////////////////////////////////// +// median.hpp +// +// Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005 + +#include <boost/mpl/placeholders.hpp> +#include <boost/range/iterator_range.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/p_square_quantile.hpp> +#include <boost/accumulators/statistics/density.hpp> +#include <boost/accumulators/statistics/p_square_cumulative_distribution.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // median_impl + // + /** + @brief Median estimation based on the \f$P^2\f$ quantile estimator + + The \f$P^2\f$ algorithm is invoked with a quantile probability of 0.5. + */ + template<typename Sample> + struct median_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + median_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + return p_square_quantile_for_median(args); + } + }; + /////////////////////////////////////////////////////////////////////////////// + // with_density_median_impl + // + /** + @brief Median estimation based on the density estimator + + The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being + the total number of samples. It returns the approximate horizontal position of this sample, + based on a linear interpolation inside the bin. + */ + template<typename Sample> + struct with_density_median_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef std::vector<std::pair<float_type, float_type> > histogram_type; + typedef iterator_range<typename histogram_type::iterator> range_type; + // for boost::result_of + typedef float_type result_type; + + template<typename Args> + with_density_median_impl(Args const &args) + : sum(numeric::average(args[sample | Sample()], (std::size_t)1)) + , is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + std::size_t cnt = count(args); + range_type histogram = density(args); + typename range_type::iterator it = histogram.begin(); + while (this->sum < 0.5 * cnt) + { + this->sum += it->second * cnt; + ++it; + } + --it; + float_type over = numeric::average(this->sum - 0.5 * cnt, it->second * cnt); + this->median = it->first * over + (it + 1)->first * (1. - over); + } + + return this->median; + } + + private: + mutable float_type sum; + mutable bool is_dirty; + mutable float_type median; + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_p_square_cumulative_distribution_median_impl + // + /** + @brief Median estimation based on the \f$P^2\f$ cumulative distribution estimator + + The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It + returns the approximate horizontal position of where the cumulative distribution + equals 0.5, based on a linear interpolation inside the bin. + */ + template<typename Sample> + struct with_p_square_cumulative_distribution_median_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef std::vector<std::pair<float_type, float_type> > histogram_type; + typedef iterator_range<typename histogram_type::iterator> range_type; + // for boost::result_of + typedef float_type result_type; + + with_p_square_cumulative_distribution_median_impl(dont_care) + : is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + range_type histogram = p_square_cumulative_distribution(args); + typename range_type::iterator it = histogram.begin(); + while (it->second < 0.5) + { + ++it; + } + float_type over = numeric::average(it->second - 0.5, it->second - (it - 1)->second); + this->median = it->first * over + (it + 1)->first * ( 1. - over ); + } + + return this->median; + } + private: + + mutable bool is_dirty; + mutable float_type median; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::median +// tag::with_densisty_median +// tag::with_p_square_cumulative_distribution_median +// +namespace tag +{ + struct median + : depends_on<p_square_quantile_for_median> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::median_impl<mpl::_1> impl; + }; + struct with_density_median + : depends_on<count, density> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_density_median_impl<mpl::_1> impl; + }; + struct with_p_square_cumulative_distribution_median + : depends_on<p_square_cumulative_distribution> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_p_square_cumulative_distribution_median_impl<mpl::_1> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::median +// extract::with_density_median +// extract::with_p_square_cumulative_distribution_median +// +namespace extract +{ + extractor<tag::median> const median = {}; + extractor<tag::with_density_median> const with_density_median = {}; + extractor<tag::with_p_square_cumulative_distribution_median> const with_p_square_cumulative_distribution_median = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(median) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_density_median) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_p_square_cumulative_distribution_median) +} + +using extract::median; +using extract::with_density_median; +using extract::with_p_square_cumulative_distribution_median; + +// median(with_p_square_quantile) -> median +template<> +struct as_feature<tag::median(with_p_square_quantile)> +{ + typedef tag::median type; +}; + +// median(with_density) -> with_density_median +template<> +struct as_feature<tag::median(with_density)> +{ + typedef tag::with_density_median type; +}; + +// median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_median +template<> +struct as_feature<tag::median(with_p_square_cumulative_distribution)> +{ + typedef tag::with_p_square_cumulative_distribution_median type; +}; + +// for the purposes of feature-based dependency resolution, +// with_density_median and with_p_square_cumulative_distribution_median +// provide the same feature as median +template<> +struct feature_of<tag::with_density_median> + : feature_of<tag::median> +{ +}; + +template<> +struct feature_of<tag::with_p_square_cumulative_distribution_median> + : feature_of<tag::median> +{ +}; + +// So that median can be automatically substituted with +// weighted_median when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::median> +{ + typedef tag::weighted_median type; +}; + +template<> +struct feature_of<tag::weighted_median> + : feature_of<tag::median> +{ +}; + +// So that with_density_median can be automatically substituted with +// with_density_weighted_median when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::with_density_median> +{ + typedef tag::with_density_weighted_median type; +}; + +template<> +struct feature_of<tag::with_density_weighted_median> + : feature_of<tag::with_density_median> +{ +}; + +// So that with_p_square_cumulative_distribution_median can be automatically substituted with +// with_p_square_cumulative_distribution_weighted_median when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::with_p_square_cumulative_distribution_median> +{ + typedef tag::with_p_square_cumulative_distribution_weighted_median type; +}; + +template<> +struct feature_of<tag::with_p_square_cumulative_distribution_weighted_median> + : feature_of<tag::with_p_square_cumulative_distribution_median> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/min.hpp b/boost/accumulators/statistics/min.hpp new file mode 100644 index 0000000000..83943bdb3c --- /dev/null +++ b/boost/accumulators/statistics/min.hpp @@ -0,0 +1,85 @@ +/////////////////////////////////////////////////////////////////////////////// +// min.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_MIN_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_MIN_HPP_EAN_28_10_2005 + +#include <limits> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // min_impl + template<typename Sample> + struct min_impl + : accumulator_base + { + // for boost::result_of + typedef Sample result_type; + + template<typename Args> + min_impl(Args const &args) + : min_(numeric::as_max(args[sample | Sample()])) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + numeric::min_assign(this->min_, args[sample]); + } + + result_type result(dont_care) const + { + return this->min_; + } + + private: + Sample min_; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::min +// +namespace tag +{ + struct min + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::min_impl<mpl::_1> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::min +// +namespace extract +{ + extractor<tag::min> const min = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(min) +} + +using extract::min; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/moment.hpp b/boost/accumulators/statistics/moment.hpp new file mode 100644 index 0000000000..20421730c3 --- /dev/null +++ b/boost/accumulators/statistics/moment.hpp @@ -0,0 +1,125 @@ +/////////////////////////////////////////////////////////////////////////////// +// moment.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_MOMENT_HPP_EAN_15_11_2005 +#define BOOST_ACCUMULATORS_STATISTICS_MOMENT_HPP_EAN_15_11_2005 + +#include <boost/config/no_tr1/cmath.hpp> +#include <boost/mpl/int.hpp> +#include <boost/mpl/assert.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> + +namespace boost { namespace numeric +{ + /// INTERNAL ONLY + /// + template<typename T> + T const &pow(T const &x, mpl::int_<1>) + { + return x; + } + + /// INTERNAL ONLY + /// + template<typename T, int N> + T pow(T const &x, mpl::int_<N>) + { + using namespace operators; + T y = numeric::pow(x, mpl::int_<N/2>()); + T z = y * y; + return (N % 2) ? (z * x) : z; + } +}} + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // moment_impl + template<typename N, typename Sample> + struct moment_impl + : accumulator_base // TODO: also depends_on sum of powers + { + BOOST_MPL_ASSERT_RELATION(N::value, >, 0); + // for boost::result_of + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + template<typename Args> + moment_impl(Args const &args) + : sum(args[sample | Sample()]) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + this->sum += numeric::pow(args[sample], N()); + } + + template<typename Args> + result_type result(Args const &args) const + { + return numeric::average(this->sum, count(args)); + } + + private: + Sample sum; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::moment +// +namespace tag +{ + template<int N> + struct moment + : depends_on<count> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::moment_impl<mpl::int_<N>, mpl::_1> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::moment +// +namespace extract +{ + BOOST_ACCUMULATORS_DEFINE_EXTRACTOR(tag, moment, (int)) +} + +using extract::moment; + +// So that moment<N> can be automatically substituted with +// weighted_moment<N> when the weight parameter is non-void +template<int N> +struct as_weighted_feature<tag::moment<N> > +{ + typedef tag::weighted_moment<N> type; +}; + +template<int N> +struct feature_of<tag::weighted_moment<N> > + : feature_of<tag::moment<N> > +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/p_square_cumulative_distribution.hpp b/boost/accumulators/statistics/p_square_cumulative_distribution.hpp new file mode 100644 index 0000000000..437469c65b --- /dev/null +++ b/boost/accumulators/statistics/p_square_cumulative_distribution.hpp @@ -0,0 +1,260 @@ +/////////////////////////////////////////////////////////////////////////////// +// p_square_cumulative_distribution.hpp +// +// Copyright 2005 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_CUMULATIVE_DISTRIBUTION_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_CUMULATIVE_DISTRIBUTION_HPP_DE_01_01_2006 + +#include <vector> +#include <functional> +#include <boost/parameter/keyword.hpp> +#include <boost/range.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> + +namespace boost { namespace accumulators +{ +/////////////////////////////////////////////////////////////////////////////// +// num_cells named parameter +// +BOOST_PARAMETER_NESTED_KEYWORD(tag, p_square_cumulative_distribution_num_cells, num_cells) + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // p_square_cumulative_distribution_impl + // cumulative_distribution calculation (as histogram) + /** + @brief Histogram calculation of the cumulative distribution with the \f$P^2\f$ algorithm + + A histogram of the sample cumulative distribution is computed dynamically without storing samples + based on the \f$ P^2 \f$ algorithm. The returned histogram has a specifiable amount (num_cells) + equiprobable (and not equal-sized) cells. + + For further details, see + + R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and + histograms without storing observations, Communications of the ACM, + Volume 28 (October), Number 10, 1985, p. 1076-1085. + + @param p_square_cumulative_distribution_num_cells. + */ + template<typename Sample> + struct p_square_cumulative_distribution_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef std::vector<float_type> array_type; + typedef std::vector<std::pair<float_type, float_type> > histogram_type; + // for boost::result_of + typedef iterator_range<typename histogram_type::iterator> result_type; + + template<typename Args> + p_square_cumulative_distribution_impl(Args const &args) + : num_cells(args[p_square_cumulative_distribution_num_cells]) + , heights(num_cells + 1) + , actual_positions(num_cells + 1) + , desired_positions(num_cells + 1) + , positions_increments(num_cells + 1) + , histogram(num_cells + 1) + , is_dirty(true) + { + std::size_t b = this->num_cells; + + for (std::size_t i = 0; i < b + 1; ++i) + { + this->actual_positions[i] = i + 1.; + this->desired_positions[i] = i + 1.; + this->positions_increments[i] = numeric::average(i, b); + } + } + + template<typename Args> + void operator ()(Args const &args) + { + this->is_dirty = true; + + std::size_t cnt = count(args); + std::size_t sample_cell = 1; // k + std::size_t b = this->num_cells; + + // accumulate num_cells + 1 first samples + if (cnt <= b + 1) + { + this->heights[cnt - 1] = args[sample]; + + // complete the initialization of heights by sorting + if (cnt == b + 1) + { + std::sort(this->heights.begin(), this->heights.end()); + } + } + else + { + // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values + if (args[sample] < this->heights[0]) + { + this->heights[0] = args[sample]; + sample_cell = 1; + } + else if (this->heights[b] <= args[sample]) + { + this->heights[b] = args[sample]; + sample_cell = b; + } + else + { + typename array_type::iterator it; + it = std::upper_bound( + this->heights.begin() + , this->heights.end() + , args[sample] + ); + + sample_cell = std::distance(this->heights.begin(), it); + } + + // increment positions of markers above sample_cell + for (std::size_t i = sample_cell; i < b + 1; ++i) + { + ++this->actual_positions[i]; + } + + // update desired position of markers 2 to num_cells + 1 + // (desired position of first marker is always 1) + for (std::size_t i = 1; i < b + 1; ++i) + { + this->desired_positions[i] += this->positions_increments[i]; + } + + // adjust heights of markers 2 to num_cells if necessary + for (std::size_t i = 1; i < b; ++i) + { + // offset to desire position + float_type d = this->desired_positions[i] - this->actual_positions[i]; + + // offset to next position + float_type dp = this->actual_positions[i + 1] - this->actual_positions[i]; + + // offset to previous position + float_type dm = this->actual_positions[i - 1] - this->actual_positions[i]; + + // height ds + float_type hp = (this->heights[i + 1] - this->heights[i]) / dp; + float_type hm = (this->heights[i - 1] - this->heights[i]) / dm; + + if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) ) + { + short sign_d = static_cast<short>(d / std::abs(d)); + + // try adjusting heights[i] using p-squared formula + float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm ); + + if ( this->heights[i - 1] < h && h < this->heights[i + 1] ) + { + this->heights[i] = h; + } + else + { + // use linear formula + if (d>0) + { + this->heights[i] += hp; + } + if (d<0) + { + this->heights[i] -= hm; + } + } + this->actual_positions[i] += sign_d; + } + } + } + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + // creates a vector of std::pair where each pair i holds + // the values heights[i] (x-axis of histogram) and + // actual_positions[i] / cnt (y-axis of histogram) + + std::size_t cnt = count(args); + + for (std::size_t i = 0; i < this->histogram.size(); ++i) + { + this->histogram[i] = std::make_pair(this->heights[i], numeric::average(this->actual_positions[i], cnt)); + } + } + //return histogram; + return make_iterator_range(this->histogram); + } + + private: + std::size_t num_cells; // number of cells b + array_type heights; // q_i + array_type actual_positions; // n_i + array_type desired_positions; // n'_i + array_type positions_increments; // dn'_i + mutable histogram_type histogram; // histogram + mutable bool is_dirty; + }; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////// +// tag::p_square_cumulative_distribution +// +namespace tag +{ + struct p_square_cumulative_distribution + : depends_on<count> + , p_square_cumulative_distribution_num_cells + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::p_square_cumulative_distribution_impl<mpl::_1> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::p_square_cumulative_distribution +// +namespace extract +{ + extractor<tag::p_square_cumulative_distribution> const p_square_cumulative_distribution = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_cumulative_distribution) +} + +using extract::p_square_cumulative_distribution; + +// So that p_square_cumulative_distribution can be automatically substituted with +// weighted_p_square_cumulative_distribution when the weight parameter is non-void +template<> +struct as_weighted_feature<tag::p_square_cumulative_distribution> +{ + typedef tag::weighted_p_square_cumulative_distribution type; +}; + +template<> +struct feature_of<tag::weighted_p_square_cumulative_distribution> + : feature_of<tag::p_square_cumulative_distribution> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/p_square_quantile.hpp b/boost/accumulators/statistics/p_square_quantile.hpp new file mode 100644 index 0000000000..844dfa7728 --- /dev/null +++ b/boost/accumulators/statistics/p_square_quantile.hpp @@ -0,0 +1,257 @@ +/////////////////////////////////////////////////////////////////////////////// +// p_square_quantile.hpp +// +// Copyright 2005 Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_QUANTILE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_QUANTILE_HPP_DE_01_01_2006 + +#include <cmath> +#include <functional> +#include <boost/array.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // p_square_quantile_impl + // single quantile estimation + /** + @brief Single quantile estimation with the \f$P^2\f$ algorithm + + The \f$P^2\f$ algorithm estimates a quantile dynamically without storing samples. Instead of + storing the whole sample cumulative distribution, only five points (markers) are stored. The heights + of these markers are the minimum and the maximum of the samples and the current estimates of the + \f$(p/2)\f$-, \f$p\f$- and \f$(1+p)/2\f$-quantiles. Their positions are equal to the number + of samples that are smaller or equal to the markers. Each time a new samples is recorded, the + positions of the markers are updated and if necessary their heights are adjusted using a piecewise- + parabolic formula. + + For further details, see + + R. Jain and I. Chlamtac, The P^2 algorithmus fordynamic calculation of quantiles and + histograms without storing observations, Communications of the ACM, + Volume 28 (October), Number 10, 1985, p. 1076-1085. + + @param quantile_probability + */ + template<typename Sample, typename Impl> + struct p_square_quantile_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef array<float_type, 5> array_type; + // for boost::result_of + typedef float_type result_type; + + template<typename Args> + p_square_quantile_impl(Args const &args) + : p(is_same<Impl, for_median>::value ? 0.5 : args[quantile_probability | 0.5]) + , heights() + , actual_positions() + , desired_positions() + , positions_increments() + { + for(std::size_t i = 0; i < 5; ++i) + { + this->actual_positions[i] = i + 1; + } + + this->desired_positions[0] = 1.; + this->desired_positions[1] = 1. + 2. * this->p; + this->desired_positions[2] = 1. + 4. * this->p; + this->desired_positions[3] = 3. + 2. * this->p; + this->desired_positions[4] = 5.; + + this->positions_increments[0] = 0.; + this->positions_increments[1] = this->p / 2.; + this->positions_increments[2] = this->p; + this->positions_increments[3] = (1. + this->p) / 2.; + this->positions_increments[4] = 1.; + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + + // accumulate 5 first samples + if(cnt <= 5) + { + this->heights[cnt - 1] = args[sample]; + + // complete the initialization of heights by sorting + if(cnt == 5) + { + std::sort(this->heights.begin(), this->heights.end()); + } + } + else + { + std::size_t sample_cell = 1; // k + + // find cell k such that heights[k-1] <= args[sample] < heights[k] and ajust extreme values + if (args[sample] < this->heights[0]) + { + this->heights[0] = args[sample]; + sample_cell = 1; + } + else if (this->heights[4] <= args[sample]) + { + this->heights[4] = args[sample]; + sample_cell = 4; + } + else + { + typedef typename array_type::iterator iterator; + iterator it = std::upper_bound( + this->heights.begin() + , this->heights.end() + , args[sample] + ); + + sample_cell = std::distance(this->heights.begin(), it); + } + + // update positions of markers above sample_cell + for(std::size_t i = sample_cell; i < 5; ++i) + { + ++this->actual_positions[i]; + } + + // update desired positions of all markers + for(std::size_t i = 0; i < 5; ++i) + { + this->desired_positions[i] += this->positions_increments[i]; + } + + // adjust heights and actual positions of markers 1 to 3 if necessary + for(std::size_t i = 1; i <= 3; ++i) + { + // offset to desired positions + float_type d = this->desired_positions[i] - this->actual_positions[i]; + + // offset to next position + float_type dp = this->actual_positions[i + 1] - this->actual_positions[i]; + + // offset to previous position + float_type dm = this->actual_positions[i - 1] - this->actual_positions[i]; + + // height ds + float_type hp = (this->heights[i + 1] - this->heights[i]) / dp; + float_type hm = (this->heights[i - 1] - this->heights[i]) / dm; + + if((d >= 1. && dp > 1.) || (d <= -1. && dm < -1.)) + { + short sign_d = static_cast<short>(d / std::abs(d)); + + // try adjusting heights[i] using p-squared formula + float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm) * hp + + (dp - sign_d) * hm); + + if(this->heights[i - 1] < h && h < this->heights[i + 1]) + { + this->heights[i] = h; + } + else + { + // use linear formula + if(d > 0) + { + this->heights[i] += hp; + } + if(d < 0) + { + this->heights[i] -= hm; + } + } + this->actual_positions[i] += sign_d; + } + } + } + } + + result_type result(dont_care) const + { + return this->heights[2]; + } + + private: + float_type p; // the quantile probability p + array_type heights; // q_i + array_type actual_positions; // n_i + array_type desired_positions; // n'_i + array_type positions_increments; // dn'_i + }; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////// +// tag::p_square_quantile +// +namespace tag +{ + struct p_square_quantile + : depends_on<count> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::p_square_quantile_impl<mpl::_1, regular> impl; + }; + struct p_square_quantile_for_median + : depends_on<count> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::p_square_quantile_impl<mpl::_1, for_median> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::p_square_quantile +// extract::p_square_quantile_for_median +// +namespace extract +{ + extractor<tag::p_square_quantile> const p_square_quantile = {}; + extractor<tag::p_square_quantile_for_median> const p_square_quantile_for_median = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile_for_median) +} + +using extract::p_square_quantile; +using extract::p_square_quantile_for_median; + +// So that p_square_quantile can be automatically substituted with +// weighted_p_square_quantile when the weight parameter is non-void +template<> +struct as_weighted_feature<tag::p_square_quantile> +{ + typedef tag::weighted_p_square_quantile type; +}; + +template<> +struct feature_of<tag::weighted_p_square_quantile> + : feature_of<tag::p_square_quantile> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/parameters/quantile_probability.hpp b/boost/accumulators/statistics/parameters/quantile_probability.hpp new file mode 100644 index 0000000000..be68f8955e --- /dev/null +++ b/boost/accumulators/statistics/parameters/quantile_probability.hpp @@ -0,0 +1,20 @@ +/////////////////////////////////////////////////////////////////////////////// +// quantile_probability.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_PARAMETERS_QUANTILE_PROBABILITY_HPP_EAN_03_11_2005 +#define BOOST_ACCUMULATORS_STATISTICS_PARAMETERS_QUANTILE_PROBABILITY_HPP_EAN_03_11_2005 + +#include <boost/parameter/keyword.hpp> + +namespace boost { namespace accumulators +{ + +BOOST_PARAMETER_KEYWORD(tag, quantile_probability) + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/peaks_over_threshold.hpp b/boost/accumulators/statistics/peaks_over_threshold.hpp new file mode 100644 index 0000000000..01f36f91e6 --- /dev/null +++ b/boost/accumulators/statistics/peaks_over_threshold.hpp @@ -0,0 +1,401 @@ +/////////////////////////////////////////////////////////////////////////////// +// peaks_over_threshold.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <numeric> +#include <functional> +#include <boost/config/no_tr1/cmath.hpp> // pow +#include <sstream> // stringstream +#include <stdexcept> // runtime_error +#include <boost/throw_exception.hpp> +#include <boost/range.hpp> +#include <boost/mpl/if.hpp> +#include <boost/mpl/int.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/tail.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost { namespace accumulators +{ + +/////////////////////////////////////////////////////////////////////////////// +// threshold_probability and threshold named parameters +// +BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value) +BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability) + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // peaks_over_threshold_impl + // works with an explicit threshold value and does not depend on order statistics + /** + @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation + + According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of + the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$ + may be approximated by a generalized Pareto distribution + \f[ + G_{\xi,\beta}(x) = + \left\{ + \begin{array}{ll} + \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\ + \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0, + \end{array} + \right. + \f] + with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf. + Hosking and Wallis (1987), + \f[ + \begin{array}{lll} + \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\ + \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right], + \end{array} + \f] + \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over + the threshold \f$u\f$. Equivalently, the distribution function + \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by + \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$ + can be written as + \f[ + F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u) + \f] + and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function + \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by + \f[ + \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u). + \f] + It can be shown that \f$\widehat{F}(x)\f$ is a generalized + Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$ + and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$, + one obtains an estimator for the \f$\alpha\f$-quantile, + \f[ + \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right], + \f] + and similarly an estimator for the (coherent) tail mean, + \f[ + \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi}, + \f] + cf. McNeil and Frey (2000). + + Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the + \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define + the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are + computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the + correct result. + + For further details, see + + J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution, + Technometrics, Volume 29, 1987, p. 339-349 + + A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series: + an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300 + + @param quantile_probability + @param pot_threshold_value + */ + template<typename Sample, typename LeftRight> + struct peaks_over_threshold_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + // for boost::result_of + typedef boost::tuple<float_type, float_type, float_type> result_type; + // for left tail fitting, mirror the extreme values + typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign; + + template<typename Args> + peaks_over_threshold_impl(Args const &args) + : Nu_(0) + , mu_(sign::value * numeric::average(args[sample | Sample()], (std::size_t)1)) + , sigma2_(numeric::average(args[sample | Sample()], (std::size_t)1)) + , threshold_(sign::value * args[pot_threshold_value]) + , fit_parameters_(boost::make_tuple(0., 0., 0.)) + , is_dirty_(true) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + this->is_dirty_ = true; + + if (sign::value * args[sample] > this->threshold_) + { + this->mu_ += args[sample]; + this->sigma2_ += args[sample] * args[sample]; + ++this->Nu_; + } + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty_) + { + this->is_dirty_ = false; + + std::size_t cnt = count(args); + + this->mu_ = sign::value * numeric::average(this->mu_, this->Nu_); + this->sigma2_ = numeric::average(this->sigma2_, this->Nu_); + this->sigma2_ -= this->mu_ * this->mu_; + + float_type threshold_probability = numeric::average(cnt - this->Nu_, cnt); + + float_type tmp = numeric::average(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_); + float_type xi_hat = 0.5 * ( 1. - tmp ); + float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp ); + float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat); + float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat; + this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); + } + + return this->fit_parameters_; + } + + private: + std::size_t Nu_; // number of samples larger than threshold + mutable float_type mu_; // mean of Nu_ largest samples + mutable float_type sigma2_; // variance of Nu_ largest samples + float_type threshold_; + mutable result_type fit_parameters_; // boost::tuple that stores fit parameters + mutable bool is_dirty_; + }; + + /////////////////////////////////////////////////////////////////////////////// + // peaks_over_threshold_prob_impl + // determines threshold from a given threshold probability using order statistics + /** + @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation + + @sa peaks_over_threshold_impl + + @param quantile_probability + @param pot_threshold_probability + */ + template<typename Sample, typename LeftRight> + struct peaks_over_threshold_prob_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + // for boost::result_of + typedef boost::tuple<float_type, float_type, float_type> result_type; + // for left tail fitting, mirror the extreme values + typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign; + + template<typename Args> + peaks_over_threshold_prob_impl(Args const &args) + : mu_(sign::value * numeric::average(args[sample | Sample()], (std::size_t)1)) + , sigma2_(numeric::average(args[sample | Sample()], (std::size_t)1)) + , threshold_probability_(args[pot_threshold_probability]) + , fit_parameters_(boost::make_tuple(0., 0., 0.)) + , is_dirty_(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty_ = true; + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty_) + { + this->is_dirty_ = false; + + std::size_t cnt = count(args); + + // the n'th cached sample provides an approximate threshold value u + std::size_t n = static_cast<std::size_t>( + std::ceil( + cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ ) + ) + ); + + // If n is in a valid range, return result, otherwise return NaN or throw exception + if ( n >= static_cast<std::size_t>(tail(args).size())) + { + if (std::numeric_limits<float_type>::has_quiet_NaN) + { + return boost::make_tuple( + std::numeric_limits<float_type>::quiet_NaN() + , std::numeric_limits<float_type>::quiet_NaN() + , std::numeric_limits<float_type>::quiet_NaN() + ); + } + else + { + std::ostringstream msg; + msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + return boost::make_tuple(Sample(0), Sample(0), Sample(0)); + } + } + else + { + float_type u = *(tail(args).begin() + n - 1) * sign::value; + + // compute mean and variance of samples above/under threshold value u + for (std::size_t i = 0; i < n; ++i) + { + mu_ += *(tail(args).begin() + i); + sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i)); + } + + this->mu_ = sign::value * numeric::average(this->mu_, n); + this->sigma2_ = numeric::average(this->sigma2_, n); + this->sigma2_ -= this->mu_ * this->mu_; + + if (is_same<LeftRight, left>::value) + this->threshold_probability_ = 1. - this->threshold_probability_; + + float_type tmp = numeric::average(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_); + float_type xi_hat = 0.5 * ( 1. - tmp ); + float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp ); + float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat); + float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat; + this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); + } + } + + return this->fit_parameters_; + } + + private: + mutable float_type mu_; // mean of samples above threshold u + mutable float_type sigma2_; // variance of samples above threshold u + mutable float_type threshold_probability_; + mutable result_type fit_parameters_; // boost::tuple that stores fit parameters + mutable bool is_dirty_; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::peaks_over_threshold +// +namespace tag +{ + template<typename LeftRight> + struct peaks_over_threshold + : depends_on<count> + , pot_threshold_value + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl; + }; + + template<typename LeftRight> + struct peaks_over_threshold_prob + : depends_on<count, tail<LeftRight> > + , pot_threshold_probability + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl; + }; + + struct abstract_peaks_over_threshold + : depends_on<> + { + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::peaks_over_threshold +// +namespace extract +{ + extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold) +} + +using extract::peaks_over_threshold; + +// peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight> +template<typename LeftRight> +struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)> +{ + typedef tag::peaks_over_threshold<LeftRight> type; +}; + +// peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight> +template<typename LeftRight> +struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)> +{ + typedef tag::peaks_over_threshold_prob<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::peaks_over_threshold<LeftRight> > + : feature_of<tag::abstract_peaks_over_threshold> +{ +}; + +template<typename LeftRight> +struct feature_of<tag::peaks_over_threshold_prob<LeftRight> > + : feature_of<tag::abstract_peaks_over_threshold> +{ +}; + +// So that peaks_over_threshold can be automatically substituted +// with weighted_peaks_over_threshold when the weight parameter is non-void. +template<typename LeftRight> +struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> > +{ + typedef tag::weighted_peaks_over_threshold<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> > + : feature_of<tag::peaks_over_threshold<LeftRight> > +{}; + +// So that peaks_over_threshold_prob can be automatically substituted +// with weighted_peaks_over_threshold_prob when the weight parameter is non-void. +template<typename LeftRight> +struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> > +{ + typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> > + : feature_of<tag::peaks_over_threshold_prob<LeftRight> > +{}; + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/pot_quantile.hpp b/boost/accumulators/statistics/pot_quantile.hpp new file mode 100644 index 0000000000..aceff8cee5 --- /dev/null +++ b/boost/accumulators/statistics/pot_quantile.hpp @@ -0,0 +1,205 @@ +/////////////////////////////////////////////////////////////////////////////// +// pot_quantile.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_POT_QUANTILE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_POT_QUANTILE_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <numeric> +#include <functional> +#include <boost/parameter/keyword.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/mpl/if.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/tail.hpp> +#include <boost/accumulators/statistics/peaks_over_threshold.hpp> +#include <boost/accumulators/statistics/weighted_peaks_over_threshold.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // pot_quantile_impl + // + /** + @brief Quantile Estimation based on Peaks over Threshold Method (for both left and right tails) + + Computes an estimate + \f[ + \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right] + \f] + for a right or left extreme quantile, \f$\bar[u]\f$, \f$\bar{\beta}\f$ and \f$\xi\f$ being the parameters of the + generalized Pareto distribution that approximates the right tail of the distribution (or the mirrored left tail, + in case the left tail is used). In the latter case, the result is mirrored back, yielding the correct result. + */ + template<typename Sample, typename Impl, typename LeftRight> + struct pot_quantile_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + // for boost::result_of + typedef float_type result_type; + + pot_quantile_impl(dont_care) + : sign_((is_same<LeftRight, left>::value) ? -1 : 1) + { + } + + template<typename Args> + result_type result(Args const &args) const + { + typedef + typename mpl::if_< + is_same<Impl, weighted> + , tag::weighted_peaks_over_threshold<LeftRight> + , tag::peaks_over_threshold<LeftRight> + >::type + peaks_over_threshold_tag; + + extractor<peaks_over_threshold_tag> const some_peaks_over_threshold = {}; + + float_type u_bar = some_peaks_over_threshold(args).template get<0>(); + float_type beta_bar = some_peaks_over_threshold(args).template get<1>(); + float_type xi_hat = some_peaks_over_threshold(args).template get<2>(); + + return this->sign_ * (u_bar + beta_bar/xi_hat * ( std::pow( + is_same<LeftRight, left>::value ? args[quantile_probability] : 1. - args[quantile_probability] + , -xi_hat + ) - 1.)); + } + + private: + short sign_; // if the fit parameters from the mirrored left tail extreme values are used, mirror back the result + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::pot_quantile<> +// tag::pot_quantile_prob<> +// tag::weighted_pot_quantile<> +// tag::weighted_pot_quantile_prob<> +// +namespace tag +{ + template<typename LeftRight> + struct pot_quantile + : depends_on<peaks_over_threshold<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::pot_quantile_impl<mpl::_1, unweighted, LeftRight> impl; + }; + template<typename LeftRight> + struct pot_quantile_prob + : depends_on<peaks_over_threshold_prob<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::pot_quantile_impl<mpl::_1, unweighted, LeftRight> impl; + }; + template<typename LeftRight> + struct weighted_pot_quantile + : depends_on<weighted_peaks_over_threshold<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::pot_quantile_impl<mpl::_1, weighted, LeftRight> impl; + }; + template<typename LeftRight> + struct weighted_pot_quantile_prob + : depends_on<weighted_peaks_over_threshold_prob<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::pot_quantile_impl<mpl::_1, weighted, LeftRight> impl; + }; +} + +// pot_quantile<LeftRight>(with_threshold_value) -> pot_quantile<LeftRight> +template<typename LeftRight> +struct as_feature<tag::pot_quantile<LeftRight>(with_threshold_value)> +{ + typedef tag::pot_quantile<LeftRight> type; +}; + +// pot_quantile<LeftRight>(with_threshold_probability) -> pot_quantile_prob<LeftRight> +template<typename LeftRight> +struct as_feature<tag::pot_quantile<LeftRight>(with_threshold_probability)> +{ + typedef tag::pot_quantile_prob<LeftRight> type; +}; + +// weighted_pot_quantile<LeftRight>(with_threshold_value) -> weighted_pot_quantile<LeftRight> +template<typename LeftRight> +struct as_feature<tag::weighted_pot_quantile<LeftRight>(with_threshold_value)> +{ + typedef tag::weighted_pot_quantile<LeftRight> type; +}; + +// weighted_pot_quantile<LeftRight>(with_threshold_probability) -> weighted_pot_quantile_prob<LeftRight> +template<typename LeftRight> +struct as_feature<tag::weighted_pot_quantile<LeftRight>(with_threshold_probability)> +{ + typedef tag::weighted_pot_quantile_prob<LeftRight> type; +}; + +// for the purposes of feature-based dependency resolution, +// pot_quantile<LeftRight> and pot_quantile_prob<LeftRight> provide +// the same feature as quantile +template<typename LeftRight> +struct feature_of<tag::pot_quantile<LeftRight> > + : feature_of<tag::quantile> +{ +}; + +template<typename LeftRight> +struct feature_of<tag::pot_quantile_prob<LeftRight> > + : feature_of<tag::quantile> +{ +}; + +// So that pot_quantile can be automatically substituted +// with weighted_pot_quantile when the weight parameter is non-void. +template<typename LeftRight> +struct as_weighted_feature<tag::pot_quantile<LeftRight> > +{ + typedef tag::weighted_pot_quantile<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::weighted_pot_quantile<LeftRight> > + : feature_of<tag::pot_quantile<LeftRight> > +{ +}; + +// So that pot_quantile_prob can be automatically substituted +// with weighted_pot_quantile_prob when the weight parameter is non-void. +template<typename LeftRight> +struct as_weighted_feature<tag::pot_quantile_prob<LeftRight> > +{ + typedef tag::weighted_pot_quantile_prob<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::weighted_pot_quantile_prob<LeftRight> > + : feature_of<tag::pot_quantile_prob<LeftRight> > +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/pot_tail_mean.hpp b/boost/accumulators/statistics/pot_tail_mean.hpp new file mode 100644 index 0000000000..088ef98093 --- /dev/null +++ b/boost/accumulators/statistics/pot_tail_mean.hpp @@ -0,0 +1,211 @@ +/////////////////////////////////////////////////////////////////////////////// +// pot_tail_mean.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_POT_TAIL_MEAN_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_POT_TAIL_MEAN_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <numeric> +#include <functional> +#include <boost/range.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/mpl/if.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/peaks_over_threshold.hpp> +#include <boost/accumulators/statistics/weighted_peaks_over_threshold.hpp> +#include <boost/accumulators/statistics/pot_quantile.hpp> +#include <boost/accumulators/statistics/tail_mean.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // pot_tail_mean_impl + // + /** + @brief Estimation of the (coherent) tail mean based on the peaks over threshold method (for both left and right tails) + + Computes an estimate for the (coherent) tail mean + \f[ + \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi}, + \f] + where \f$\bar[u]\f$, \f$\bar{\beta}\f$ and \f$\xi\f$ are the parameters of the + generalized Pareto distribution that approximates the right tail of the distribution (or the + mirrored left tail, in case the left tail is used). In the latter case, the result is mirrored + back, yielding the correct result. + */ + template<typename Sample, typename Impl, typename LeftRight> + struct pot_tail_mean_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + // for boost::result_of + typedef float_type result_type; + + pot_tail_mean_impl(dont_care) + : sign_((is_same<LeftRight, left>::value) ? -1 : 1) + { + } + + template<typename Args> + result_type result(Args const &args) const + { + typedef + typename mpl::if_< + is_same<Impl, weighted> + , tag::weighted_peaks_over_threshold<LeftRight> + , tag::peaks_over_threshold<LeftRight> + >::type + peaks_over_threshold_tag; + + typedef + typename mpl::if_< + is_same<Impl, weighted> + , tag::weighted_pot_quantile<LeftRight> + , tag::pot_quantile<LeftRight> + >::type + pot_quantile_tag; + + extractor<peaks_over_threshold_tag> const some_peaks_over_threshold = {}; + extractor<pot_quantile_tag> const some_pot_quantile = {}; + + float_type beta_bar = some_peaks_over_threshold(args).template get<1>(); + float_type xi_hat = some_peaks_over_threshold(args).template get<2>(); + + return some_pot_quantile(args) - this->sign_ * beta_bar/( xi_hat - 1. ) * std::pow( + is_same<LeftRight, left>::value ? args[quantile_probability] : 1. - args[quantile_probability] + , -xi_hat); + } + private: + short sign_; // if the fit parameters from the mirrored left tail extreme values are used, mirror back the result + }; +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::pot_tail_mean +// tag::pot_tail_mean_prob +// +namespace tag +{ + template<typename LeftRight> + struct pot_tail_mean + : depends_on<peaks_over_threshold<LeftRight>, pot_quantile<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::pot_tail_mean_impl<mpl::_1, unweighted, LeftRight> impl; + }; + template<typename LeftRight> + struct pot_tail_mean_prob + : depends_on<peaks_over_threshold_prob<LeftRight>, pot_quantile_prob<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::pot_tail_mean_impl<mpl::_1, unweighted, LeftRight> impl; + }; + template<typename LeftRight> + struct weighted_pot_tail_mean + : depends_on<weighted_peaks_over_threshold<LeftRight>, weighted_pot_quantile<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::pot_tail_mean_impl<mpl::_1, weighted, LeftRight> impl; + }; + template<typename LeftRight> + struct weighted_pot_tail_mean_prob + : depends_on<weighted_peaks_over_threshold_prob<LeftRight>, weighted_pot_quantile_prob<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::pot_tail_mean_impl<mpl::_1, weighted, LeftRight> impl; + }; +} + +// pot_tail_mean<LeftRight>(with_threshold_value) -> pot_tail_mean<LeftRight> +template<typename LeftRight> +struct as_feature<tag::pot_tail_mean<LeftRight>(with_threshold_value)> +{ + typedef tag::pot_tail_mean<LeftRight> type; +}; + +// pot_tail_mean<LeftRight>(with_threshold_probability) -> pot_tail_mean_prob<LeftRight> +template<typename LeftRight> +struct as_feature<tag::pot_tail_mean<LeftRight>(with_threshold_probability)> +{ + typedef tag::pot_tail_mean_prob<LeftRight> type; +}; + +// weighted_pot_tail_mean<LeftRight>(with_threshold_value) -> weighted_pot_tail_mean<LeftRight> +template<typename LeftRight> +struct as_feature<tag::weighted_pot_tail_mean<LeftRight>(with_threshold_value)> +{ + typedef tag::weighted_pot_tail_mean<LeftRight> type; +}; + +// weighted_pot_tail_mean<LeftRight>(with_threshold_probability) -> weighted_pot_tail_mean_prob<LeftRight> +template<typename LeftRight> +struct as_feature<tag::weighted_pot_tail_mean<LeftRight>(with_threshold_probability)> +{ + typedef tag::weighted_pot_tail_mean_prob<LeftRight> type; +}; + +// for the purposes of feature-based dependency resolution, +// pot_tail_mean<LeftRight> and pot_tail_mean_prob<LeftRight> provide +// the same feature as tail_mean +template<typename LeftRight> +struct feature_of<tag::pot_tail_mean<LeftRight> > + : feature_of<tag::tail_mean> +{ +}; + +template<typename LeftRight> +struct feature_of<tag::pot_tail_mean_prob<LeftRight> > + : feature_of<tag::tail_mean> +{ +}; + +// So that pot_tail_mean can be automatically substituted +// with weighted_pot_tail_mean when the weight parameter is non-void. +template<typename LeftRight> +struct as_weighted_feature<tag::pot_tail_mean<LeftRight> > +{ + typedef tag::weighted_pot_tail_mean<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::weighted_pot_tail_mean<LeftRight> > + : feature_of<tag::pot_tail_mean<LeftRight> > +{ +}; + +// So that pot_tail_mean_prob can be automatically substituted +// with weighted_pot_tail_mean_prob when the weight parameter is non-void. +template<typename LeftRight> +struct as_weighted_feature<tag::pot_tail_mean_prob<LeftRight> > +{ + typedef tag::weighted_pot_tail_mean_prob<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::weighted_pot_tail_mean_prob<LeftRight> > + : feature_of<tag::pot_tail_mean_prob<LeftRight> > +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/rolling_count.hpp b/boost/accumulators/statistics/rolling_count.hpp new file mode 100644 index 0000000000..1e34f76354 --- /dev/null +++ b/boost/accumulators/statistics/rolling_count.hpp @@ -0,0 +1,80 @@ +/////////////////////////////////////////////////////////////////////////////// +// rolling_count.hpp +// +// Copyright 2008 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_ROLLING_COUNT_HPP_EAN_26_12_2008 +#define BOOST_ACCUMULATORS_STATISTICS_ROLLING_COUNT_HPP_EAN_26_12_2008 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/rolling_window.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + + /////////////////////////////////////////////////////////////////////////////// + // rolling_count_impl + // returns the count of elements in the rolling window + template<typename Sample> + struct rolling_count_impl + : accumulator_base + { + typedef std::size_t result_type; + + rolling_count_impl(dont_care) + {} + + template<typename Args> + result_type result(Args const &args) const + { + return static_cast<std::size_t>(rolling_window_plus1(args).size()) - is_rolling_window_plus1_full(args); + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::rolling_count +// +namespace tag +{ + struct rolling_count + : depends_on< rolling_window_plus1 > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::rolling_count_impl< mpl::_1 > impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + /// tag::rolling_window::window_size named parameter + static boost::parameter::keyword<tag::rolling_window_size> const window_size; + #endif + }; +} // namespace tag + +/////////////////////////////////////////////////////////////////////////////// +// extract::rolling_count +// +namespace extract +{ + extractor<tag::rolling_count> const rolling_count = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(rolling_count) +} + +using extract::rolling_count; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/rolling_mean.hpp b/boost/accumulators/statistics/rolling_mean.hpp new file mode 100644 index 0000000000..ddcbaa39b9 --- /dev/null +++ b/boost/accumulators/statistics/rolling_mean.hpp @@ -0,0 +1,81 @@ +/////////////////////////////////////////////////////////////////////////////// +// rolling_mean.hpp +// +// Copyright 2008 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_ROLLING_MEAN_HPP_EAN_26_12_2008 +#define BOOST_ACCUMULATORS_STATISTICS_ROLLING_MEAN_HPP_EAN_26_12_2008 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/rolling_sum.hpp> +#include <boost/accumulators/statistics/rolling_count.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + + /////////////////////////////////////////////////////////////////////////////// + // rolling_mean_impl + // returns the unshifted results from the shifted rolling window + template<typename Sample> + struct rolling_mean_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + rolling_mean_impl(dont_care) + {} + + template<typename Args> + result_type result(Args const &args) const + { + return numeric::average(rolling_sum(args), rolling_count(args)); + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::rolling_mean +// +namespace tag +{ + struct rolling_mean + : depends_on< rolling_sum, rolling_count > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::rolling_mean_impl< mpl::_1 > impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + /// tag::rolling_window::window_size named parameter + static boost::parameter::keyword<tag::rolling_window_size> const window_size; + #endif + }; +} // namespace tag + +/////////////////////////////////////////////////////////////////////////////// +// extract::rolling_mean +// +namespace extract +{ + extractor<tag::rolling_mean> const rolling_mean = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(rolling_mean) +} + +using extract::rolling_mean; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/rolling_sum.hpp b/boost/accumulators/statistics/rolling_sum.hpp new file mode 100644 index 0000000000..b41ab39e33 --- /dev/null +++ b/boost/accumulators/statistics/rolling_sum.hpp @@ -0,0 +1,93 @@ +/////////////////////////////////////////////////////////////////////////////// +// rolling_sum.hpp +// +// Copyright 2008 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_ROLLING_SUM_HPP_EAN_26_12_2008 +#define BOOST_ACCUMULATORS_STATISTICS_ROLLING_SUM_HPP_EAN_26_12_2008 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/rolling_window.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // rolling_sum_impl + // returns the sum of the samples in the rolling window + template<typename Sample> + struct rolling_sum_impl + : accumulator_base + { + typedef Sample result_type; + + template<typename Args> + rolling_sum_impl(Args const &args) + : sum_(args[sample | Sample()]) + {} + + template<typename Args> + void operator ()(Args const &args) + { + if(is_rolling_window_plus1_full(args)) + { + this->sum_ -= rolling_window_plus1(args).front(); + } + this->sum_ += args[sample]; + } + + template<typename Args> + result_type result(Args const &args) const + { + return this->sum_; + } + + private: + Sample sum_; + }; +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::rolling_sum +// +namespace tag +{ + struct rolling_sum + : depends_on< rolling_window_plus1 > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::rolling_sum_impl< mpl::_1 > impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + /// tag::rolling_window::window_size named parameter + static boost::parameter::keyword<tag::rolling_window_size> const window_size; + #endif + }; +} // namespace tag + +/////////////////////////////////////////////////////////////////////////////// +// extract::rolling_sum +// +namespace extract +{ + extractor<tag::rolling_sum> const rolling_sum = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(rolling_sum) +} + +using extract::rolling_sum; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/rolling_window.hpp b/boost/accumulators/statistics/rolling_window.hpp new file mode 100644 index 0000000000..d2f4b0dfe1 --- /dev/null +++ b/boost/accumulators/statistics/rolling_window.hpp @@ -0,0 +1,169 @@ +/////////////////////////////////////////////////////////////////////////////// +// rolling_window.hpp +// +// Copyright 2008 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_ROLLING_WINDOW_HPP_EAN_26_12_2008 +#define BOOST_ACCUMULATORS_STATISTICS_ROLLING_WINDOW_HPP_EAN_26_12_2008 + +#include <cstddef> +#include <boost/version.hpp> +#include <boost/assert.hpp> +#include <boost/circular_buffer.hpp> +#include <boost/range/iterator_range.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/parameters/accumulator.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/statistics_fwd.hpp> + +namespace boost { namespace accumulators +{ + +/////////////////////////////////////////////////////////////////////////////// +// tag::rolling_window::size named parameter +BOOST_PARAMETER_NESTED_KEYWORD(tag, rolling_window_size, window_size) + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // rolling_window_plus1_impl + // stores the latest N+1 samples, where N is specified at construction time + // with the rolling_window_size named parameter + template<typename Sample> + struct rolling_window_plus1_impl + : accumulator_base + { + typedef typename circular_buffer<Sample>::const_iterator const_iterator; + typedef iterator_range<const_iterator> result_type; + + template<typename Args> + rolling_window_plus1_impl(Args const & args) + : buffer_(args[rolling_window_size] + 1) + {} + + #if BOOST_VERSION < 103600 + // Before Boost 1.36, copying a circular buffer didn't copy + // it's capacity, and we need that behavior. + rolling_window_plus1_impl(rolling_window_plus1_impl const &that) + : buffer_(that.buffer_) + { + this->buffer_.set_capacity(that.buffer_.capacity()); + } + + rolling_window_plus1_impl &operator =(rolling_window_plus1_impl const &that) + { + this->buffer_ = that.buffer_; + this->buffer_.set_capacity(that.buffer_.capacity()); + } + #endif + + template<typename Args> + void operator ()(Args const &args) + { + this->buffer_.push_back(args[sample]); + } + + bool full() const + { + return this->buffer_.full(); + } + + // The result of a shifted rolling window is the range including + // everything except the most recently added element. + result_type result(dont_care) const + { + return result_type(this->buffer_.begin(), this->buffer_.end()); + } + + private: + circular_buffer<Sample> buffer_; + }; + + template<typename Args> + bool is_rolling_window_plus1_full(Args const &args) + { + return find_accumulator<tag::rolling_window_plus1>(args[accumulator]).full(); + } + + /////////////////////////////////////////////////////////////////////////////// + // rolling_window_impl + // stores the latest N samples, where N is specified at construction type + // with the rolling_window_size named parameter + template<typename Sample> + struct rolling_window_impl + : accumulator_base + { + typedef typename circular_buffer<Sample>::const_iterator const_iterator; + typedef iterator_range<const_iterator> result_type; + + rolling_window_impl(dont_care) + {} + + template<typename Args> + result_type result(Args const &args) const + { + return rolling_window_plus1(args).advance_begin(is_rolling_window_plus1_full(args)); + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::rolling_window_plus1 +// tag::rolling_window +// +namespace tag +{ + struct rolling_window_plus1 + : depends_on<> + , tag::rolling_window_size + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::rolling_window_plus1_impl< mpl::_1 > impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + /// tag::rolling_window::size named parameter + static boost::parameter::keyword<tag::rolling_window_size> const window_size; + #endif + }; + + struct rolling_window + : depends_on< rolling_window_plus1 > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::rolling_window_impl< mpl::_1 > impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + /// tag::rolling_window::size named parameter + static boost::parameter::keyword<tag::rolling_window_size> const window_size; + #endif + }; + +} // namespace tag + +/////////////////////////////////////////////////////////////////////////////// +// extract::rolling_window_plus1 +// extract::rolling_window +// +namespace extract +{ + extractor<tag::rolling_window_plus1> const rolling_window_plus1 = {}; + extractor<tag::rolling_window> const rolling_window = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(rolling_window_plus1) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(rolling_window) +} + +using extract::rolling_window_plus1; +using extract::rolling_window; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/skewness.hpp b/boost/accumulators/statistics/skewness.hpp new file mode 100644 index 0000000000..2e29018f5d --- /dev/null +++ b/boost/accumulators/statistics/skewness.hpp @@ -0,0 +1,114 @@ +/////////////////////////////////////////////////////////////////////////////// +// skewness.hpp +// +// Copyright 2006 Olivier Gygi, Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_SKEWNESS_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_SKEWNESS_HPP_EAN_28_10_2005 + +#include <limits> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/moment.hpp> +#include <boost/accumulators/statistics/mean.hpp> + + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // skewness_impl + /** + @brief Skewness estimation + + The skewness of a sample distribution is defined as the ratio of the 3rd central moment and the \f$ 3/2 \f$-th power + of the 2nd central moment (the variance) of the sampless 3. The skewness can also be expressed by the simple moments: + + \f[ + \hat{g}_1 = + \frac + {\widehat{m}_n^{(3)}-3\widehat{m}_n^{(2)}\hat{\mu}_n+2\hat{\mu}_n^3} + {\left(\widehat{m}_n^{(2)} - \hat{\mu}_n^{2}\right)^{3/2}} + \f] + + where \f$ \widehat{m}_n^{(i)} \f$ are the \f$ i \f$-th moment and \f$ \hat{\mu}_n \f$ the mean (first moment) of the + \f$ n \f$ samples. + */ + template<typename Sample> + struct skewness_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, Sample>::result_type result_type; + + skewness_impl(dont_care) + { + } + + template<typename Args> + result_type result(Args const &args) const + { + return numeric::average( + accumulators::moment<3>(args) + - 3. * accumulators::moment<2>(args) * mean(args) + + 2. * mean(args) * mean(args) * mean(args) + , ( accumulators::moment<2>(args) - mean(args) * mean(args) ) + * std::sqrt( accumulators::moment<2>(args) - mean(args) * mean(args) ) + ); + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::skewness +// +namespace tag +{ + struct skewness + : depends_on<mean, moment<2>, moment<3> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::skewness_impl<mpl::_1> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::skewness +// +namespace extract +{ + extractor<tag::skewness> const skewness = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(skewness) +} + +using extract::skewness; + +// So that skewness can be automatically substituted with +// weighted_skewness when the weight parameter is non-void +template<> +struct as_weighted_feature<tag::skewness> +{ + typedef tag::weighted_skewness type; +}; + +template<> +struct feature_of<tag::weighted_skewness> + : feature_of<tag::skewness> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/stats.hpp b/boost/accumulators/statistics/stats.hpp new file mode 100644 index 0000000000..22c4ede027 --- /dev/null +++ b/boost/accumulators/statistics/stats.hpp @@ -0,0 +1,29 @@ +/////////////////////////////////////////////////////////////////////////////// +/// \file stats.hpp +/// Contains the stats<> template. +/// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_STATS_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_STATS_HPP_EAN_28_10_2005 + +#include <boost/preprocessor/repetition/enum_params.hpp> +#include <boost/mpl/vector.hpp> +#include <boost/accumulators/statistics_fwd.hpp> + +namespace boost { namespace accumulators +{ + +/////////////////////////////////////////////////////////////////////////////// +/// An MPL sequence of statistics. +template<BOOST_PP_ENUM_PARAMS(BOOST_ACCUMULATORS_MAX_FEATURES, typename Stat)> +struct stats + : mpl::vector<BOOST_PP_ENUM_PARAMS(BOOST_ACCUMULATORS_MAX_FEATURES, Stat)> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/sum.hpp b/boost/accumulators/statistics/sum.hpp new file mode 100644 index 0000000000..126ce244fd --- /dev/null +++ b/boost/accumulators/statistics/sum.hpp @@ -0,0 +1,141 @@ +/////////////////////////////////////////////////////////////////////////////// +// sum.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_SUM_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_SUM_HPP_EAN_28_10_2005 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/parameters/weight.hpp> +#include <boost/accumulators/framework/accumulators/external_accumulator.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // sum_impl + template<typename Sample, typename Tag> + struct sum_impl + : accumulator_base + { + // for boost::result_of + typedef Sample result_type; + + template<typename Args> + sum_impl(Args const &args) + : sum(args[parameter::keyword<Tag>::get() | Sample()]) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + // what about overflow? + this->sum += args[parameter::keyword<Tag>::get()]; + } + + result_type result(dont_care) const + { + return this->sum; + } + + private: + + Sample sum; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::sum +// tag::sum_of_weights +// tag::sum_of_variates +// +namespace tag +{ + struct sum + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::sum_impl<mpl::_1, tag::sample> impl; + }; + + struct sum_of_weights + : depends_on<> + { + typedef mpl::true_ is_weight_accumulator; + /// INTERNAL ONLY + /// + typedef accumulators::impl::sum_impl<mpl::_2, tag::weight> impl; + }; + + template<typename VariateType, typename VariateTag> + struct sum_of_variates + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef mpl::always<accumulators::impl::sum_impl<VariateType, VariateTag> > impl; + }; + + struct abstract_sum_of_variates + : depends_on<> + { + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::sum +// extract::sum_of_weights +// extract::sum_of_variates +// +namespace extract +{ + extractor<tag::sum> const sum = {}; + extractor<tag::sum_of_weights> const sum_of_weights = {}; + extractor<tag::abstract_sum_of_variates> const sum_of_variates = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(sum) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(sum_of_weights) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(sum_of_variates) +} + +using extract::sum; +using extract::sum_of_weights; +using extract::sum_of_variates; + +// So that mean can be automatically substituted with +// weighted_mean when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::sum> +{ + typedef tag::weighted_sum type; +}; + +template<> +struct feature_of<tag::weighted_sum> + : feature_of<tag::sum> +{}; + +template<typename VariateType, typename VariateTag> +struct feature_of<tag::sum_of_variates<VariateType, VariateTag> > + : feature_of<tag::abstract_sum_of_variates> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/sum_kahan.hpp b/boost/accumulators/statistics/sum_kahan.hpp new file mode 100644 index 0000000000..97ade18da8 --- /dev/null +++ b/boost/accumulators/statistics/sum_kahan.hpp @@ -0,0 +1,188 @@ +/////////////////////////////////////////////////////////////////////////////// +// sum_kahan.hpp +// +// Copyright 2010 Gaetano Mendola, 2011 Simon West. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_SUM_KAHAN_HPP_EAN_26_07_2010 +#define BOOST_ACCUMULATORS_STATISTICS_SUM_KAHAN_HPP_EAN_26_07_2010 + +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/sum.hpp> +#include <boost/accumulators/statistics/weighted_sum_kahan.hpp> +#include <boost/numeric/conversion/cast.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + +#if _MSC_VER > 1400 +# pragma float_control(push) +# pragma float_control(precise, on) +#endif + +template<typename Sample, typename Tag> +struct sum_kahan_impl + : accumulator_base +{ + typedef Sample result_type; + + //////////////////////////////////////////////////////////////////////////// + // sum_kahan_impl + /** + @brief Kahan summation algorithm + + The Kahan summation algorithm reduces the numerical error obtained with standard + sequential sum. + + */ + template<typename Args> + sum_kahan_impl(Args const & args) + : sum(args[parameter::keyword<Tag>::get() | Sample()]), + compensation(boost::numeric_cast<Sample>(0.0)) + { + } + + template<typename Args> + void +#if BOOST_ACCUMULATORS_GCC_VERSION > 40305 + __attribute__((__optimize__("no-associative-math"))) +#endif + operator ()(Args const & args) + { + const Sample myTmp1 = args[parameter::keyword<Tag>::get()] - this->compensation; + const Sample myTmp2 = this->sum + myTmp1; + this->compensation = (myTmp2 - this->sum) - myTmp1; + this->sum = myTmp2; + } + + result_type result(dont_care) const + { + return this->sum; + } + +private: + Sample sum; + Sample compensation; +}; + +#if _MSC_VER > 1400 +# pragma float_control(pop) +#endif + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::sum_kahan +// tag::sum_of_weights_kahan +// tag::sum_of_variates_kahan +// +namespace tag +{ + + struct sum_kahan + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef impl::sum_kahan_impl< mpl::_1, tag::sample > impl; + }; + + struct sum_of_weights_kahan + : depends_on<> + { + typedef mpl::true_ is_weight_accumulator; + /// INTERNAL ONLY + /// + typedef accumulators::impl::sum_kahan_impl<mpl::_2, tag::weight> impl; + }; + + template<typename VariateType, typename VariateTag> + struct sum_of_variates_kahan + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef mpl::always<accumulators::impl::sum_kahan_impl<VariateType, VariateTag> > impl; + }; + +} // namespace tag + +/////////////////////////////////////////////////////////////////////////////// +// extract::sum_kahan +// extract::sum_of_weights_kahan +// extract::sum_of_variates_kahan +// +namespace extract +{ + extractor<tag::sum_kahan> const sum_kahan = {}; + extractor<tag::sum_of_weights_kahan> const sum_of_weights_kahan = {}; + extractor<tag::abstract_sum_of_variates> const sum_of_variates_kahan = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(sum_kahan) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(sum_of_weights_kahan) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(sum_of_variates_kahan) +} // namespace extract + +using extract::sum_kahan; +using extract::sum_of_weights_kahan; +using extract::sum_of_variates_kahan; + +// sum(kahan) -> sum_kahan +template<> +struct as_feature<tag::sum(kahan)> +{ + typedef tag::sum_kahan type; +}; + +// sum_of_weights(kahan) -> sum_of_weights_kahan +template<> +struct as_feature<tag::sum_of_weights(kahan)> +{ + typedef tag::sum_of_weights_kahan type; +}; + +// So that sum_kahan can be automatically substituted with +// weighted_sum_kahan when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::sum_kahan> +{ + typedef tag::weighted_sum_kahan type; +}; + +template<> +struct feature_of<tag::weighted_sum_kahan> + : feature_of<tag::sum> +{}; + +// for the purposes of feature-based dependency resolution, +// sum_kahan provides the same feature as sum +template<> +struct feature_of<tag::sum_kahan> + : feature_of<tag::sum> +{ +}; + +// for the purposes of feature-based dependency resolution, +// sum_of_weights_kahan provides the same feature as sum_of_weights +template<> +struct feature_of<tag::sum_of_weights_kahan> + : feature_of<tag::sum_of_weights> +{ +}; + +template<typename VariateType, typename VariateTag> +struct feature_of<tag::sum_of_variates_kahan<VariateType, VariateTag> > + : feature_of<tag::abstract_sum_of_variates> +{ +}; + +}} // namespace boost::accumulators + +#endif + diff --git a/boost/accumulators/statistics/tail.hpp b/boost/accumulators/statistics/tail.hpp new file mode 100644 index 0000000000..159a9b67db --- /dev/null +++ b/boost/accumulators/statistics/tail.hpp @@ -0,0 +1,334 @@ +/////////////////////////////////////////////////////////////////////////////// +// tail.hpp +// +// Copyright 2005 Eric Niebler, Michael Gauckler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_TAIL_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_TAIL_HPP_EAN_28_10_2005 + +#include <vector> +#include <functional> +#include <boost/assert.hpp> +#include <boost/range.hpp> +#include <boost/mpl/if.hpp> +#include <boost/mpl/or.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/iterator/reverse_iterator.hpp> +#include <boost/iterator/permutation_iterator.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> + +namespace boost { namespace accumulators +{ +/////////////////////////////////////////////////////////////////////////////// +// cache_size named parameters +BOOST_PARAMETER_NESTED_KEYWORD(tag, right_tail_cache_size, cache_size) +BOOST_PARAMETER_NESTED_KEYWORD(tag, left_tail_cache_size, cache_size) + +namespace detail +{ + /////////////////////////////////////////////////////////////////////////////// + // tail_range + /// INTERNAL ONLY + /// + template<typename ElementIterator, typename IndexIterator> + struct tail_range + { + typedef boost::iterator_range< + boost::reverse_iterator<boost::permutation_iterator<ElementIterator, IndexIterator> > + > type; + }; + + /////////////////////////////////////////////////////////////////////////////// + // make_tail_range + /// INTERNAL ONLY + /// + template<typename ElementIterator, typename IndexIterator> + typename tail_range<ElementIterator, IndexIterator>::type + make_tail_range(ElementIterator elem_begin, IndexIterator index_begin, IndexIterator index_end) + { + return boost::make_iterator_range( + boost::make_reverse_iterator( + boost::make_permutation_iterator(elem_begin, index_end) + ) + , boost::make_reverse_iterator( + boost::make_permutation_iterator(elem_begin, index_begin) + ) + ); + } + + /////////////////////////////////////////////////////////////////////////////// + // stat_assign_visitor + /// INTERNAL ONLY + /// + template<typename Args> + struct stat_assign_visitor + { + stat_assign_visitor(Args const &a, std::size_t i) + : args(a) + , index(i) + { + } + + template<typename Stat> + void operator ()(Stat &stat) const + { + stat.assign(this->args, this->index); + } + + private: + stat_assign_visitor &operator =(stat_assign_visitor const &); + Args const &args; + std::size_t index; + }; + + /////////////////////////////////////////////////////////////////////////////// + // stat_assign + /// INTERNAL ONLY + /// + template<typename Args> + inline stat_assign_visitor<Args> const stat_assign(Args const &args, std::size_t index) + { + return stat_assign_visitor<Args>(args, index); + } + + /////////////////////////////////////////////////////////////////////////////// + // is_tail_variate_feature + /// INTERNAL ONLY + /// + template<typename Stat, typename LeftRight> + struct is_tail_variate_feature + : mpl::false_ + { + }; + + /// INTERNAL ONLY + /// + template<typename VariateType, typename VariateTag, typename LeftRight> + struct is_tail_variate_feature<tag::tail_variate<VariateType, VariateTag, LeftRight>, LeftRight> + : mpl::true_ + { + }; + + /// INTERNAL ONLY + /// + template<typename LeftRight> + struct is_tail_variate_feature<tag::tail_weights<LeftRight>, LeftRight> + : mpl::true_ + { + }; + +} // namespace detail + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // tail_impl + template<typename Sample, typename LeftRight> + struct tail_impl + : accumulator_base + { + // LeftRight must be either right or left + BOOST_MPL_ASSERT(( + mpl::or_<is_same<LeftRight, right>, is_same<LeftRight, left> > + )); + + typedef + typename mpl::if_< + is_same<LeftRight, right> + , numeric::functional::greater<Sample const, Sample const> + , numeric::functional::less<Sample const, Sample const> + >::type + predicate_type; + + // for boost::result_of + typedef typename detail::tail_range< + typename std::vector<Sample>::const_iterator + , std::vector<std::size_t>::iterator + >::type result_type; + + template<typename Args> + tail_impl(Args const &args) + : is_sorted(false) + , indices() + , samples(args[tag::tail<LeftRight>::cache_size], args[sample | Sample()]) + { + this->indices.reserve(this->samples.size()); + } + + tail_impl(tail_impl const &that) + : is_sorted(that.is_sorted) + , indices(that.indices) + , samples(that.samples) + { + this->indices.reserve(this->samples.size()); + } + + // This just stores the heap and the samples. + // In operator()() below, if we are adding a new sample + // to the sample cache, we force all the + // tail_variates to update also. (It's not + // good enough to wait for the accumulator_set to do it + // for us because then information about whether a sample + // was stored and where is lost, and would need to be + // queried at runtime, which would be slow.) This is + // implemented as a filtered visitation over the stats, + // which we can access because args[accumulator] gives us + // all the stats. + + template<typename Args> + void operator ()(Args const &args) + { + if(this->indices.size() < this->samples.size()) + { + this->indices.push_back(this->indices.size()); + this->assign(args, this->indices.back()); + } + else if(predicate_type()(args[sample], this->samples[this->indices[0]])) + { + std::pop_heap(this->indices.begin(), this->indices.end(), indirect_cmp(this->samples)); + this->assign(args, this->indices.back()); + } + } + + result_type result(dont_care) const + { + if(!this->is_sorted) + { + // Must use the same predicate here as in push_heap/pop_heap above. + std::sort_heap(this->indices.begin(), this->indices.end(), indirect_cmp(this->samples)); + // sort_heap puts elements in reverse order. Calling std::reverse + // turns the sorted sequence back into a valid heap. + std::reverse(this->indices.begin(), this->indices.end()); + this->is_sorted = true; + } + + return detail::make_tail_range( + this->samples.begin() + , this->indices.begin() + , this->indices.end() + ); + } + + private: + + struct is_tail_variate + { + template<typename T> + struct apply + : detail::is_tail_variate_feature< + typename detail::feature_tag<T>::type + , LeftRight + > + {}; + }; + + template<typename Args> + void assign(Args const &args, std::size_t index) + { + BOOST_ASSERT(index < this->samples.size()); + this->samples[index] = args[sample]; + std::push_heap(this->indices.begin(), this->indices.end(), indirect_cmp(this->samples)); + this->is_sorted = false; + // Tell the tail variates to store their values also + args[accumulator].template visit_if<is_tail_variate>(detail::stat_assign(args, index)); + } + + /////////////////////////////////////////////////////////////////////////////// + // + struct indirect_cmp + : std::binary_function<std::size_t, std::size_t, bool> + { + indirect_cmp(std::vector<Sample> const &s) + : samples(s) + { + } + + bool operator ()(std::size_t left, std::size_t right) const + { + return predicate_type()(this->samples[left], this->samples[right]); + } + + private: + indirect_cmp &operator =(indirect_cmp const &); + std::vector<Sample> const &samples; + }; + + mutable bool is_sorted; + mutable std::vector<std::size_t> indices; + std::vector<Sample> samples; + }; + +} // namespace impl + +// TODO The templatized tag::tail below should inherit from the correct named parameter. +// The following lines provide a workaround, but there must be a better way of doing this. +template<typename T> +struct tail_cache_size_named_arg +{ +}; +template<> +struct tail_cache_size_named_arg<left> + : tag::left_tail_cache_size +{ +}; +template<> +struct tail_cache_size_named_arg<right> + : tag::right_tail_cache_size +{ +}; + +/////////////////////////////////////////////////////////////////////////////// +// tag::tail<> +// +namespace tag +{ + template<typename LeftRight> + struct tail + : depends_on<> + , tail_cache_size_named_arg<LeftRight> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::tail_impl<mpl::_1, LeftRight> impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + /// tag::tail<LeftRight>::cache_size named parameter + static boost::parameter::keyword<tail_cache_size_named_arg<LeftRight> > const cache_size; + #endif + }; + + struct abstract_tail + : depends_on<> + { + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::tail +// +namespace extract +{ + extractor<tag::abstract_tail> const tail = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(tail) +} + +using extract::tail; + +template<typename LeftRight> +struct feature_of<tag::tail<LeftRight> > + : feature_of<tag::abstract_tail> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/tail_mean.hpp b/boost/accumulators/statistics/tail_mean.hpp new file mode 100644 index 0000000000..323eeb0f91 --- /dev/null +++ b/boost/accumulators/statistics/tail_mean.hpp @@ -0,0 +1,246 @@ +/////////////////////////////////////////////////////////////////////////////// +// tail_mean.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_TAIL_MEAN_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_TAIL_MEAN_HPP_DE_01_01_2006 + +#include <numeric> +#include <vector> +#include <limits> +#include <functional> +#include <sstream> +#include <stdexcept> +#include <boost/throw_exception.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/tail.hpp> +#include <boost/accumulators/statistics/tail_quantile.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost { namespace accumulators +{ + +namespace impl +{ + + /////////////////////////////////////////////////////////////////////////////// + // coherent_tail_mean_impl + // + /** + @brief Estimation of the coherent tail mean based on order statistics (for both left and right tails) + + The coherent tail mean \f$\widehat{CTM}_{n,\alpha}(X)\f$ is equal to the non-coherent tail mean \f$\widehat{NCTM}_{n,\alpha}(X)\f$ + plus a correction term that ensures coherence in case of non-continuous distributions. + + \f[ + \widehat{CTM}_{n,\alpha}^{\mathrm{right}}(X) = \widehat{NCTM}_{n,\alpha}^{\mathrm{right}}(X) + + \frac{1}{\lceil n(1-\alpha)\rceil}\hat{q}_{n,\alpha}(X)\left(1 - \alpha - \frac{1}{n}\lceil n(1-\alpha)\rceil \right) + \f] + + \f[ + \widehat{CTM}_{n,\alpha}^{\mathrm{left}}(X) = \widehat{NCTM}_{n,\alpha}^{\mathrm{left}}(X) + + \frac{1}{\lceil n\alpha\rceil}\hat{q}_{n,\alpha}(X)\left(\alpha - \frac{1}{n}\lceil n\alpha\rceil \right) + \f] + */ + template<typename Sample, typename LeftRight> + struct coherent_tail_mean_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + // for boost::result_of + typedef float_type result_type; + + coherent_tail_mean_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + std::size_t cnt = count(args); + + std::size_t n = static_cast<std::size_t>( + std::ceil( + cnt * ( ( is_same<LeftRight, left>::value ) ? args[quantile_probability] : 1. - args[quantile_probability] ) + ) + ); + + extractor<tag::non_coherent_tail_mean<LeftRight> > const some_non_coherent_tail_mean = {}; + + return some_non_coherent_tail_mean(args) + + numeric::average(quantile(args), n) + * ( + ( is_same<LeftRight, left>::value ) ? args[quantile_probability] : 1. - args[quantile_probability] + - numeric::average(n, count(args)) + ); + } + }; + + /////////////////////////////////////////////////////////////////////////////// + // non_coherent_tail_mean_impl + // + /** + @brief Estimation of the (non-coherent) tail mean based on order statistics (for both left and right tails) + + An estimation of the non-coherent tail mean \f$\widehat{NCTM}_{n,\alpha}(X)\f$ is given by the mean of the + \f$\lceil n\alpha\rceil\f$ smallest samples (left tail) or the mean of the \f$\lceil n(1-\alpha)\rceil\f$ + largest samples (right tail), \f$n\f$ being the total number of samples and \f$\alpha\f$ the quantile level: + + \f[ + \widehat{NCTM}_{n,\alpha}^{\mathrm{right}}(X) = \frac{1}{\lceil n(1-\alpha)\rceil} \sum_{i=\lceil \alpha n \rceil}^n X_{i:n} + \f] + + \f[ + \widehat{NCTM}_{n,\alpha}^{\mathrm{left}}(X) = \frac{1}{\lceil n\alpha\rceil} \sum_{i=1}^{\lceil \alpha n \rceil} X_{i:n} + \f] + + It thus requires the caching of at least the \f$\lceil n\alpha\rceil\f$ smallest or the \f$\lceil n(1-\alpha)\rceil\f$ + largest samples. + + @param quantile_probability + */ + template<typename Sample, typename LeftRight> + struct non_coherent_tail_mean_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + // for boost::result_of + typedef float_type result_type; + + non_coherent_tail_mean_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + std::size_t cnt = count(args); + + std::size_t n = static_cast<std::size_t>( + std::ceil( + cnt * ( ( is_same<LeftRight, left>::value ) ? args[quantile_probability] : 1. - args[quantile_probability] ) + ) + ); + + // If n is in a valid range, return result, otherwise return NaN or throw exception + if (n <= static_cast<std::size_t>(tail(args).size())) + return numeric::average( + std::accumulate( + tail(args).begin() + , tail(args).begin() + n + , Sample(0) + ) + , n + ); + else + { + if (std::numeric_limits<result_type>::has_quiet_NaN) + { + return std::numeric_limits<result_type>::quiet_NaN(); + } + else + { + std::ostringstream msg; + msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + return Sample(0); + } + } + } + }; + +} // namespace impl + + +/////////////////////////////////////////////////////////////////////////////// +// tag::coherent_tail_mean<> +// tag::non_coherent_tail_mean<> +// +namespace tag +{ + template<typename LeftRight> + struct coherent_tail_mean + : depends_on<count, quantile, non_coherent_tail_mean<LeftRight> > + { + typedef accumulators::impl::coherent_tail_mean_impl<mpl::_1, LeftRight> impl; + }; + + template<typename LeftRight> + struct non_coherent_tail_mean + : depends_on<count, tail<LeftRight> > + { + typedef accumulators::impl::non_coherent_tail_mean_impl<mpl::_1, LeftRight> impl; + }; + + struct abstract_non_coherent_tail_mean + : depends_on<> + { + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::non_coherent_tail_mean; +// extract::coherent_tail_mean; +// +namespace extract +{ + extractor<tag::abstract_non_coherent_tail_mean> const non_coherent_tail_mean = {}; + extractor<tag::tail_mean> const coherent_tail_mean = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(non_coherent_tail_mean) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(coherent_tail_mean) +} + +using extract::non_coherent_tail_mean; +using extract::coherent_tail_mean; + +// for the purposes of feature-based dependency resolution, +// coherent_tail_mean<LeftRight> provides the same feature as tail_mean +template<typename LeftRight> +struct feature_of<tag::coherent_tail_mean<LeftRight> > + : feature_of<tag::tail_mean> +{ +}; + +template<typename LeftRight> +struct feature_of<tag::non_coherent_tail_mean<LeftRight> > + : feature_of<tag::abstract_non_coherent_tail_mean> +{ +}; + +// So that non_coherent_tail_mean can be automatically substituted +// with weighted_non_coherent_tail_mean when the weight parameter is non-void. +template<typename LeftRight> +struct as_weighted_feature<tag::non_coherent_tail_mean<LeftRight> > +{ + typedef tag::non_coherent_weighted_tail_mean<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::non_coherent_weighted_tail_mean<LeftRight> > + : feature_of<tag::non_coherent_tail_mean<LeftRight> > +{}; + +// NOTE that non_coherent_tail_mean cannot be feature-grouped with tail_mean, +// which is the base feature for coherent tail means, since (at least for +// non-continuous distributions) non_coherent_tail_mean is a different measure! + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/tail_quantile.hpp b/boost/accumulators/statistics/tail_quantile.hpp new file mode 100644 index 0000000000..d3b96b4968 --- /dev/null +++ b/boost/accumulators/statistics/tail_quantile.hpp @@ -0,0 +1,158 @@ +/////////////////////////////////////////////////////////////////////////////// +// tail_quantile.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_TAIL_QUANTILE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_TAIL_QUANTILE_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <functional> +#include <sstream> +#include <stdexcept> +#include <boost/config/no_tr1/cmath.hpp> // For ceil +#include <boost/throw_exception.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/mpl/if.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/tail.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // tail_quantile_impl + // Tail quantile estimation based on order statistics + /** + @brief Tail quantile estimation based on order statistics (for both left and right tails) + + The estimation of a tail quantile \f$\hat{q}\f$ with level \f$\alpha\f$ based on order statistics requires the + chaching of at least the \f$\lceil n\alpha\rceil\f$ smallest or the \f$\lceil n(1-\alpha)\rceil\f$ largest samples, + \f$n\f$ being the total number of samples. The largest of the \f$\lceil n\alpha\rceil\f$ smallest samples or the + smallest of the \f$\lceil n(1-\alpha)\rceil\f$ largest samples provides an estimate for the quantile: + + \f[ + \hat{q}_{n,\alpha} = X_{\lceil \alpha n \rceil:n} + \f] + + @param quantile_probability + */ + template<typename Sample, typename LeftRight> + struct tail_quantile_impl + : accumulator_base + { + // for boost::result_of + typedef Sample result_type; + + tail_quantile_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + std::size_t cnt = count(args); + + std::size_t n = static_cast<std::size_t>( + std::ceil( + cnt * ( ( is_same<LeftRight, left>::value ) ? args[quantile_probability] : 1. - args[quantile_probability] ) + ) + ); + + // If n is in a valid range, return result, otherwise return NaN or throw exception + if ( n < static_cast<std::size_t>(tail(args).size())) + { + // Note that the cached samples of the left are sorted in ascending order, + // whereas the samples of the right tail are sorted in descending order + return *(boost::begin(tail(args)) + n - 1); + } + else + { + if (std::numeric_limits<result_type>::has_quiet_NaN) + { + return std::numeric_limits<result_type>::quiet_NaN(); + } + else + { + std::ostringstream msg; + msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + return Sample(0); + } + } + } + }; +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::tail_quantile<> +// +namespace tag +{ + template<typename LeftRight> + struct tail_quantile + : depends_on<count, tail<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::tail_quantile_impl<mpl::_1, LeftRight> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::tail_quantile +// +namespace extract +{ + extractor<tag::quantile> const tail_quantile = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(tail_quantile) +} + +using extract::tail_quantile; + +// for the purposes of feature-based dependency resolution, +// tail_quantile<LeftRight> provide the same feature as quantile +template<typename LeftRight> +struct feature_of<tag::tail_quantile<LeftRight> > + : feature_of<tag::quantile> +{ +}; + +// So that tail_quantile can be automatically substituted with +// weighted_tail_quantile when the weight parameter is non-void. +template<typename LeftRight> +struct as_weighted_feature<tag::tail_quantile<LeftRight> > +{ + typedef tag::weighted_tail_quantile<LeftRight> type; +}; + +template<typename LeftRight> +struct feature_of<tag::weighted_tail_quantile<LeftRight> > + : feature_of<tag::tail_quantile<LeftRight> > +{}; + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/tail_variate.hpp b/boost/accumulators/statistics/tail_variate.hpp new file mode 100644 index 0000000000..a9fc7d297a --- /dev/null +++ b/boost/accumulators/statistics/tail_variate.hpp @@ -0,0 +1,141 @@ +/////////////////////////////////////////////////////////////////////////////// +// tail_variate.hpp +// +// Copyright 2005 Eric Niebler, Michael Gauckler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_STAT_STATISTICS_TAIL_VARIATE_HPP_EAN_28_10_2005 +#define BOOST_STAT_STATISTICS_TAIL_VARIATE_HPP_EAN_28_10_2005 + +#include <boost/range.hpp> +#include <boost/mpl/always.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/iterator/reverse_iterator.hpp> +#include <boost/iterator/permutation_iterator.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/tail.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // tail_variate_impl + template<typename VariateType, typename VariateTag, typename LeftRight> + struct tail_variate_impl + : accumulator_base + { + // for boost::result_of + typedef + typename detail::tail_range< + typename std::vector<VariateType>::const_iterator + , std::vector<std::size_t>::iterator + >::type + result_type; + + template<typename Args> + tail_variate_impl(Args const &args) + : variates(args[tag::tail<LeftRight>::cache_size], args[parameter::keyword<VariateTag>::get() | VariateType()]) + { + } + + template<typename Args> + void assign(Args const &args, std::size_t index) + { + this->variates[index] = args[parameter::keyword<VariateTag>::get()]; + } + + template<typename Args> + result_type result(Args const &args) const + { + // getting the order result causes the indices vector to be sorted. + extractor<tag::tail<LeftRight> > const some_tail = {}; + return this->do_result(some_tail(args)); + } + + private: + template<typename TailRng> + result_type do_result(TailRng const &rng) const + { + return detail::make_tail_range( + this->variates.begin() + , rng.end().base().base() // the index iterator + , rng.begin().base().base() // (begin and end reversed because these are reverse iterators) + ); + } + + std::vector<VariateType> variates; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::tail_variate<> +// +namespace tag +{ + template<typename VariateType, typename VariateTag, typename LeftRight> + struct tail_variate + : depends_on<tail<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef mpl::always<accumulators::impl::tail_variate_impl<VariateType, VariateTag, LeftRight> > impl; + }; + + struct abstract_tail_variate + : depends_on<> + { + }; + + template<typename LeftRight> + struct tail_weights + : depends_on<tail<LeftRight> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::tail_variate_impl<mpl::_2, tag::weight, LeftRight> impl; + }; + + struct abstract_tail_weights + : depends_on<> + { + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::tail_variate +// extract::tail_weights +// +namespace extract +{ + extractor<tag::abstract_tail_variate> const tail_variate = {}; + extractor<tag::abstract_tail_weights> const tail_weights = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(tail_variate) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(tail_weights) +} + +using extract::tail_variate; +using extract::tail_weights; + +template<typename VariateType, typename VariateTag, typename LeftRight> +struct feature_of<tag::tail_variate<VariateType, VariateTag, LeftRight> > + : feature_of<tag::abstract_tail_variate> +{ +}; + +template<typename LeftRight> +struct feature_of<tag::tail_weights<LeftRight> > +{ + typedef tag::abstract_tail_weights type; +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/tail_variate_means.hpp b/boost/accumulators/statistics/tail_variate_means.hpp new file mode 100644 index 0000000000..98991e217b --- /dev/null +++ b/boost/accumulators/statistics/tail_variate_means.hpp @@ -0,0 +1,258 @@ +/////////////////////////////////////////////////////////////////////////////// +// tail_variate_means.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_TAIL_VARIATE_MEANS_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_TAIL_VARIATE_MEANS_HPP_DE_01_01_2006 + +#include <numeric> +#include <vector> +#include <limits> +#include <functional> +#include <sstream> +#include <stdexcept> +#include <boost/throw_exception.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/tail.hpp> +#include <boost/accumulators/statistics/tail_variate.hpp> +#include <boost/accumulators/statistics/tail_mean.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /** + @brief Estimation of the absolute and relative tail variate means (for both left and right tails) + + For all \f$j\f$-th variates associated to the \f$\lceil n(1-\alpha)\rceil\f$ largest samples (or the + \f$\lceil n(1-\alpha)\rceil\f$ smallest samples in case of the left tail), the absolute tail means + \f$\widehat{ATM}_{n,\alpha}(X, j)\f$ are computed and returned as an iterator range. Alternatively, + the relative tail means \f$\widehat{RTM}_{n,\alpha}(X, j)\f$ are returned, which are the absolute + tail means normalized with the (non-coherent) sample tail mean \f$\widehat{NCTM}_{n,\alpha}(X)\f$. + + \f[ + \widehat{ATM}_{n,\alpha}^{\mathrm{right}}(X, j) = + \frac{1}{\lceil n(1-\alpha) \rceil} + \sum_{i=\lceil \alpha n \rceil}^n \xi_{j,i} + \f] + + \f[ + \widehat{ATM}_{n,\alpha}^{\mathrm{left}}(X, j) = + \frac{1}{\lceil n\alpha \rceil} + \sum_{i=1}^{\lceil n\alpha \rceil} \xi_{j,i} + \f] + + \f[ + \widehat{RTM}_{n,\alpha}^{\mathrm{right}}(X, j) = + \frac{\sum_{i=\lceil n\alpha \rceil}^n \xi_{j,i}} + {\lceil n(1-\alpha)\rceil\widehat{NCTM}_{n,\alpha}^{\mathrm{right}}(X)} + \f] + + \f[ + \widehat{RTM}_{n,\alpha}^{\mathrm{left}}(X, j) = + \frac{\sum_{i=1}^{\lceil n\alpha \rceil} \xi_{j,i}} + {\lceil n\alpha\rceil\widehat{NCTM}_{n,\alpha}^{\mathrm{left}}(X)} + \f] + */ + + /////////////////////////////////////////////////////////////////////////////// + // tail_variate_means_impl + // by default: absolute tail_variate_means + template<typename Sample, typename Impl, typename LeftRight, typename VariateTag> + struct tail_variate_means_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef std::vector<float_type> array_type; + // for boost::result_of + typedef iterator_range<typename array_type::iterator> result_type; + + tail_variate_means_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + std::size_t cnt = count(args); + + std::size_t n = static_cast<std::size_t>( + std::ceil( + cnt * ( ( is_same<LeftRight, left>::value ) ? args[quantile_probability] : 1. - args[quantile_probability] ) + ) + ); + + std::size_t num_variates = tail_variate(args).begin()->size(); + + this->tail_means_.clear(); + this->tail_means_.resize(num_variates, Sample(0)); + + // If n is in a valid range, return result, otherwise return NaN or throw exception + if (n < static_cast<std::size_t>(tail(args).size())) + { + this->tail_means_ = std::accumulate( + tail_variate(args).begin() + , tail_variate(args).begin() + n + , this->tail_means_ + , numeric::plus + ); + + float_type factor = n * ( (is_same<Impl, relative>::value) ? non_coherent_tail_mean(args) : 1. ); + + std::transform( + this->tail_means_.begin() + , this->tail_means_.end() + , this->tail_means_.begin() + , std::bind2nd(std::divides<float_type>(), factor) + ); + } + else + { + if (std::numeric_limits<float_type>::has_quiet_NaN) + { + std::fill( + this->tail_means_.begin() + , this->tail_means_.end() + , std::numeric_limits<float_type>::quiet_NaN() + ); + } + else + { + std::ostringstream msg; + msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + } + } + return make_iterator_range(this->tail_means_); + } + + private: + + mutable array_type tail_means_; + + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::absolute_tail_variate_means +// tag::relative_tail_variate_means +// +namespace tag +{ + template<typename LeftRight, typename VariateType, typename VariateTag> + struct absolute_tail_variate_means + : depends_on<count, non_coherent_tail_mean<LeftRight>, tail_variate<VariateType, VariateTag, LeftRight> > + { + typedef accumulators::impl::tail_variate_means_impl<mpl::_1, absolute, LeftRight, VariateTag> impl; + }; + template<typename LeftRight, typename VariateType, typename VariateTag> + struct relative_tail_variate_means + : depends_on<count, non_coherent_tail_mean<LeftRight>, tail_variate<VariateType, VariateTag, LeftRight> > + { + typedef accumulators::impl::tail_variate_means_impl<mpl::_1, relative, LeftRight, VariateTag> impl; + }; + struct abstract_absolute_tail_variate_means + : depends_on<> + { + }; + struct abstract_relative_tail_variate_means + : depends_on<> + { + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::tail_variate_means +// extract::relative_tail_variate_means +// +namespace extract +{ + extractor<tag::abstract_absolute_tail_variate_means> const tail_variate_means = {}; + extractor<tag::abstract_relative_tail_variate_means> const relative_tail_variate_means = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(tail_variate_means) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(relative_tail_variate_means) +} + +using extract::tail_variate_means; +using extract::relative_tail_variate_means; + +// tail_variate_means<LeftRight, VariateType, VariateTag>(absolute) -> absolute_tail_variate_means<LeftRight, VariateType, VariateTag> +template<typename LeftRight, typename VariateType, typename VariateTag> +struct as_feature<tag::tail_variate_means<LeftRight, VariateType, VariateTag>(absolute)> +{ + typedef tag::absolute_tail_variate_means<LeftRight, VariateType, VariateTag> type; +}; + +// tail_variate_means<LeftRight, VariateType, VariateTag>(relative) ->relative_tail_variate_means<LeftRight, VariateType, VariateTag> +template<typename LeftRight, typename VariateType, typename VariateTag> +struct as_feature<tag::tail_variate_means<LeftRight, VariateType, VariateTag>(relative)> +{ + typedef tag::relative_tail_variate_means<LeftRight, VariateType, VariateTag> type; +}; + +// Provides non-templatized extractor +template<typename LeftRight, typename VariateType, typename VariateTag> +struct feature_of<tag::absolute_tail_variate_means<LeftRight, VariateType, VariateTag> > + : feature_of<tag::abstract_absolute_tail_variate_means> +{ +}; + +// Provides non-templatized extractor +template<typename LeftRight, typename VariateType, typename VariateTag> +struct feature_of<tag::relative_tail_variate_means<LeftRight, VariateType, VariateTag> > + : feature_of<tag::abstract_relative_tail_variate_means> +{ +}; + +// So that absolute_tail_means can be automatically substituted +// with absolute_weighted_tail_means when the weight parameter is non-void. +template<typename LeftRight, typename VariateType, typename VariateTag> +struct as_weighted_feature<tag::absolute_tail_variate_means<LeftRight, VariateType, VariateTag> > +{ + typedef tag::absolute_weighted_tail_variate_means<LeftRight, VariateType, VariateTag> type; +}; + +template<typename LeftRight, typename VariateType, typename VariateTag> +struct feature_of<tag::absolute_weighted_tail_variate_means<LeftRight, VariateType, VariateTag> > + : feature_of<tag::absolute_tail_variate_means<LeftRight, VariateType, VariateTag> > +{ +}; + +// So that relative_tail_means can be automatically substituted +// with relative_weighted_tail_means when the weight parameter is non-void. +template<typename LeftRight, typename VariateType, typename VariateTag> +struct as_weighted_feature<tag::relative_tail_variate_means<LeftRight, VariateType, VariateTag> > +{ + typedef tag::relative_weighted_tail_variate_means<LeftRight, VariateType, VariateTag> type; +}; + +template<typename LeftRight, typename VariateType, typename VariateTag> +struct feature_of<tag::relative_weighted_tail_variate_means<LeftRight, VariateType, VariateTag> > + : feature_of<tag::relative_tail_variate_means<LeftRight, VariateType, VariateTag> > +{ +}; + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/times2_iterator.hpp b/boost/accumulators/statistics/times2_iterator.hpp new file mode 100644 index 0000000000..211a46e74f --- /dev/null +++ b/boost/accumulators/statistics/times2_iterator.hpp @@ -0,0 +1,58 @@ +/////////////////////////////////////////////////////////////////////////////// +// times2_iterator.hpp +// +// Copyright 2006 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_TIMES2_ITERATOR_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_TIMES2_ITERATOR_HPP_DE_01_01_2006 + +#include <functional> +#include <boost/range/begin.hpp> +#include <boost/range/end.hpp> +#include <boost/range/iterator_range.hpp> +#include <boost/iterator/transform_iterator.hpp> +#include <boost/iterator/counting_iterator.hpp> +#include <boost/iterator/permutation_iterator.hpp> + +namespace boost { namespace accumulators +{ + +namespace detail +{ + typedef transform_iterator< + std::binder1st<std::multiplies<std::size_t> > + , counting_iterator<std::size_t> + > times2_iterator; + + inline times2_iterator make_times2_iterator(std::size_t i) + { + return make_transform_iterator( + make_counting_iterator(i) + , std::bind1st(std::multiplies<std::size_t>(), 2) + ); + } + + + /////////////////////////////////////////////////////////////////////////////// + // lvalue_index_iterator + template<typename Base> + struct lvalue_index_iterator + : Base + { + lvalue_index_iterator(Base base) + : Base(base) + { + } + + typename Base::reference operator [](typename Base::difference_type n) const + { + return *(*this + n); + } + }; +} // namespace detail + +}} + +#endif diff --git a/boost/accumulators/statistics/variance.hpp b/boost/accumulators/statistics/variance.hpp new file mode 100644 index 0000000000..81807a2621 --- /dev/null +++ b/boost/accumulators/statistics/variance.hpp @@ -0,0 +1,236 @@ +/////////////////////////////////////////////////////////////////////////////// +// variance.hpp +// +// Copyright 2005 Daniel Egloff, Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_VARIANCE_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_VARIANCE_HPP_EAN_28_10_2005 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/sum.hpp> +#include <boost/accumulators/statistics/mean.hpp> +#include <boost/accumulators/statistics/moment.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + //! Lazy calculation of variance. + /*! + Default sample variance implementation based on the second moment \f$ M_n^{(2)} \f$ moment<2>, mean and count. + \f[ + \sigma_n^2 = M_n^{(2)} - \mu_n^2. + \f] + where + \f[ + \mu_n = \frac{1}{n} \sum_{i = 1}^n x_i. + \f] + is the estimate of the sample mean and \f$n\f$ is the number of samples. + */ + template<typename Sample, typename MeanFeature> + struct lazy_variance_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + lazy_variance_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + extractor<MeanFeature> mean; + result_type tmp = mean(args); + return accumulators::moment<2>(args) - tmp * tmp; + } + }; + + //! Iterative calculation of variance. + /*! + Iterative calculation of sample variance \f$\sigma_n^2\f$ according to the formula + \f[ + \sigma_n^2 = \frac{1}{n} \sum_{i = 1}^n (x_i - \mu_n)^2 = \frac{n-1}{n} \sigma_{n-1}^2 + \frac{1}{n-1}(x_n - \mu_n)^2. + \f] + where + \f[ + \mu_n = \frac{1}{n} \sum_{i = 1}^n x_i. + \f] + is the estimate of the sample mean and \f$n\f$ is the number of samples. + + Note that the sample variance is not defined for \f$n <= 1\f$. + + A simplification can be obtained by the approximate recursion + \f[ + \sigma_n^2 \approx \frac{n-1}{n} \sigma_{n-1}^2 + \frac{1}{n}(x_n - \mu_n)^2. + \f] + because the difference + \f[ + \left(\frac{1}{n-1} - \frac{1}{n}\right)(x_n - \mu_n)^2 = \frac{1}{n(n-1)}(x_n - \mu_n)^2. + \f] + converges to zero as \f$n \rightarrow \infty\f$. However, for small \f$ n \f$ the difference + can be non-negligible. + */ + template<typename Sample, typename MeanFeature, typename Tag> + struct variance_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + template<typename Args> + variance_impl(Args const &args) + : variance(numeric::average(args[sample | Sample()], numeric::one<std::size_t>::value)) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + + if(cnt > 1) + { + extractor<MeanFeature> mean; + result_type tmp = args[parameter::keyword<Tag>::get()] - mean(args); + this->variance = + numeric::average(this->variance * (cnt - 1), cnt) + + numeric::average(tmp * tmp, cnt - 1); + } + } + + result_type result(dont_care) const + { + return this->variance; + } + + private: + result_type variance; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::variance +// tag::immediate_variance +// +namespace tag +{ + struct lazy_variance + : depends_on<moment<2>, mean> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::lazy_variance_impl<mpl::_1, mean> impl; + }; + + struct variance + : depends_on<count, immediate_mean> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::variance_impl<mpl::_1, mean, sample> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::lazy_variance +// extract::variance +// +namespace extract +{ + extractor<tag::lazy_variance> const lazy_variance = {}; + extractor<tag::variance> const variance = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(lazy_variance) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(variance) +} + +using extract::lazy_variance; +using extract::variance; + +// variance(lazy) -> lazy_variance +template<> +struct as_feature<tag::variance(lazy)> +{ + typedef tag::lazy_variance type; +}; + +// variance(immediate) -> variance +template<> +struct as_feature<tag::variance(immediate)> +{ + typedef tag::variance type; +}; + +// for the purposes of feature-based dependency resolution, +// immediate_variance provides the same feature as variance +template<> +struct feature_of<tag::lazy_variance> + : feature_of<tag::variance> +{ +}; + +// So that variance can be automatically substituted with +// weighted_variance when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::variance> +{ + typedef tag::weighted_variance type; +}; + +// for the purposes of feature-based dependency resolution, +// weighted_variance provides the same feature as variance +template<> +struct feature_of<tag::weighted_variance> + : feature_of<tag::variance> +{ +}; + +// So that immediate_variance can be automatically substituted with +// immediate_weighted_variance when the weight parameter is non-void. +template<> +struct as_weighted_feature<tag::lazy_variance> +{ + typedef tag::lazy_weighted_variance type; +}; + +// for the purposes of feature-based dependency resolution, +// immediate_weighted_variance provides the same feature as immediate_variance +template<> +struct feature_of<tag::lazy_weighted_variance> + : feature_of<tag::lazy_variance> +{ +}; + +//////////////////////////////////////////////////////////////////////////// +//// droppable_accumulator<variance_impl> +//// need to specialize droppable lazy variance to cache the result at the +//// point the accumulator is dropped. +///// INTERNAL ONLY +///// +//template<typename Sample, typename MeanFeature> +//struct droppable_accumulator<impl::variance_impl<Sample, MeanFeature> > +// : droppable_accumulator_base< +// with_cached_result<impl::variance_impl<Sample, MeanFeature> > +// > +//{ +// template<typename Args> +// droppable_accumulator(Args const &args) +// : droppable_accumulator::base(args) +// { +// } +//}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/variates/covariate.hpp b/boost/accumulators/statistics/variates/covariate.hpp new file mode 100644 index 0000000000..eeb4835274 --- /dev/null +++ b/boost/accumulators/statistics/variates/covariate.hpp @@ -0,0 +1,21 @@ +/////////////////////////////////////////////////////////////////////////////// +// weight.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_VARIATES_COVARIATE_HPP_EAN_03_11_2005 +#define BOOST_ACCUMULATORS_STATISTICS_VARIATES_COVARIATE_HPP_EAN_03_11_2005 + +#include <boost/parameter/keyword.hpp> + +namespace boost { namespace accumulators +{ + +BOOST_PARAMETER_KEYWORD(tag, covariate1) +BOOST_PARAMETER_KEYWORD(tag, covariate2) + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_covariance.hpp b/boost/accumulators/statistics/weighted_covariance.hpp new file mode 100644 index 0000000000..83585b1b65 --- /dev/null +++ b/boost/accumulators/statistics/weighted_covariance.hpp @@ -0,0 +1,133 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_covariance.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_COVARIANCE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_COVARIANCE_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <numeric> +#include <functional> +#include <complex> +#include <boost/mpl/assert.hpp> +#include <boost/mpl/bool.hpp> +#include <boost/range.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/numeric/ublas/io.hpp> +#include <boost/numeric/ublas/matrix.hpp> +#include <boost/type_traits/is_scalar.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/covariance.hpp> // for numeric::outer_product() and type traits +#include <boost/accumulators/statistics/weighted_mean.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_covariance_impl + // + /** + @brief Weighted Covariance Estimator + + An iterative Monte Carlo estimator for the weighted covariance \f$\mathrm{Cov}(X,X')\f$, where \f$X\f$ is a sample + and \f$X'\f$ a variate, is given by: + + \f[ + \hat{c}_n = \frac{\bar{w}_n-w_n}{\bar{w}_n} \hat{c}_{n-1} + \frac{w_n}{\bar{w}_n-w_n}(X_n - \hat{\mu}_n)(X_n' - \hat{\mu}_n'), + \quad n\ge2,\quad\hat{c}_1 = 0, + \f] + + \f$\hat{\mu}_n\f$ and \f$\hat{\mu}_n'\f$ being the weighted means of the samples and variates and + \f$\bar{w}_n\f$ the sum of the \f$n\f$ first weights \f$w_i\f$. + */ + template<typename Sample, typename Weight, typename VariateType, typename VariateTag> + struct weighted_covariance_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Weight, typename numeric::functional::average<Sample, std::size_t>::result_type>::result_type weighted_sample_type; + typedef typename numeric::functional::multiplies<Weight, typename numeric::functional::average<VariateType, std::size_t>::result_type>::result_type weighted_variate_type; + // for boost::result_of + typedef typename numeric::functional::outer_product<weighted_sample_type, weighted_variate_type>::result_type result_type; + + template<typename Args> + weighted_covariance_impl(Args const &args) + : cov_( + numeric::outer_product( + numeric::average(args[sample | Sample()], (std::size_t)1) + * numeric::one<Weight>::value + , numeric::average(args[parameter::keyword<VariateTag>::get() | VariateType()], (std::size_t)1) + * numeric::one<Weight>::value + ) + ) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + + if (cnt > 1) + { + extractor<tag::weighted_mean_of_variates<VariateType, VariateTag> > const some_weighted_mean_of_variates = {}; + + this->cov_ = this->cov_ * (sum_of_weights(args) - args[weight]) / sum_of_weights(args) + + numeric::outer_product( + some_weighted_mean_of_variates(args) - args[parameter::keyword<VariateTag>::get()] + , weighted_mean(args) - args[sample] + ) * args[weight] / (sum_of_weights(args) - args[weight]); + } + } + + result_type result(dont_care) const + { + return this->cov_; + } + + private: + result_type cov_; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_covariance +// +namespace tag +{ + template<typename VariateType, typename VariateTag> + struct weighted_covariance + : depends_on<count, sum_of_weights, weighted_mean, weighted_mean_of_variates<VariateType, VariateTag> > + { + typedef accumulators::impl::weighted_covariance_impl<mpl::_1, mpl::_2, VariateType, VariateTag> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_covariance +// +namespace extract +{ + extractor<tag::abstract_covariance> const weighted_covariance = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_covariance) +} + +using extract::weighted_covariance; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_density.hpp b/boost/accumulators/statistics/weighted_density.hpp new file mode 100644 index 0000000000..68dc639377 --- /dev/null +++ b/boost/accumulators/statistics/weighted_density.hpp @@ -0,0 +1,221 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_density.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <functional> +#include <boost/range.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/sum.hpp> +#include <boost/accumulators/statistics/max.hpp> +#include <boost/accumulators/statistics/min.hpp> +#include <boost/accumulators/statistics/density.hpp> // for named parameters density_cache_size and density_num_bins + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_density_impl + // density histogram for weighted samples + /** + @brief Histogram density estimator for weighted samples + + The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins + are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the + maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally, + an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined, + the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is + returned, where each pair contains the position of the bin (lower bound) and the sum of the weights (normalized with the + sum of all weights). + + @param density_cache_size Number of first samples used to determine min and max. + @param density_num_bins Number of bins (two additional bins collect under- and overflow samples). + */ + template<typename Sample, typename Weight> + struct weighted_density_impl + : accumulator_base + { + typedef typename numeric::functional::average<Weight, std::size_t>::result_type float_type; + typedef std::vector<std::pair<float_type, float_type> > histogram_type; + typedef std::vector<float_type> array_type; + // for boost::result_of + typedef iterator_range<typename histogram_type::iterator> result_type; + + template<typename Args> + weighted_density_impl(Args const &args) + : cache_size(args[density_cache_size]) + , cache(cache_size) + , num_bins(args[density_num_bins]) + , samples_in_bin(num_bins + 2, 0.) + , bin_positions(num_bins + 2) + , histogram( + num_bins + 2 + , std::make_pair( + numeric::average(args[sample | Sample()],(std::size_t)1) + , numeric::average(args[sample | Sample()],(std::size_t)1) + ) + ) + , is_dirty(true) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + this->is_dirty = true; + + std::size_t cnt = count(args); + + // Fill up cache with cache_size first samples + if (cnt <= this->cache_size) + { + this->cache[cnt - 1] = std::make_pair(args[sample], args[weight]); + } + + // Once cache_size samples have been accumulated, create num_bins bins of same size between + // the minimum and maximum of the cached samples as well as an under- and an overflow bin. + // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin). + if (cnt == this->cache_size) + { + float_type minimum = numeric::average((min)(args),(std::size_t)1); + float_type maximum = numeric::average((max)(args),(std::size_t)1); + float_type bin_size = numeric::average(maximum - minimum, this->num_bins); + + // determine bin positions (their lower bounds) + for (std::size_t i = 0; i < this->num_bins + 2; ++i) + { + this->bin_positions[i] = minimum + (i - 1.) * bin_size; + } + + for (typename histogram_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter) + { + if (iter->first < this->bin_positions[1]) + { + this->samples_in_bin[0] += iter->second; + } + else if (iter->first >= this->bin_positions[this->num_bins + 1]) + { + this->samples_in_bin[this->num_bins + 1] += iter->second; + } + else + { + typename array_type::iterator it = std::upper_bound( + this->bin_positions.begin() + , this->bin_positions.end() + , iter->first + ); + + std::size_t d = std::distance(this->bin_positions.begin(), it); + this->samples_in_bin[d - 1] += iter->second; + } + } + } + // Add each subsequent sample to the correct bin + else if (cnt > this->cache_size) + { + if (args[sample] < this->bin_positions[1]) + { + this->samples_in_bin[0] += args[weight]; + } + else if (args[sample] >= this->bin_positions[this->num_bins + 1]) + { + this->samples_in_bin[this->num_bins + 1] += args[weight]; + } + else + { + typename array_type::iterator it = std::upper_bound( + this->bin_positions.begin() + , this->bin_positions.end() + , args[sample] + ); + + std::size_t d = std::distance(this->bin_positions.begin(), it); + this->samples_in_bin[d - 1] += args[weight]; + } + } + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + // creates a vector of std::pair where each pair i holds + // the values bin_positions[i] (x-axis of histogram) and + // samples_in_bin[i] / cnt (y-axis of histogram). + + for (std::size_t i = 0; i < this->num_bins + 2; ++i) + { + this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::average(this->samples_in_bin[i], sum_of_weights(args))); + } + } + + // returns a range of pairs + return make_iterator_range(this->histogram); + } + + private: + std::size_t cache_size; // number of cached samples + histogram_type cache; // cache to store the first cache_size samples with their weights as std::pair + std::size_t num_bins; // number of bins + array_type samples_in_bin; // number of samples in each bin + array_type bin_positions; // lower bounds of bins + mutable histogram_type histogram; // histogram + mutable bool is_dirty; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_density +// +namespace tag +{ + struct weighted_density + : depends_on<count, sum_of_weights, min, max> + , density_cache_size + , density_num_bins + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_density_impl<mpl::_1, mpl::_2> impl; + + #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED + static boost::parameter::keyword<density_cache_size> const cache_size; + static boost::parameter::keyword<density_num_bins> const num_bins; + #endif + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_density +// +namespace extract +{ + extractor<tag::density> const weighted_density = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_density) +} + +using extract::weighted_density; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_extended_p_square.hpp b/boost/accumulators/statistics/weighted_extended_p_square.hpp new file mode 100644 index 0000000000..7d7f38c77c --- /dev/null +++ b/boost/accumulators/statistics/weighted_extended_p_square.hpp @@ -0,0 +1,290 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_extended_p_square.hpp +// +// Copyright 2005 Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_EXTENDED_P_SQUARE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_EXTENDED_P_SQUARE_HPP_DE_01_01_2006 + +#include <vector> +#include <functional> +#include <boost/range/begin.hpp> +#include <boost/range/end.hpp> +#include <boost/range/iterator_range.hpp> +#include <boost/iterator/transform_iterator.hpp> +#include <boost/iterator/counting_iterator.hpp> +#include <boost/iterator/permutation_iterator.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/sum.hpp> +#include <boost/accumulators/statistics/times2_iterator.hpp> +#include <boost/accumulators/statistics/extended_p_square.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_extended_p_square_impl + // multiple quantile estimation with weighted samples + /** + @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm for weighted samples + + This version of the extended \f$P^2\f$ algorithm extends the extended \f$P^2\f$ algorithm to + support weighted samples. The extended \f$P^2\f$ algorithm dynamically estimates several + quantiles without storing samples. Assume that \f$m\f$ quantiles + \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated. Instead of storing the whole sample + cumulative distribution, the algorithm maintains only \f$m+2\f$ principal markers and + \f$m+1\f$ middle markers, whose positions are updated with each sample and whose heights + are adjusted (if necessary) using a piecewise-parablic formula. The heights of the principal + markers are the current estimates of the quantiles and are returned as an iterator range. + + For further details, see + + K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49, + Number 4 (October), 1986, p. 159-164. + + The extended \f$ P^2 \f$ algorithm generalizess the \f$ P^2 \f$ algorithm of + + R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and + histograms without storing observations, Communications of the ACM, + Volume 28 (October), Number 10, 1985, p. 1076-1085. + + @param extended_p_square_probabilities A vector of quantile probabilities. + */ + template<typename Sample, typename Weight> + struct weighted_extended_p_square_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type float_type; + typedef std::vector<float_type> array_type; + // for boost::result_of + typedef iterator_range< + detail::lvalue_index_iterator< + permutation_iterator< + typename array_type::const_iterator + , detail::times2_iterator + > + > + > result_type; + + template<typename Args> + weighted_extended_p_square_impl(Args const &args) + : probabilities( + boost::begin(args[extended_p_square_probabilities]) + , boost::end(args[extended_p_square_probabilities]) + ) + , heights(2 * probabilities.size() + 3) + , actual_positions(heights.size()) + , desired_positions(heights.size()) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + std::size_t sample_cell = 1; // k + std::size_t num_quantiles = this->probabilities.size(); + + // m+2 principal markers and m+1 middle markers + std::size_t num_markers = 2 * num_quantiles + 3; + + // first accumulate num_markers samples + if(cnt <= num_markers) + { + this->heights[cnt - 1] = args[sample]; + this->actual_positions[cnt - 1] = args[weight]; + + // complete the initialization of heights (and actual_positions) by sorting + if(cnt == num_markers) + { + // TODO: we need to sort the initial samples (in heights) in ascending order and + // sort their weights (in actual_positions) the same way. The following lines do + // it, but there must be a better and more efficient way of doing this. + typename array_type::iterator it_begin, it_end, it_min; + + it_begin = this->heights.begin(); + it_end = this->heights.end(); + + std::size_t pos = 0; + + while (it_begin != it_end) + { + it_min = std::min_element(it_begin, it_end); + std::size_t d = std::distance(it_begin, it_min); + std::swap(*it_begin, *it_min); + std::swap(this->actual_positions[pos], this->actual_positions[pos + d]); + ++it_begin; + ++pos; + } + + // calculate correct initial actual positions + for (std::size_t i = 1; i < num_markers; ++i) + { + actual_positions[i] += actual_positions[i - 1]; + } + } + } + else + { + if(args[sample] < this->heights[0]) + { + this->heights[0] = args[sample]; + this->actual_positions[0] = args[weight]; + sample_cell = 1; + } + else if(args[sample] >= this->heights[num_markers - 1]) + { + this->heights[num_markers - 1] = args[sample]; + sample_cell = num_markers - 1; + } + else + { + // find cell k = sample_cell such that heights[k-1] <= sample < heights[k] + + typedef typename array_type::iterator iterator; + iterator it = std::upper_bound( + this->heights.begin() + , this->heights.end() + , args[sample] + ); + + sample_cell = std::distance(this->heights.begin(), it); + } + + // update actual position of all markers above sample_cell + for(std::size_t i = sample_cell; i < num_markers; ++i) + { + this->actual_positions[i] += args[weight]; + } + + // compute desired positions + { + this->desired_positions[0] = this->actual_positions[0]; + this->desired_positions[num_markers - 1] = sum_of_weights(args); + this->desired_positions[1] = (sum_of_weights(args) - this->actual_positions[0]) * probabilities[0] + / 2. + this->actual_positions[0]; + this->desired_positions[num_markers - 2] = (sum_of_weights(args) - this->actual_positions[0]) + * (probabilities[num_quantiles - 1] + 1.) + / 2. + this->actual_positions[0]; + + for (std::size_t i = 0; i < num_quantiles; ++i) + { + this->desired_positions[2 * i + 2] = (sum_of_weights(args) - this->actual_positions[0]) + * probabilities[i] + this->actual_positions[0]; + } + + for (std::size_t i = 1; i < num_quantiles; ++i) + { + this->desired_positions[2 * i + 1] = (sum_of_weights(args) - this->actual_positions[0]) + * (probabilities[i - 1] + probabilities[i]) + / 2. + this->actual_positions[0]; + } + } + + // adjust heights and actual_positions of markers 1 to num_markers - 2 if necessary + for (std::size_t i = 1; i <= num_markers - 2; ++i) + { + // offset to desired position + float_type d = this->desired_positions[i] - this->actual_positions[i]; + + // offset to next position + float_type dp = this->actual_positions[i + 1] - this->actual_positions[i]; + + // offset to previous position + float_type dm = this->actual_positions[i - 1] - this->actual_positions[i]; + + // height ds + float_type hp = (this->heights[i + 1] - this->heights[i]) / dp; + float_type hm = (this->heights[i - 1] - this->heights[i]) / dm; + + if((d >= 1 && dp > 1) || (d <= -1 && dm < -1)) + { + short sign_d = static_cast<short>(d / std::abs(d)); + + float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp + (dp - sign_d) * hm); + + // try adjusting heights[i] using p-squared formula + if(this->heights[i - 1] < h && h < this->heights[i + 1]) + { + this->heights[i] = h; + } + else + { + // use linear formula + if(d > 0) + { + this->heights[i] += hp; + } + if(d < 0) + { + this->heights[i] -= hm; + } + } + this->actual_positions[i] += sign_d; + } + } + } + } + + result_type result(dont_care) const + { + // for i in [1,probabilities.size()], return heights[i * 2] + detail::times2_iterator idx_begin = detail::make_times2_iterator(1); + detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1); + + return result_type( + make_permutation_iterator(this->heights.begin(), idx_begin) + , make_permutation_iterator(this->heights.begin(), idx_end) + ); + } + + private: + array_type probabilities; // the quantile probabilities + array_type heights; // q_i + array_type actual_positions; // n_i + array_type desired_positions; // d_i + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_extended_p_square +// +namespace tag +{ + struct weighted_extended_p_square + : depends_on<count, sum_of_weights> + , extended_p_square_probabilities + { + typedef accumulators::impl::weighted_extended_p_square_impl<mpl::_1, mpl::_2> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_extended_p_square +// +namespace extract +{ + extractor<tag::weighted_extended_p_square> const weighted_extended_p_square = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_extended_p_square) +} + +using extract::weighted_extended_p_square; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_kurtosis.hpp b/boost/accumulators/statistics/weighted_kurtosis.hpp new file mode 100644 index 0000000000..d51db5cd38 --- /dev/null +++ b/boost/accumulators/statistics/weighted_kurtosis.hpp @@ -0,0 +1,105 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_kurtosis.hpp +// +// Copyright 2006 Olivier Gygi, Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_KURTOSIS_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_KURTOSIS_HPP_EAN_28_10_2005 + +#include <limits> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/weighted_moment.hpp> +#include <boost/accumulators/statistics/weighted_mean.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_kurtosis_impl + /** + @brief Kurtosis estimation for weighted samples + + The kurtosis of a sample distribution is defined as the ratio of the 4th central moment and the square of the 2nd central + moment (the variance) of the samples, minus 3. The term \f$ -3 \f$ is added in order to ensure that the normal distribution + has zero kurtosis. The kurtosis can also be expressed by the simple moments: + + \f[ + \hat{g}_2 = + \frac + {\widehat{m}_n^{(4)}-4\widehat{m}_n^{(3)}\hat{\mu}_n+6\widehat{m}_n^{(2)}\hat{\mu}_n^2-3\hat{\mu}_n^4} + {\left(\widehat{m}_n^{(2)} - \hat{\mu}_n^{2}\right)^2} - 3, + \f] + + where \f$ \widehat{m}_n^{(i)} \f$ are the \f$ i \f$-th moment and \f$ \hat{\mu}_n \f$ the mean (first moment) of the + \f$ n \f$ samples. + + The kurtosis estimator for weighted samples is formally identical to the estimator for unweighted samples, except that + the weighted counterparts of all measures it depends on are to be taken. + */ + template<typename Sample, typename Weight> + struct weighted_kurtosis_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + // for boost::result_of + typedef typename numeric::functional::average<weighted_sample, weighted_sample>::result_type result_type; + + weighted_kurtosis_impl(dont_care) + { + } + + template<typename Args> + result_type result(Args const &args) const + { + return numeric::average( + accumulators::weighted_moment<4>(args) + - 4. * accumulators::weighted_moment<3>(args) * weighted_mean(args) + + 6. * accumulators::weighted_moment<2>(args) * weighted_mean(args) * weighted_mean(args) + - 3. * weighted_mean(args) * weighted_mean(args) * weighted_mean(args) * weighted_mean(args) + , ( accumulators::weighted_moment<2>(args) - weighted_mean(args) * weighted_mean(args) ) + * ( accumulators::weighted_moment<2>(args) - weighted_mean(args) * weighted_mean(args) ) + ) - 3.; + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_kurtosis +// +namespace tag +{ + struct weighted_kurtosis + : depends_on<weighted_mean, weighted_moment<2>, weighted_moment<3>, weighted_moment<4> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_kurtosis_impl<mpl::_1, mpl::_2> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_kurtosis +// +namespace extract +{ + extractor<tag::weighted_kurtosis> const weighted_kurtosis = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_kurtosis) +} + +using extract::weighted_kurtosis; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_mean.hpp b/boost/accumulators/statistics/weighted_mean.hpp new file mode 100644 index 0000000000..c8d651353a --- /dev/null +++ b/boost/accumulators/statistics/weighted_mean.hpp @@ -0,0 +1,189 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_mean.hpp +// +// Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEAN_HPP_EAN_03_11_2005 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEAN_HPP_EAN_03_11_2005 + +#include <boost/mpl/assert.hpp> +#include <boost/mpl/eval_if.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/weights.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/sum.hpp> +#include <boost/accumulators/statistics/mean.hpp> +#include <boost/accumulators/statistics/weighted_sum.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_mean_impl + // lazy, by default + template<typename Sample, typename Weight, typename Tag> + struct weighted_mean_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + // for boost::result_of + typedef typename numeric::functional::average<weighted_sample, Weight>::result_type result_type; + + weighted_mean_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + typedef + typename mpl::if_< + is_same<Tag, tag::sample> + , tag::weighted_sum + , tag::weighted_sum_of_variates<Sample, Tag> + >::type + weighted_sum_tag; + + extractor<weighted_sum_tag> const some_weighted_sum = {}; + + return numeric::average(some_weighted_sum(args), sum_of_weights(args)); + } + }; + + /////////////////////////////////////////////////////////////////////////////// + // immediate_weighted_mean_impl + // immediate + template<typename Sample, typename Weight, typename Tag> + struct immediate_weighted_mean_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + // for boost::result_of + typedef typename numeric::functional::average<weighted_sample, Weight>::result_type result_type; + + template<typename Args> + immediate_weighted_mean_impl(Args const &args) + : mean( + numeric::average( + args[parameter::keyword<Tag>::get() | Sample()] + * numeric::one<Weight>::value + , numeric::one<Weight>::value + ) + ) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + // Matthias: + // need to pass the argument pack since the weight might be an external + // accumulator set passed as a named parameter + Weight w_sum = sum_of_weights(args); + Weight w = args[weight]; + weighted_sample const &s = args[parameter::keyword<Tag>::get()] * w; + this->mean = numeric::average(this->mean * (w_sum - w) + s, w_sum); + } + + result_type result(dont_care) const + { + return this->mean; + } + + private: + result_type mean; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_mean +// tag::immediate_weighted_mean +// +namespace tag +{ + struct weighted_mean + : depends_on<sum_of_weights, weighted_sum> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_mean_impl<mpl::_1, mpl::_2, tag::sample> impl; + }; + struct immediate_weighted_mean + : depends_on<sum_of_weights> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::immediate_weighted_mean_impl<mpl::_1, mpl::_2, tag::sample> impl; + }; + template<typename VariateType, typename VariateTag> + struct weighted_mean_of_variates + : depends_on<sum_of_weights, weighted_sum_of_variates<VariateType, VariateTag> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_mean_impl<VariateType, mpl::_2, VariateTag> impl; + }; + template<typename VariateType, typename VariateTag> + struct immediate_weighted_mean_of_variates + : depends_on<sum_of_weights> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::immediate_weighted_mean_impl<VariateType, mpl::_2, VariateTag> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_mean +// extract::weighted_mean_of_variates +// +namespace extract +{ + extractor<tag::mean> const weighted_mean = {}; + BOOST_ACCUMULATORS_DEFINE_EXTRACTOR(tag, weighted_mean_of_variates, (typename)(typename)) + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_mean) +} + +using extract::weighted_mean; +using extract::weighted_mean_of_variates; + +// weighted_mean(lazy) -> weighted_mean +template<> +struct as_feature<tag::weighted_mean(lazy)> +{ + typedef tag::weighted_mean type; +}; + +// weighted_mean(immediate) -> immediate_weighted_mean +template<> +struct as_feature<tag::weighted_mean(immediate)> +{ + typedef tag::immediate_weighted_mean type; +}; + +// weighted_mean_of_variates<VariateType, VariateTag>(lazy) -> weighted_mean_of_variates<VariateType, VariateTag> +template<typename VariateType, typename VariateTag> +struct as_feature<tag::weighted_mean_of_variates<VariateType, VariateTag>(lazy)> +{ + typedef tag::weighted_mean_of_variates<VariateType, VariateTag> type; +}; + +// weighted_mean_of_variates<VariateType, VariateTag>(immediate) -> immediate_weighted_mean_of_variates<VariateType, VariateTag> +template<typename VariateType, typename VariateTag> +struct as_feature<tag::weighted_mean_of_variates<VariateType, VariateTag>(immediate)> +{ + typedef tag::immediate_weighted_mean_of_variates<VariateType, VariateTag> type; +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_median.hpp b/boost/accumulators/statistics/weighted_median.hpp new file mode 100644 index 0000000000..46880c88d1 --- /dev/null +++ b/boost/accumulators/statistics/weighted_median.hpp @@ -0,0 +1,237 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_median.hpp +// +// Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005 + +#include <boost/mpl/placeholders.hpp> +#include <boost/range/iterator_range.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/median.hpp> +#include <boost/accumulators/statistics/weighted_p_square_quantile.hpp> +#include <boost/accumulators/statistics/weighted_density.hpp> +#include <boost/accumulators/statistics/weighted_p_square_cumulative_distribution.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_median_impl + // + /** + @brief Median estimation for weighted samples based on the \f$P^2\f$ quantile estimator + + The \f$P^2\f$ algorithm for weighted samples is invoked with a quantile probability of 0.5. + */ + template<typename Sample> + struct weighted_median_impl + : accumulator_base + { + // for boost::result_of + typedef typename numeric::functional::average<Sample, std::size_t>::result_type result_type; + + weighted_median_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + return weighted_p_square_quantile_for_median(args); + } + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_density_weighted_median_impl + // + /** + @brief Median estimation for weighted samples based on the density estimator + + The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being + the total number of samples. It returns the approximate horizontal position of this sample, + based on a linear interpolation inside the bin. + */ + template<typename Sample> + struct with_density_weighted_median_impl + : accumulator_base + { + typedef typename numeric::functional::average<Sample, std::size_t>::result_type float_type; + typedef std::vector<std::pair<float_type, float_type> > histogram_type; + typedef iterator_range<typename histogram_type::iterator> range_type; + // for boost::result_of + typedef float_type result_type; + + template<typename Args> + with_density_weighted_median_impl(Args const &args) + : sum(numeric::average(args[sample | Sample()], (std::size_t)1)) + , is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + std::size_t cnt = count(args); + range_type histogram = weighted_density(args); + typename range_type::iterator it = histogram.begin(); + while (this->sum < 0.5 * cnt) + { + this->sum += it->second * cnt; + ++it; + } + --it; + float_type over = numeric::average(this->sum - 0.5 * cnt, it->second * cnt); + this->median = it->first * over + (it + 1)->first * ( 1. - over ); + } + + return this->median; + } + + private: + mutable float_type sum; + mutable bool is_dirty; + mutable float_type median; + }; + + /////////////////////////////////////////////////////////////////////////////// + // with_p_square_cumulative_distribution_weighted_median_impl + // + /** + @brief Median estimation for weighted samples based on the \f$P^2\f$ cumulative distribution estimator + + The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It + returns the approximate horizontal position of where the cumulative distribution + equals 0.5, based on a linear interpolation inside the bin. + */ + template<typename Sample, typename Weight> + struct with_p_square_cumulative_distribution_weighted_median_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type float_type; + typedef std::vector<std::pair<float_type, float_type> > histogram_type; + typedef iterator_range<typename histogram_type::iterator> range_type; + // for boost::result_of + typedef float_type result_type; + + with_p_square_cumulative_distribution_weighted_median_impl(dont_care) + : is_dirty(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty = true; + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + range_type histogram = weighted_p_square_cumulative_distribution(args); + typename range_type::iterator it = histogram.begin(); + while (it->second < 0.5) + { + ++it; + } + float_type over = numeric::average(it->second - 0.5, it->second - (it - 1)->second); + this->median = it->first * over + (it + 1)->first * ( 1. - over ); + } + + return this->median; + } + private: + mutable bool is_dirty; + mutable float_type median; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_median +// tag::with_density_weighted_median +// tag::with_p_square_cumulative_distribution_weighted_median +// +namespace tag +{ + struct weighted_median + : depends_on<weighted_p_square_quantile_for_median> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_median_impl<mpl::_1> impl; + }; + struct with_density_weighted_median + : depends_on<count, weighted_density> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_density_weighted_median_impl<mpl::_1> impl; + }; + struct with_p_square_cumulative_distribution_weighted_median + : depends_on<weighted_p_square_cumulative_distribution> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl<mpl::_1, mpl::_2> impl; + }; + +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_median +// +namespace extract +{ + extractor<tag::median> const weighted_median = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_median) +} + +using extract::weighted_median; +// weighted_median(with_p_square_quantile) -> weighted_median +template<> +struct as_feature<tag::weighted_median(with_p_square_quantile)> +{ + typedef tag::weighted_median type; +}; + +// weighted_median(with_density) -> with_density_weighted_median +template<> +struct as_feature<tag::weighted_median(with_density)> +{ + typedef tag::with_density_weighted_median type; +}; + +// weighted_median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_weighted_median +template<> +struct as_feature<tag::weighted_median(with_p_square_cumulative_distribution)> +{ + typedef tag::with_p_square_cumulative_distribution_weighted_median type; +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_moment.hpp b/boost/accumulators/statistics/weighted_moment.hpp new file mode 100644 index 0000000000..f49c362d95 --- /dev/null +++ b/boost/accumulators/statistics/weighted_moment.hpp @@ -0,0 +1,96 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_moment.hpp +// +// Copyright 2006, Eric Niebler, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MOMENT_HPP_EAN_15_11_2005 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MOMENT_HPP_EAN_15_11_2005 + +#include <boost/config/no_tr1/cmath.hpp> +#include <boost/mpl/int.hpp> +#include <boost/mpl/assert.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/preprocessor/arithmetic/inc.hpp> +#include <boost/preprocessor/repetition/repeat_from_to.hpp> +#include <boost/preprocessor/repetition/enum_trailing_params.hpp> +#include <boost/preprocessor/repetition/enum_trailing_binary_params.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/moment.hpp> // for pow() +#include <boost/accumulators/statistics/sum.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_moment_impl + template<typename N, typename Sample, typename Weight> + struct weighted_moment_impl + : accumulator_base // TODO: also depends_on sum of powers + { + BOOST_MPL_ASSERT_RELATION(N::value, >, 0); + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + // for boost::result_of + typedef typename numeric::functional::average<weighted_sample, Weight>::result_type result_type; + + template<typename Args> + weighted_moment_impl(Args const &args) + : sum(args[sample | Sample()] * numeric::one<Weight>::value) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + this->sum += args[weight] * numeric::pow(args[sample], N()); + } + + template<typename Args> + result_type result(Args const &args) const + { + return numeric::average(this->sum, sum_of_weights(args)); + } + + private: + weighted_sample sum; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_moment +// +namespace tag +{ + template<int N> + struct weighted_moment + : depends_on<count, sum_of_weights> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_moment_impl<mpl::int_<N>, mpl::_1, mpl::_2> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_moment +// +namespace extract +{ + BOOST_ACCUMULATORS_DEFINE_EXTRACTOR(tag, weighted_moment, (int)) +} + +using extract::weighted_moment; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_p_square_cumulative_distribution.hpp b/boost/accumulators/statistics/weighted_p_square_cumulative_distribution.hpp new file mode 100644 index 0000000000..290f090fe5 --- /dev/null +++ b/boost/accumulators/statistics/weighted_p_square_cumulative_distribution.hpp @@ -0,0 +1,262 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_p_square_cumulative_distribution.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_CUMULATIVE_DISTRIBUTION_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_CUMULATIVE_DISTRIBUTION_HPP_DE_01_01_2006 + +#include <vector> +#include <functional> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/range.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/sum.hpp> +#include <boost/accumulators/statistics/p_square_cumulative_distribution.hpp> // for named parameter p_square_cumulative_distribution_num_cells + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_p_square_cumulative_distribution_impl + // cumulative distribution calculation (as histogram) + /** + @brief Histogram calculation of the cumulative distribution with the \f$P^2\f$ algorithm for weighted samples + + A histogram of the sample cumulative distribution is computed dynamically without storing samples + based on the \f$ P^2 \f$ algorithm for weighted samples. The returned histogram has a specifiable + amount (num_cells) equiprobable (and not equal-sized) cells. + + Note that applying importance sampling results in regions to be more and other regions to be less + accurately estimated than without importance sampling, i.e., with unweighted samples. + + For further details, see + + R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and + histograms without storing observations, Communications of the ACM, + Volume 28 (October), Number 10, 1985, p. 1076-1085. + + @param p_square_cumulative_distribution_num_cells + */ + template<typename Sample, typename Weight> + struct weighted_p_square_cumulative_distribution_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type float_type; + typedef std::vector<std::pair<float_type, float_type> > histogram_type; + typedef std::vector<float_type> array_type; + // for boost::result_of + typedef iterator_range<typename histogram_type::iterator> result_type; + + template<typename Args> + weighted_p_square_cumulative_distribution_impl(Args const &args) + : num_cells(args[p_square_cumulative_distribution_num_cells]) + , heights(num_cells + 1) + , actual_positions(num_cells + 1) + , desired_positions(num_cells + 1) + , histogram(num_cells + 1) + , is_dirty(true) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + this->is_dirty = true; + + std::size_t cnt = count(args); + std::size_t sample_cell = 1; // k + std::size_t b = this->num_cells; + + // accumulate num_cells + 1 first samples + if (cnt <= b + 1) + { + this->heights[cnt - 1] = args[sample]; + this->actual_positions[cnt - 1] = args[weight]; + + // complete the initialization of heights by sorting + if (cnt == b + 1) + { + //std::sort(this->heights.begin(), this->heights.end()); + + // TODO: we need to sort the initial samples (in heights) in ascending order and + // sort their weights (in actual_positions) the same way. The following lines do + // it, but there must be a better and more efficient way of doing this. + typename array_type::iterator it_begin, it_end, it_min; + + it_begin = this->heights.begin(); + it_end = this->heights.end(); + + std::size_t pos = 0; + + while (it_begin != it_end) + { + it_min = std::min_element(it_begin, it_end); + std::size_t d = std::distance(it_begin, it_min); + std::swap(*it_begin, *it_min); + std::swap(this->actual_positions[pos], this->actual_positions[pos + d]); + ++it_begin; + ++pos; + } + + // calculate correct initial actual positions + for (std::size_t i = 1; i < b; ++i) + { + this->actual_positions[i] += this->actual_positions[i - 1]; + } + } + } + else + { + // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values + if (args[sample] < this->heights[0]) + { + this->heights[0] = args[sample]; + this->actual_positions[0] = args[weight]; + sample_cell = 1; + } + else if (this->heights[b] <= args[sample]) + { + this->heights[b] = args[sample]; + sample_cell = b; + } + else + { + typename array_type::iterator it; + it = std::upper_bound( + this->heights.begin() + , this->heights.end() + , args[sample] + ); + + sample_cell = std::distance(this->heights.begin(), it); + } + + // increment positions of markers above sample_cell + for (std::size_t i = sample_cell; i < b + 1; ++i) + { + this->actual_positions[i] += args[weight]; + } + + // determine desired marker positions + for (std::size_t i = 1; i < b + 1; ++i) + { + this->desired_positions[i] = this->actual_positions[0] + + numeric::average((i-1) * (sum_of_weights(args) - this->actual_positions[0]), b); + } + + // adjust heights of markers 2 to num_cells if necessary + for (std::size_t i = 1; i < b; ++i) + { + // offset to desire position + float_type d = this->desired_positions[i] - this->actual_positions[i]; + + // offset to next position + float_type dp = this->actual_positions[i + 1] - this->actual_positions[i]; + + // offset to previous position + float_type dm = this->actual_positions[i - 1] - this->actual_positions[i]; + + // height ds + float_type hp = (this->heights[i + 1] - this->heights[i]) / dp; + float_type hm = (this->heights[i - 1] - this->heights[i]) / dm; + + if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) ) + { + short sign_d = static_cast<short>(d / std::abs(d)); + + // try adjusting heights[i] using p-squared formula + float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm ); + + if ( this->heights[i - 1] < h && h < this->heights[i + 1] ) + { + this->heights[i] = h; + } + else + { + // use linear formula + if (d>0) + { + this->heights[i] += hp; + } + if (d<0) + { + this->heights[i] -= hm; + } + } + this->actual_positions[i] += sign_d; + } + } + } + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty) + { + this->is_dirty = false; + + // creates a vector of std::pair where each pair i holds + // the values heights[i] (x-axis of histogram) and + // actual_positions[i] / sum_of_weights (y-axis of histogram) + + for (std::size_t i = 0; i < this->histogram.size(); ++i) + { + this->histogram[i] = std::make_pair(this->heights[i], numeric::average(this->actual_positions[i], sum_of_weights(args))); + } + } + + return make_iterator_range(this->histogram); + } + + private: + std::size_t num_cells; // number of cells b + array_type heights; // q_i + array_type actual_positions; // n_i + array_type desired_positions; // n'_i + mutable histogram_type histogram; // histogram + mutable bool is_dirty; + }; + +} // namespace detail + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_p_square_cumulative_distribution +// +namespace tag +{ + struct weighted_p_square_cumulative_distribution + : depends_on<count, sum_of_weights> + , p_square_cumulative_distribution_num_cells + { + typedef accumulators::impl::weighted_p_square_cumulative_distribution_impl<mpl::_1, mpl::_2> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_p_square_cumulative_distribution +// +namespace extract +{ + extractor<tag::weighted_p_square_cumulative_distribution> const weighted_p_square_cumulative_distribution = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_cumulative_distribution) +} + +using extract::weighted_p_square_cumulative_distribution; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_p_square_quantile.hpp b/boost/accumulators/statistics/weighted_p_square_quantile.hpp new file mode 100644 index 0000000000..5dc84f4764 --- /dev/null +++ b/boost/accumulators/statistics/weighted_p_square_quantile.hpp @@ -0,0 +1,255 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_p_square_quantile.hpp +// +// Copyright 2005 Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_QUANTILE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_QUANTILE_HPP_DE_01_01_2006 + +#include <cmath> +#include <functional> +#include <boost/array.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/sum.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl { + /////////////////////////////////////////////////////////////////////////////// + // weighted_p_square_quantile_impl + // single quantile estimation with weighted samples + /** + @brief Single quantile estimation with the \f$P^2\f$ algorithm for weighted samples + + This version of the \f$P^2\f$ algorithm extends the \f$P^2\f$ algorithm to support weighted samples. + The \f$P^2\f$ algorithm estimates a quantile dynamically without storing samples. Instead of + storing the whole sample cumulative distribution, only five points (markers) are stored. The heights + of these markers are the minimum and the maximum of the samples and the current estimates of the + \f$(p/2)\f$-, \f$p\f$ - and \f$(1+p)/2\f$ -quantiles. Their positions are equal to the number + of samples that are smaller or equal to the markers. Each time a new sample is added, the + positions of the markers are updated and if necessary their heights are adjusted using a piecewise- + parabolic formula. + + For further details, see + + R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and + histograms without storing observations, Communications of the ACM, + Volume 28 (October), Number 10, 1985, p. 1076-1085. + + @param quantile_probability + */ + template<typename Sample, typename Weight, typename Impl> + struct weighted_p_square_quantile_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type float_type; + typedef array<float_type, 5> array_type; + // for boost::result_of + typedef float_type result_type; + + template<typename Args> + weighted_p_square_quantile_impl(Args const &args) + : p(is_same<Impl, for_median>::value ? 0.5 : args[quantile_probability | 0.5]) + , heights() + , actual_positions() + , desired_positions() + { + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + + // accumulate 5 first samples + if (cnt <= 5) + { + this->heights[cnt - 1] = args[sample]; + + // In this initialization phase, actual_positions stores the weights of the + // inital samples that are needed at the end of the initialization phase to + // compute the correct initial positions of the markers. + this->actual_positions[cnt - 1] = args[weight]; + + // complete the initialization of heights and actual_positions by sorting + if (cnt == 5) + { + // TODO: we need to sort the initial samples (in heights) in ascending order and + // sort their weights (in actual_positions) the same way. The following lines do + // it, but there must be a better and more efficient way of doing this. + typename array_type::iterator it_begin, it_end, it_min; + + it_begin = this->heights.begin(); + it_end = this->heights.end(); + + std::size_t pos = 0; + + while (it_begin != it_end) + { + it_min = std::min_element(it_begin, it_end); + std::size_t d = std::distance(it_begin, it_min); + std::swap(*it_begin, *it_min); + std::swap(this->actual_positions[pos], this->actual_positions[pos + d]); + ++it_begin; + ++pos; + } + + // calculate correct initial actual positions + for (std::size_t i = 1; i < 5; ++i) + { + this->actual_positions[i] += this->actual_positions[i - 1]; + } + } + } + else + { + std::size_t sample_cell = 1; // k + + // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values + if (args[sample] < this->heights[0]) + { + this->heights[0] = args[sample]; + this->actual_positions[0] = args[weight]; + sample_cell = 1; + } + else if (this->heights[4] <= args[sample]) + { + this->heights[4] = args[sample]; + sample_cell = 4; + } + else + { + typedef typename array_type::iterator iterator; + iterator it = std::upper_bound( + this->heights.begin() + , this->heights.end() + , args[sample] + ); + + sample_cell = std::distance(this->heights.begin(), it); + } + + // increment positions of markers above sample_cell + for (std::size_t i = sample_cell; i < 5; ++i) + { + this->actual_positions[i] += args[weight]; + } + + // update desired positions for all markers + this->desired_positions[0] = this->actual_positions[0]; + this->desired_positions[1] = (sum_of_weights(args) - this->actual_positions[0]) + * this->p/2. + this->actual_positions[0]; + this->desired_positions[2] = (sum_of_weights(args) - this->actual_positions[0]) + * this->p + this->actual_positions[0]; + this->desired_positions[3] = (sum_of_weights(args) - this->actual_positions[0]) + * (1. + this->p)/2. + this->actual_positions[0]; + this->desired_positions[4] = sum_of_weights(args); + + // adjust height and actual positions of markers 1 to 3 if necessary + for (std::size_t i = 1; i <= 3; ++i) + { + // offset to desired positions + float_type d = this->desired_positions[i] - this->actual_positions[i]; + + // offset to next position + float_type dp = this->actual_positions[i + 1] - this->actual_positions[i]; + + // offset to previous position + float_type dm = this->actual_positions[i - 1] - this->actual_positions[i]; + + // height ds + float_type hp = (this->heights[i + 1] - this->heights[i]) / dp; + float_type hm = (this->heights[i - 1] - this->heights[i]) / dm; + + if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) ) + { + short sign_d = static_cast<short>(d / std::abs(d)); + + // try adjusting heights[i] using p-squared formula + float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm ); + + if ( this->heights[i - 1] < h && h < this->heights[i + 1] ) + { + this->heights[i] = h; + } + else + { + // use linear formula + if (d>0) + { + this->heights[i] += hp; + } + if (d<0) + { + this->heights[i] -= hm; + } + } + this->actual_positions[i] += sign_d; + } + } + } + } + + result_type result(dont_care) const + { + return this->heights[2]; + } + + private: + float_type p; // the quantile probability p + array_type heights; // q_i + array_type actual_positions; // n_i + array_type desired_positions; // n'_i + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_p_square_quantile +// +namespace tag +{ + struct weighted_p_square_quantile + : depends_on<count, sum_of_weights> + { + typedef accumulators::impl::weighted_p_square_quantile_impl<mpl::_1, mpl::_2, regular> impl; + }; + struct weighted_p_square_quantile_for_median + : depends_on<count, sum_of_weights> + { + typedef accumulators::impl::weighted_p_square_quantile_impl<mpl::_1, mpl::_2, for_median> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_p_square_quantile +// extract::weighted_p_square_quantile_for_median +// +namespace extract +{ + extractor<tag::weighted_p_square_quantile> const weighted_p_square_quantile = {}; + extractor<tag::weighted_p_square_quantile_for_median> const weighted_p_square_quantile_for_median = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile_for_median) +} + +using extract::weighted_p_square_quantile; +using extract::weighted_p_square_quantile_for_median; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_peaks_over_threshold.hpp b/boost/accumulators/statistics/weighted_peaks_over_threshold.hpp new file mode 100644 index 0000000000..6d3017b3ec --- /dev/null +++ b/boost/accumulators/statistics/weighted_peaks_over_threshold.hpp @@ -0,0 +1,288 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_peaks_over_threshold.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <numeric> +#include <functional> +#include <boost/range.hpp> +#include <boost/mpl/if.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> +#include <boost/accumulators/statistics/peaks_over_threshold.hpp> // for named parameters pot_threshold_value and pot_threshold_probability +#include <boost/accumulators/statistics/sum.hpp> +#include <boost/accumulators/statistics/tail_variate.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost { namespace accumulators +{ + +namespace impl +{ + + /////////////////////////////////////////////////////////////////////////////// + // weighted_peaks_over_threshold_impl + // works with an explicit threshold value and does not depend on order statistics of weighted samples + /** + @brief Weighted Peaks over Threshold Method for Weighted Quantile and Weighted Tail Mean Estimation + + @sa peaks_over_threshold_impl + + @param quantile_probability + @param pot_threshold_value + */ + template<typename Sample, typename Weight, typename LeftRight> + struct weighted_peaks_over_threshold_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Weight, Sample>::result_type weighted_sample; + typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type float_type; + // for boost::result_of + typedef boost::tuple<float_type, float_type, float_type> result_type; + + template<typename Args> + weighted_peaks_over_threshold_impl(Args const &args) + : sign_((is_same<LeftRight, left>::value) ? -1 : 1) + , mu_(sign_ * numeric::average(args[sample | Sample()], (std::size_t)1)) + , sigma2_(numeric::average(args[sample | Sample()], (std::size_t)1)) + , w_sum_(numeric::average(args[weight | Weight()], (std::size_t)1)) + , threshold_(sign_ * args[pot_threshold_value]) + , fit_parameters_(boost::make_tuple(0., 0., 0.)) + , is_dirty_(true) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + this->is_dirty_ = true; + + if (this->sign_ * args[sample] > this->threshold_) + { + this->mu_ += args[weight] * args[sample]; + this->sigma2_ += args[weight] * args[sample] * args[sample]; + this->w_sum_ += args[weight]; + } + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty_) + { + this->is_dirty_ = false; + + this->mu_ = this->sign_ * numeric::average(this->mu_, this->w_sum_); + this->sigma2_ = numeric::average(this->sigma2_, this->w_sum_); + this->sigma2_ -= this->mu_ * this->mu_; + + float_type threshold_probability = numeric::average(sum_of_weights(args) - this->w_sum_, sum_of_weights(args)); + + float_type tmp = numeric::average(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_); + float_type xi_hat = 0.5 * ( 1. - tmp ); + float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp ); + float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat); + float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat; + this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); + } + + return this->fit_parameters_; + } + + private: + short sign_; // for left tail fitting, mirror the extreme values + mutable float_type mu_; // mean of samples above threshold + mutable float_type sigma2_; // variance of samples above threshold + mutable float_type w_sum_; // sum of weights of samples above threshold + float_type threshold_; + mutable result_type fit_parameters_; // boost::tuple that stores fit parameters + mutable bool is_dirty_; + }; + + /////////////////////////////////////////////////////////////////////////////// + // weighted_peaks_over_threshold_prob_impl + // determines threshold from a given threshold probability using order statistics + /** + @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation + + @sa weighted_peaks_over_threshold_impl + + @param quantile_probability + @param pot_threshold_probability + */ + template<typename Sample, typename Weight, typename LeftRight> + struct weighted_peaks_over_threshold_prob_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Weight, Sample>::result_type weighted_sample; + typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type float_type; + // for boost::result_of + typedef boost::tuple<float_type, float_type, float_type> result_type; + + template<typename Args> + weighted_peaks_over_threshold_prob_impl(Args const &args) + : sign_((is_same<LeftRight, left>::value) ? -1 : 1) + , mu_(sign_ * numeric::average(args[sample | Sample()], (std::size_t)1)) + , sigma2_(numeric::average(args[sample | Sample()], (std::size_t)1)) + , threshold_probability_(args[pot_threshold_probability]) + , fit_parameters_(boost::make_tuple(0., 0., 0.)) + , is_dirty_(true) + { + } + + void operator ()(dont_care) + { + this->is_dirty_ = true; + } + + template<typename Args> + result_type result(Args const &args) const + { + if (this->is_dirty_) + { + this->is_dirty_ = false; + + float_type threshold = sum_of_weights(args) + * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ ); + + std::size_t n = 0; + Weight sum = Weight(0); + + while (sum < threshold) + { + if (n < static_cast<std::size_t>(tail_weights(args).size())) + { + mu_ += *(tail_weights(args).begin() + n) * *(tail(args).begin() + n); + sigma2_ += *(tail_weights(args).begin() + n) * *(tail(args).begin() + n) * (*(tail(args).begin() + n)); + sum += *(tail_weights(args).begin() + n); + n++; + } + else + { + if (std::numeric_limits<float_type>::has_quiet_NaN) + { + return boost::make_tuple( + std::numeric_limits<float_type>::quiet_NaN() + , std::numeric_limits<float_type>::quiet_NaN() + , std::numeric_limits<float_type>::quiet_NaN() + ); + } + else + { + std::ostringstream msg; + msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + return boost::make_tuple(Sample(0), Sample(0), Sample(0)); + } + } + } + + float_type u = *(tail(args).begin() + n - 1) * this->sign_; + + + this->mu_ = this->sign_ * numeric::average(this->mu_, sum); + this->sigma2_ = numeric::average(this->sigma2_, sum); + this->sigma2_ -= this->mu_ * this->mu_; + + if (is_same<LeftRight, left>::value) + this->threshold_probability_ = 1. - this->threshold_probability_; + + float_type tmp = numeric::average(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_); + float_type xi_hat = 0.5 * ( 1. - tmp ); + float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp ); + float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat); + float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat; + this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); + + } + + return this->fit_parameters_; + } + + private: + short sign_; // for left tail fitting, mirror the extreme values + mutable float_type mu_; // mean of samples above threshold u + mutable float_type sigma2_; // variance of samples above threshold u + mutable float_type threshold_probability_; + mutable result_type fit_parameters_; // boost::tuple that stores fit parameters + mutable bool is_dirty_; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_peaks_over_threshold +// +namespace tag +{ + template<typename LeftRight> + struct weighted_peaks_over_threshold + : depends_on<sum_of_weights> + , pot_threshold_value + { + /// INTERNAL ONLY + typedef accumulators::impl::weighted_peaks_over_threshold_impl<mpl::_1, mpl::_2, LeftRight> impl; + }; + + template<typename LeftRight> + struct weighted_peaks_over_threshold_prob + : depends_on<sum_of_weights, tail_weights<LeftRight> > + , pot_threshold_probability + { + /// INTERNAL ONLY + typedef accumulators::impl::weighted_peaks_over_threshold_prob_impl<mpl::_1, mpl::_2, LeftRight> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_peaks_over_threshold +// +namespace extract +{ + extractor<tag::abstract_peaks_over_threshold> const weighted_peaks_over_threshold = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_peaks_over_threshold) +} + +using extract::weighted_peaks_over_threshold; + +// weighted_peaks_over_threshold<LeftRight>(with_threshold_value) -> weighted_peaks_over_threshold<LeftRight> +template<typename LeftRight> +struct as_feature<tag::weighted_peaks_over_threshold<LeftRight>(with_threshold_value)> +{ + typedef tag::weighted_peaks_over_threshold<LeftRight> type; +}; + +// weighted_peaks_over_threshold<LeftRight>(with_threshold_probability) -> weighted_peaks_over_threshold_prob<LeftRight> +template<typename LeftRight> +struct as_feature<tag::weighted_peaks_over_threshold<LeftRight>(with_threshold_probability)> +{ + typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type; +}; + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/weighted_skewness.hpp b/boost/accumulators/statistics/weighted_skewness.hpp new file mode 100644 index 0000000000..6ccbc45ca1 --- /dev/null +++ b/boost/accumulators/statistics/weighted_skewness.hpp @@ -0,0 +1,101 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_skewness.hpp +// +// Copyright 2006 Olivier Gygi, Daniel Egloff. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_SKEWNESS_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_SKEWNESS_HPP_EAN_28_10_2005 + +#include <limits> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/weighted_moment.hpp> +#include <boost/accumulators/statistics/weighted_mean.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_skewness_impl + /** + @brief Skewness estimation for weighted samples + + The skewness of a sample distribution is defined as the ratio of the 3rd central moment and the \f$ 3/2 \f$-th power $ + of the 2nd central moment (the variance) of the samples. The skewness can also be expressed by the simple moments: + + \f[ + \hat{g}_1 = + \frac + {\widehat{m}_n^{(3)}-3\widehat{m}_n^{(2)}\hat{\mu}_n+2\hat{\mu}_n^3} + {\left(\widehat{m}_n^{(2)} - \hat{\mu}_n^{2}\right)^{3/2}} + \f] + + where \f$ \widehat{m}_n^{(i)} \f$ are the \f$ i \f$-th moment and \f$ \hat{\mu}_n \f$ the mean (first moment) of the + \f$ n \f$ samples. + + The skewness estimator for weighted samples is formally identical to the estimator for unweighted samples, except that + the weighted counterparts of all measures it depends on are to be taken. + */ + template<typename Sample, typename Weight> + struct weighted_skewness_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + // for boost::result_of + typedef typename numeric::functional::average<weighted_sample, weighted_sample>::result_type result_type; + + weighted_skewness_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + return numeric::average( + accumulators::weighted_moment<3>(args) + - 3. * accumulators::weighted_moment<2>(args) * weighted_mean(args) + + 2. * weighted_mean(args) * weighted_mean(args) * weighted_mean(args) + , ( accumulators::weighted_moment<2>(args) - weighted_mean(args) * weighted_mean(args) ) + * std::sqrt( accumulators::weighted_moment<2>(args) - weighted_mean(args) * weighted_mean(args) ) + ); + } + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_skewness +// +namespace tag +{ + struct weighted_skewness + : depends_on<weighted_mean, weighted_moment<2>, weighted_moment<3> > + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_skewness_impl<mpl::_1, mpl::_2> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_skewness +// +namespace extract +{ + extractor<tag::weighted_skewness> const weighted_skewness = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_skewness) +} + +using extract::weighted_skewness; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_sum.hpp b/boost/accumulators/statistics/weighted_sum.hpp new file mode 100644 index 0000000000..27153906d0 --- /dev/null +++ b/boost/accumulators/statistics/weighted_sum.hpp @@ -0,0 +1,116 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_sum.hpp +// +// Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_SUM_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_SUM_HPP_EAN_28_10_2005 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/parameters/weight.hpp> +#include <boost/accumulators/framework/accumulators/external_accumulator.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_sum_impl + template<typename Sample, typename Weight, typename Tag> + struct weighted_sum_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + + // for boost::result_of + typedef weighted_sample result_type; + + template<typename Args> + weighted_sum_impl(Args const &args) + : weighted_sum_( + args[parameter::keyword<Tag>::get() | Sample()] + * numeric::one<Weight>::value + ) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + // what about overflow? + this->weighted_sum_ += args[parameter::keyword<Tag>::get()] * args[weight]; + } + + result_type result(dont_care) const + { + return this->weighted_sum_; + } + + private: + + weighted_sample weighted_sum_; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_sum +// +namespace tag +{ + struct weighted_sum + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_sum_impl<mpl::_1, mpl::_2, tag::sample> impl; + }; + + template<typename VariateType, typename VariateTag> + struct weighted_sum_of_variates + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_sum_impl<VariateType, mpl::_2, VariateTag> impl; + }; + + struct abstract_weighted_sum_of_variates + : depends_on<> + { + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_sum +// +namespace extract +{ + extractor<tag::weighted_sum> const weighted_sum = {}; + extractor<tag::abstract_weighted_sum_of_variates> const weighted_sum_of_variates = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_sum) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_sum_of_variates) +} + +using extract::weighted_sum; +using extract::weighted_sum_of_variates; + +template<typename VariateType, typename VariateTag> +struct feature_of<tag::weighted_sum_of_variates<VariateType, VariateTag> > + : feature_of<tag::abstract_weighted_sum_of_variates> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_sum_kahan.hpp b/boost/accumulators/statistics/weighted_sum_kahan.hpp new file mode 100644 index 0000000000..fbb0303acc --- /dev/null +++ b/boost/accumulators/statistics/weighted_sum_kahan.hpp @@ -0,0 +1,138 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_sum_kahan.hpp +// +// Copyright 2011 Simon West. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_SUM_KAHAN_HPP_EAN_11_05_2011 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_SUM_KAHAN_HPP_EAN_11_05_2011 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/parameters/weight.hpp> +#include <boost/accumulators/framework/accumulators/external_accumulator.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/weighted_sum.hpp> +#include <boost/numeric/conversion/cast.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ +#if _MSC_VER > 1400 +# pragma float_control(push) +# pragma float_control(precise, on) +#endif + + /////////////////////////////////////////////////////////////////////////////// + // weighted_sum_kahan_impl + template<typename Sample, typename Weight, typename Tag> + struct weighted_sum_kahan_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + + // for boost::result_of + typedef weighted_sample result_type; + + template<typename Args> + weighted_sum_kahan_impl(Args const &args) + : weighted_sum_( + args[parameter::keyword<Tag>::get() | Sample()] * numeric::one<Weight>::value), + compensation(boost::numeric_cast<weighted_sample>(0.0)) + { + } + + template<typename Args> + void +#if BOOST_ACCUMULATORS_GCC_VERSION > 40305 + __attribute__((__optimize__("no-associative-math"))) +#endif + operator ()(Args const &args) + { + const weighted_sample myTmp1 = args[parameter::keyword<Tag>::get()] * args[weight] - this->compensation; + const weighted_sample myTmp2 = this->weighted_sum_ + myTmp1; + this->compensation = (myTmp2 - this->weighted_sum_) - myTmp1; + this->weighted_sum_ = myTmp2; + + } + + result_type result(dont_care) const + { + return this->weighted_sum_; + } + + private: + weighted_sample weighted_sum_; + weighted_sample compensation; + }; + +#if _MSC_VER > 1400 +# pragma float_control(pop) +#endif + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_sum_kahan +// tag::weighted_sum_of_variates_kahan +// +namespace tag +{ + struct weighted_sum_kahan + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_sum_kahan_impl<mpl::_1, mpl::_2, tag::sample> impl; + }; + + template<typename VariateType, typename VariateTag> + struct weighted_sum_of_variates_kahan + : depends_on<> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_sum_kahan_impl<VariateType, mpl::_2, VariateTag> impl; + }; + +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_sum_kahan +// extract::weighted_sum_of_variates_kahan +// +namespace extract +{ + extractor<tag::weighted_sum_kahan> const weighted_sum_kahan = {}; + extractor<tag::abstract_weighted_sum_of_variates> const weighted_sum_of_variates_kahan = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_sum_kahan) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_sum_of_variates_kahan) +} + +using extract::weighted_sum_kahan; +using extract::weighted_sum_of_variates_kahan; + +// weighted_sum(kahan) -> weighted_sum_kahan +template<> +struct as_feature<tag::weighted_sum(kahan)> +{ + typedef tag::weighted_sum_kahan type; +}; + +template<typename VariateType, typename VariateTag> +struct feature_of<tag::weighted_sum_of_variates_kahan<VariateType, VariateTag> > + : feature_of<tag::abstract_weighted_sum_of_variates> +{ +}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/weighted_tail_mean.hpp b/boost/accumulators/statistics/weighted_tail_mean.hpp new file mode 100644 index 0000000000..5bdee8aadc --- /dev/null +++ b/boost/accumulators/statistics/weighted_tail_mean.hpp @@ -0,0 +1,169 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_tail_mean.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_TAIL_MEAN_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_TAIL_MEAN_HPP_DE_01_01_2006 + +#include <numeric> +#include <vector> +#include <limits> +#include <functional> +#include <sstream> +#include <stdexcept> +#include <boost/throw_exception.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/tail.hpp> +#include <boost/accumulators/statistics/tail_mean.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost { namespace accumulators +{ + +namespace impl +{ + + /////////////////////////////////////////////////////////////////////////////// + // coherent_weighted_tail_mean_impl + // + // TODO + + /////////////////////////////////////////////////////////////////////////////// + // non_coherent_weighted_tail_mean_impl + // + /** + @brief Estimation of the (non-coherent) weighted tail mean based on order statistics (for both left and right tails) + + + + An estimation of the non-coherent, weighted tail mean \f$\widehat{NCTM}_{n,\alpha}(X)\f$ is given by the weighted mean + of the + + \f[ + \lambda = \inf\left\{ l \left| \frac{1}{\bar{w}_n}\sum_{i=1}^{l} w_i \geq \alpha \right. \right\} + \f] + + smallest samples (left tail) or the weighted mean of the + + \f[ + n + 1 - \rho = n + 1 - \sup\left\{ r \left| \frac{1}{\bar{w}_n}\sum_{i=r}^{n} w_i \geq (1 - \alpha) \right. \right\} + \f] + + largest samples (right tail) above a quantile \f$\hat{q}_{\alpha}\f$ of level \f$\alpha\f$, \f$n\f$ being the total number of sample + and \f$\bar{w}_n\f$ the sum of all \f$n\f$ weights: + + \f[ + \widehat{NCTM}_{n,\alpha}^{\mathrm{left}}(X) = \frac{\sum_{i=1}^{\lambda} w_i X_{i:n}}{\sum_{i=1}^{\lambda} w_i}, + \f] + + \f[ + \widehat{NCTM}_{n,\alpha}^{\mathrm{right}}(X) = \frac{\sum_{i=\rho}^n w_i X_{i:n}}{\sum_{i=\rho}^n w_i}. + \f] + + @param quantile_probability + */ + template<typename Sample, typename Weight, typename LeftRight> + struct non_coherent_weighted_tail_mean_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + typedef typename numeric::functional::average<Weight, std::size_t>::result_type float_type; + // for boost::result_of + typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type result_type; + + non_coherent_weighted_tail_mean_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + float_type threshold = sum_of_weights(args) + * ( ( is_same<LeftRight, left>::value ) ? args[quantile_probability] : 1. - args[quantile_probability] ); + + std::size_t n = 0; + Weight sum = Weight(0); + + while (sum < threshold) + { + if (n < static_cast<std::size_t>(tail_weights(args).size())) + { + sum += *(tail_weights(args).begin() + n); + n++; + } + else + { + if (std::numeric_limits<result_type>::has_quiet_NaN) + { + return std::numeric_limits<result_type>::quiet_NaN(); + } + else + { + std::ostringstream msg; + msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + return result_type(0); + } + } + } + + return numeric::average( + std::inner_product( + tail(args).begin() + , tail(args).begin() + n + , tail_weights(args).begin() + , weighted_sample(0) + ) + , sum + ); + } + }; + +} // namespace impl + + +/////////////////////////////////////////////////////////////////////////////// +// tag::non_coherent_weighted_tail_mean<> +// +namespace tag +{ + template<typename LeftRight> + struct non_coherent_weighted_tail_mean + : depends_on<sum_of_weights, tail_weights<LeftRight> > + { + typedef accumulators::impl::non_coherent_weighted_tail_mean_impl<mpl::_1, mpl::_2, LeftRight> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::non_coherent_weighted_tail_mean; +// +namespace extract +{ + extractor<tag::abstract_non_coherent_tail_mean> const non_coherent_weighted_tail_mean = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(non_coherent_weighted_tail_mean) +} + +using extract::non_coherent_weighted_tail_mean; + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/weighted_tail_quantile.hpp b/boost/accumulators/statistics/weighted_tail_quantile.hpp new file mode 100644 index 0000000000..88c32d1f61 --- /dev/null +++ b/boost/accumulators/statistics/weighted_tail_quantile.hpp @@ -0,0 +1,146 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_tail_quantile.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_TAIL_QUANTILE_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_TAIL_QUANTILE_HPP_DE_01_01_2006 + +#include <vector> +#include <limits> +#include <functional> +#include <sstream> +#include <stdexcept> +#include <boost/throw_exception.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/mpl/if.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/tail.hpp> +#include <boost/accumulators/statistics/tail_quantile.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /////////////////////////////////////////////////////////////////////////////// + // weighted_tail_quantile_impl + // Tail quantile estimation based on order statistics of weighted samples + /** + @brief Tail quantile estimation based on order statistics of weighted samples (for both left and right tails) + + An estimator \f$\hat{q}\f$ of tail quantiles with level \f$\alpha\f$ based on order statistics + \f$X_{1:n} \leq X_{2:n} \leq\dots\leq X_{n:n}\f$ of weighted samples are given by \f$X_{\lambda:n}\f$ (left tail) + and \f$X_{\rho:n}\f$ (right tail), where + + \f[ + \lambda = \inf\left\{ l \left| \frac{1}{\bar{w}_n}\sum_{i=1}^{l} w_i \geq \alpha \right. \right\} + \f] + + and + + \f[ + \rho = \sup\left\{ r \left| \frac{1}{\bar{w}_n}\sum_{i=r}^{n} w_i \geq (1 - \alpha) \right. \right\}, + \f] + + \f$n\f$ being the number of samples and \f$\bar{w}_n\f$ the sum of all weights. + + @param quantile_probability + */ + template<typename Sample, typename Weight, typename LeftRight> + struct weighted_tail_quantile_impl + : accumulator_base + { + typedef typename numeric::functional::average<Weight, std::size_t>::result_type float_type; + // for boost::result_of + typedef Sample result_type; + + weighted_tail_quantile_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + float_type threshold = sum_of_weights(args) + * ( ( is_same<LeftRight, left>::value ) ? args[quantile_probability] : 1. - args[quantile_probability] ); + + std::size_t n = 0; + Weight sum = Weight(0); + + while (sum < threshold) + { + if (n < static_cast<std::size_t>(tail_weights(args).size())) + { + sum += *(tail_weights(args).begin() + n); + n++; + } + else + { + if (std::numeric_limits<result_type>::has_quiet_NaN) + { + return std::numeric_limits<result_type>::quiet_NaN(); + } + else + { + std::ostringstream msg; + msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + return Sample(0); + } + } + } + + // Note that the cached samples of the left are sorted in ascending order, + // whereas the samples of the right tail are sorted in descending order + return *(boost::begin(tail(args)) + n - 1); + } + }; +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_tail_quantile<> +// +namespace tag +{ + template<typename LeftRight> + struct weighted_tail_quantile + : depends_on<sum_of_weights, tail_weights<LeftRight> > + { + /// INTERNAL ONLY + typedef accumulators::impl::weighted_tail_quantile_impl<mpl::_1, mpl::_2, LeftRight> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_tail_quantile +// +namespace extract +{ + extractor<tag::quantile> const weighted_tail_quantile = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_tail_quantile) +} + +using extract::weighted_tail_quantile; + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/weighted_tail_variate_means.hpp b/boost/accumulators/statistics/weighted_tail_variate_means.hpp new file mode 100644 index 0000000000..da181896a1 --- /dev/null +++ b/boost/accumulators/statistics/weighted_tail_variate_means.hpp @@ -0,0 +1,242 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_tail_variate_means.hpp +// +// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_TAIL_VARIATE_MEANS_HPP_DE_01_01_2006 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_TAIL_VARIATE_MEANS_HPP_DE_01_01_2006 + +#include <numeric> +#include <vector> +#include <limits> +#include <functional> +#include <sstream> +#include <stdexcept> +#include <boost/throw_exception.hpp> +#include <boost/parameter/keyword.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/type_traits/is_same.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/tail.hpp> +#include <boost/accumulators/statistics/tail_variate.hpp> +#include <boost/accumulators/statistics/tail_variate_means.hpp> +#include <boost/accumulators/statistics/weighted_tail_mean.hpp> +#include <boost/accumulators/statistics/parameters/quantile_probability.hpp> + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4127) // conditional expression is constant +#endif + +namespace boost +{ + // for _BinaryOperatrion2 in std::inner_product below + // mutliplies two values and promotes the result to double + namespace numeric { namespace functional + { + /////////////////////////////////////////////////////////////////////////////// + // numeric::functional::multiply_and_promote_to_double + template<typename T, typename U> + struct multiply_and_promote_to_double + : multiplies<T, double const> + { + }; + }} +} + +namespace boost { namespace accumulators +{ + +namespace impl +{ + /** + @brief Estimation of the absolute and relative weighted tail variate means (for both left and right tails) + + For all \f$j\f$-th variates associated to the + + \f[ + \lambda = \inf\left\{ l \left| \frac{1}{\bar{w}_n}\sum_{i=1}^{l} w_i \geq \alpha \right. \right\} + \f] + + smallest samples (left tail) or the weighted mean of the + + \f[ + n + 1 - \rho = n + 1 - \sup\left\{ r \left| \frac{1}{\bar{w}_n}\sum_{i=r}^{n} w_i \geq (1 - \alpha) \right. \right\} + \f] + + largest samples (right tail), the absolute weighted tail means \f$\widehat{ATM}_{n,\alpha}(X, j)\f$ + are computed and returned as an iterator range. Alternatively, the relative weighted tail means + \f$\widehat{RTM}_{n,\alpha}(X, j)\f$ are returned, which are the absolute weighted tail means + normalized with the weighted (non-coherent) sample tail mean \f$\widehat{NCTM}_{n,\alpha}(X)\f$. + + \f[ + \widehat{ATM}_{n,\alpha}^{\mathrm{right}}(X, j) = + \frac{1}{\sum_{i=\rho}^n w_i} + \sum_{i=\rho}^n w_i \xi_{j,i} + \f] + + \f[ + \widehat{ATM}_{n,\alpha}^{\mathrm{left}}(X, j) = + \frac{1}{\sum_{i=1}^{\lambda}} + \sum_{i=1}^{\lambda} w_i \xi_{j,i} + \f] + + \f[ + \widehat{RTM}_{n,\alpha}^{\mathrm{right}}(X, j) = + \frac{\sum_{i=\rho}^n w_i \xi_{j,i}} + {\sum_{i=\rho}^n w_i \widehat{NCTM}_{n,\alpha}^{\mathrm{right}}(X)} + \f] + + \f[ + \widehat{RTM}_{n,\alpha}^{\mathrm{left}}(X, j) = + \frac{\sum_{i=1}^{\lambda} w_i \xi_{j,i}} + {\sum_{i=1}^{\lambda} w_i \widehat{NCTM}_{n,\alpha}^{\mathrm{left}}(X)} + \f] + */ + + /////////////////////////////////////////////////////////////////////////////// + // weighted_tail_variate_means_impl + // by default: absolute weighted_tail_variate_means + template<typename Sample, typename Weight, typename Impl, typename LeftRight, typename VariateType> + struct weighted_tail_variate_means_impl + : accumulator_base + { + typedef typename numeric::functional::average<Weight, Weight>::result_type float_type; + typedef typename numeric::functional::average<typename numeric::functional::multiplies<VariateType, Weight>::result_type, Weight>::result_type array_type; + // for boost::result_of + typedef iterator_range<typename array_type::iterator> result_type; + + weighted_tail_variate_means_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + float_type threshold = sum_of_weights(args) + * ( ( is_same<LeftRight, left>::value ) ? args[quantile_probability] : 1. - args[quantile_probability] ); + + std::size_t n = 0; + Weight sum = Weight(0); + + while (sum < threshold) + { + if (n < static_cast<std::size_t>(tail_weights(args).size())) + { + sum += *(tail_weights(args).begin() + n); + n++; + } + else + { + if (std::numeric_limits<float_type>::has_quiet_NaN) + { + std::fill( + this->tail_means_.begin() + , this->tail_means_.end() + , std::numeric_limits<float_type>::quiet_NaN() + ); + } + else + { + std::ostringstream msg; + msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; + boost::throw_exception(std::runtime_error(msg.str())); + } + } + } + + std::size_t num_variates = tail_variate(args).begin()->size(); + + this->tail_means_.clear(); + this->tail_means_.resize(num_variates, Sample(0)); + + this->tail_means_ = std::inner_product( + tail_variate(args).begin() + , tail_variate(args).begin() + n + , tail_weights(args).begin() + , this->tail_means_ + , numeric::functional::plus<array_type const, array_type const>() + , numeric::functional::multiply_and_promote_to_double<VariateType const, Weight const>() + ); + + float_type factor = sum * ( (is_same<Impl, relative>::value) ? non_coherent_weighted_tail_mean(args) : 1. ); + + std::transform( + this->tail_means_.begin() + , this->tail_means_.end() + , this->tail_means_.begin() + , std::bind2nd(numeric::functional::divides<typename array_type::value_type const, float_type const>(), factor) + ); + + return make_iterator_range(this->tail_means_); + } + + private: + + mutable array_type tail_means_; + + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::absolute_weighted_tail_variate_means +// tag::relative_weighted_tail_variate_means +// +namespace tag +{ + template<typename LeftRight, typename VariateType, typename VariateTag> + struct absolute_weighted_tail_variate_means + : depends_on<non_coherent_weighted_tail_mean<LeftRight>, tail_variate<VariateType, VariateTag, LeftRight>, tail_weights<LeftRight> > + { + typedef accumulators::impl::weighted_tail_variate_means_impl<mpl::_1, mpl::_2, absolute, LeftRight, VariateType> impl; + }; + template<typename LeftRight, typename VariateType, typename VariateTag> + struct relative_weighted_tail_variate_means + : depends_on<non_coherent_weighted_tail_mean<LeftRight>, tail_variate<VariateType, VariateTag, LeftRight>, tail_weights<LeftRight> > + { + typedef accumulators::impl::weighted_tail_variate_means_impl<mpl::_1, mpl::_2, relative, LeftRight, VariateType> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_tail_variate_means +// extract::relative_weighted_tail_variate_means +// +namespace extract +{ + extractor<tag::abstract_absolute_tail_variate_means> const weighted_tail_variate_means = {}; + extractor<tag::abstract_relative_tail_variate_means> const relative_weighted_tail_variate_means = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_tail_variate_means) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(relative_weighted_tail_variate_means) +} + +using extract::weighted_tail_variate_means; +using extract::relative_weighted_tail_variate_means; + +// weighted_tail_variate_means<LeftRight, VariateType, VariateTag>(absolute) -> absolute_weighted_tail_variate_means<LeftRight, VariateType, VariateTag> +template<typename LeftRight, typename VariateType, typename VariateTag> +struct as_feature<tag::weighted_tail_variate_means<LeftRight, VariateType, VariateTag>(absolute)> +{ + typedef tag::absolute_weighted_tail_variate_means<LeftRight, VariateType, VariateTag> type; +}; + +// weighted_tail_variate_means<LeftRight, VariateType, VariateTag>(relative) -> relative_weighted_tail_variate_means<LeftRight, VariateType, VariateTag> +template<typename LeftRight, typename VariateType, typename VariateTag> +struct as_feature<tag::weighted_tail_variate_means<LeftRight, VariateType, VariateTag>(relative)> +{ + typedef tag::relative_weighted_tail_variate_means<LeftRight, VariateType, VariateTag> type; +}; + +}} // namespace boost::accumulators + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#endif diff --git a/boost/accumulators/statistics/weighted_variance.hpp b/boost/accumulators/statistics/weighted_variance.hpp new file mode 100644 index 0000000000..e91fdc1bc6 --- /dev/null +++ b/boost/accumulators/statistics/weighted_variance.hpp @@ -0,0 +1,186 @@ +/////////////////////////////////////////////////////////////////////////////// +// weighted_variance.hpp +// +// Copyright 2005 Daniel Egloff, Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_VARIANCE_HPP_EAN_28_10_2005 +#define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_VARIANCE_HPP_EAN_28_10_2005 + +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/framework/accumulator_base.hpp> +#include <boost/accumulators/framework/extractor.hpp> +#include <boost/accumulators/numeric/functional.hpp> +#include <boost/accumulators/framework/parameters/sample.hpp> +#include <boost/accumulators/framework/depends_on.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/count.hpp> +#include <boost/accumulators/statistics/variance.hpp> +#include <boost/accumulators/statistics/weighted_sum.hpp> +#include <boost/accumulators/statistics/weighted_mean.hpp> +#include <boost/accumulators/statistics/weighted_moment.hpp> + +namespace boost { namespace accumulators +{ + +namespace impl +{ + //! Lazy calculation of variance of weighted samples. + /*! + The default implementation of the variance of weighted samples is based on the second moment + \f$\widehat{m}_n^{(2)}\f$ (weighted_moment<2>) and the mean\f$ \hat{\mu}_n\f$ (weighted_mean): + \f[ + \hat{\sigma}_n^2 = \widehat{m}_n^{(2)}-\hat{\mu}_n^2, + \f] + where \f$n\f$ is the number of samples. + */ + template<typename Sample, typename Weight, typename MeanFeature> + struct lazy_weighted_variance_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + // for boost::result_of + typedef typename numeric::functional::average<weighted_sample, Weight>::result_type result_type; + + lazy_weighted_variance_impl(dont_care) {} + + template<typename Args> + result_type result(Args const &args) const + { + extractor<MeanFeature> const some_mean = {}; + result_type tmp = some_mean(args); + return accumulators::weighted_moment<2>(args) - tmp * tmp; + } + }; + + //! Iterative calculation of variance of weighted samples. + /*! + Iterative calculation of variance of weighted samples: + \f[ + \hat{\sigma}_n^2 = + \frac{\bar{w}_n - w_n}{\bar{w}_n}\hat{\sigma}_{n - 1}^2 + + \frac{w_n}{\bar{w}_n - w_n}\left(X_n - \hat{\mu}_n\right)^2 + ,\quad n\ge2,\quad\hat{\sigma}_0^2 = 0. + \f] + where \f$\bar{w}_n\f$ is the sum of the \f$n\f$ weights \f$w_i\f$ and \f$\hat{\mu}_n\f$ + the estimate of the mean of the weighted smaples. Note that the sample variance is not defined for + \f$n <= 1\f$. + */ + template<typename Sample, typename Weight, typename MeanFeature, typename Tag> + struct weighted_variance_impl + : accumulator_base + { + typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; + // for boost::result_of + typedef typename numeric::functional::average<weighted_sample, Weight>::result_type result_type; + + template<typename Args> + weighted_variance_impl(Args const &args) + : weighted_variance(numeric::average(args[sample | Sample()], numeric::one<Weight>::value)) + { + } + + template<typename Args> + void operator ()(Args const &args) + { + std::size_t cnt = count(args); + + if(cnt > 1) + { + extractor<MeanFeature> const some_mean = {}; + + result_type tmp = args[parameter::keyword<Tag>::get()] - some_mean(args); + + this->weighted_variance = + numeric::average(this->weighted_variance * (sum_of_weights(args) - args[weight]), sum_of_weights(args)) + + numeric::average(tmp * tmp * args[weight], sum_of_weights(args) - args[weight] ); + } + } + + result_type result(dont_care) const + { + return this->weighted_variance; + } + + private: + result_type weighted_variance; + }; + +} // namespace impl + +/////////////////////////////////////////////////////////////////////////////// +// tag::weighted_variance +// tag::immediate_weighted_variance +// +namespace tag +{ + struct lazy_weighted_variance + : depends_on<weighted_moment<2>, weighted_mean> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::lazy_weighted_variance_impl<mpl::_1, mpl::_2, weighted_mean> impl; + }; + + struct weighted_variance + : depends_on<count, immediate_weighted_mean> + { + /// INTERNAL ONLY + /// + typedef accumulators::impl::weighted_variance_impl<mpl::_1, mpl::_2, immediate_weighted_mean, sample> impl; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// extract::weighted_variance +// extract::immediate_weighted_variance +// +namespace extract +{ + extractor<tag::lazy_weighted_variance> const lazy_weighted_variance = {}; + extractor<tag::weighted_variance> const weighted_variance = {}; + + BOOST_ACCUMULATORS_IGNORE_GLOBAL(lazy_weighted_variance) + BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_variance) +} + +using extract::lazy_weighted_variance; +using extract::weighted_variance; + +// weighted_variance(lazy) -> lazy_weighted_variance +template<> +struct as_feature<tag::weighted_variance(lazy)> +{ + typedef tag::lazy_weighted_variance type; +}; + +// weighted_variance(immediate) -> weighted_variance +template<> +struct as_feature<tag::weighted_variance(immediate)> +{ + typedef tag::weighted_variance type; +}; + +//////////////////////////////////////////////////////////////////////////// +//// droppable_accumulator<weighted_variance_impl> +//// need to specialize droppable lazy weighted_variance to cache the result at the +//// point the accumulator is dropped. +///// INTERNAL ONLY +///// +//template<typename Sample, typename Weight, typename MeanFeature> +//struct droppable_accumulator<impl::weighted_variance_impl<Sample, Weight, MeanFeature> > +// : droppable_accumulator_base< +// with_cached_result<impl::weighted_variance_impl<Sample, Weight, MeanFeature> > +// > +//{ +// template<typename Args> +// droppable_accumulator(Args const &args) +// : droppable_accumulator::base(args) +// { +// } +//}; + +}} // namespace boost::accumulators + +#endif diff --git a/boost/accumulators/statistics/with_error.hpp b/boost/accumulators/statistics/with_error.hpp new file mode 100644 index 0000000000..adafc8e0d8 --- /dev/null +++ b/boost/accumulators/statistics/with_error.hpp @@ -0,0 +1,44 @@ +/////////////////////////////////////////////////////////////////////////////// +// with_error.hpp +// +// Copyright 2005 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_ACCUMULATORS_STATISTICS_WITH_ERROR_HPP_EAN_01_11_2005 +#define BOOST_ACCUMULATORS_STATISTICS_WITH_ERROR_HPP_EAN_01_11_2005 + +#include <boost/preprocessor/repetition/enum_params.hpp> +#include <boost/mpl/vector.hpp> +#include <boost/mpl/transform_view.hpp> +#include <boost/mpl/placeholders.hpp> +#include <boost/accumulators/statistics_fwd.hpp> +#include <boost/accumulators/statistics/error_of.hpp> + +namespace boost { namespace accumulators +{ + +namespace detail +{ + template<typename Feature> + struct error_of_tag + { + typedef tag::error_of<Feature> type; + }; +} + +/////////////////////////////////////////////////////////////////////////////// +// with_error +// +template<BOOST_PP_ENUM_PARAMS(BOOST_ACCUMULATORS_MAX_FEATURES, typename Feature)> +struct with_error + : mpl::transform_view< + mpl::vector<BOOST_PP_ENUM_PARAMS(BOOST_ACCUMULATORS_MAX_FEATURES, Feature)> + , detail::error_of_tag<mpl::_1> + > +{ +}; + +}} // namespace boost::accumulators + +#endif |