1
/* Copyright 2012 10gen Inc.
3
* Licensed under the Apache License, Version 2.0 (the "License");
4
* you may not use this file except in compliance with the License.
5
* You may obtain a copy of the License at
7
* http://www.apache.org/licenses/LICENSE-2.0
9
* Unless required by applicable law or agreed to in writing, software
10
* distributed under the License is distributed on an "AS IS" BASIS,
11
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
* See the License for the specific language governing permissions and
13
* limitations under the License.
20
#include "mongo/db/jsobj.h"
21
#include "mongo/util/assert_util.h"
24
* These classes provide online descriptive statistics estimator capable
25
* of computing the mean, standard deviation and quantiles.
26
* Exactness is traded for the ability to obtain reasonable estimates
27
* without the need to store all the samples or perform multiple passes
30
* NOTEs on the estimator accessors provide information about accuracy
31
* of the approximation.
33
* The implementation of the estimators is heavily inspired by the algorithms used in
34
* boost.accumulators (www.boost.org/libs/accumulators/).
35
* It differs by being tailored for typical descriptive statistics use cases
36
* thus providing a simpler (even though less flexible) interface.
41
* Collects count, minimum and maximum, calculates mean and standard deviation.
43
* The 'Sample' template parameter is the type of the samples. It does not affect the calculated
44
* mean and standard deviation as all values are converted to double.
45
* However, setting the correct sample type prevents unnecessary casting or precision loss
48
template <class Sample>
49
class BasicEstimators {
54
* Update estimators with another observed value.
56
BasicEstimators& operator <<(const Sample sample);
59
* @return number of observations so far
61
inline size_t count() const { return _count; }
64
* @return mean of the observations seen so far
65
* NOTE: exact (within the limits of IEEE floating point precision).
67
inline double mean() const { return _sum / _count; }
70
* @return standard deviation of the observations so far
71
* NOTE: exact (within the limits of IEEE floating point precision).
73
inline double stddev() const { return std::sqrt(_diff / _count); }
76
* @return minimum observed value so far
79
inline Sample min() const { return _min; }
82
* @return maximum observed value so far
85
inline Sample max() const { return _max; }
88
* Appends the basic estimators to the provided BSONObjBuilder.
90
void appendBasicToBSONObjBuilder(BSONObjBuilder& b) const;
95
double _diff; // sum of squares of differences from the (then-current) mean
101
* Computes 'NumQuantiles' quantiles.
103
* The quantiles at probability 0 and 1 (minimum and maximum observations) are always computed.
104
* Thus DistributionEstimators<3> computes the the 1st, 2nd and 3rd quartiles (probabilities
105
* .25, .50, .75) and the default 0th and 5th (min and max).
107
* The quantile estimators are mean square consistent (they become a better approximation of the
108
* actual quantiles as the sample size grows).
110
template <std::size_t NumQuantiles>
111
class DistributionEstimators {
113
DistributionEstimators();
115
DistributionEstimators& operator <<(const double sample);
118
* Number of computed quantiles, excluding minimum and maximum.
120
static const size_t numberOfQuantiles = NumQuantiles;
123
* Updates the estimators with another observed value.
125
inline double quantile(std::size_t i) const {
126
massert(16476, "the requested value is out of the range of the computed quantiles",
127
i <= NumQuantiles + 1);
128
return this->_heights[2 * i];
132
* @return true when enough value has been observed to output sensible quantiles
134
inline bool quantilesReady() const {
135
return _count >= NumMarkers;
139
* @return estimated minimum
141
* NOTE: use SimpleEstimators::min for an exact value.
143
inline double min() const {
148
* @return estimated maximum
150
* NOTE: use SimpleEstimators::max for an exact value.
152
inline double max() const {
153
return quantile(NumQuantiles + 1);
157
* @return estimated median (2nd quartile)
159
inline double median() const {
164
* @return probability associated with the i-th quantile
166
inline double probability(std::size_t i) const {
167
return i * 1. / (NumQuantiles + 1);
171
* @return value for the nearest available quantile for probability 'prob'
173
inline double icdf(double prob) const {
174
int quant = static_cast<int>(prob * (NumQuantiles + 1) + 0.5);
175
return quantile(quant);
179
* Appends the quantiles to the provided BSONArrayBuilder.
180
* REQUIRES e.quantilesReady() == true
182
void appendQuantilesToBSONArrayBuilder(BSONArrayBuilder& arr) const;
185
inline double _positions_increments(std::size_t i) const;
188
enum { NumMarkers = 2 * NumQuantiles + 3 };
189
double _heights[NumMarkers]; // q_i
190
double _actual_positions[NumMarkers]; // n_i
191
double _desired_positions[NumMarkers]; // d_i
195
* Provides the funcionality of both BasicEstimators and DistributionEstimators.
197
template <class Sample, std::size_t NumQuantiles>
198
class SummaryEstimators :
199
// Multiple-inheritance
200
public BasicEstimators<Sample>,
201
public DistributionEstimators<NumQuantiles> {
203
// Dispatch samples to the inherited estimators
204
inline SummaryEstimators& operator<<(const Sample sample) {
205
this->BasicEstimators<Sample>::operator<<(sample);
206
this->DistributionEstimators<NumQuantiles>::operator<<(sample);
210
// Expose the exact values
211
inline Sample min() const {
212
return this->BasicEstimators<Sample>::min();
215
inline Sample max() const {
216
return this->BasicEstimators<Sample>::max();
220
* @return a summary of the computed estimators as a BSONObj.
222
BSONObj statisticSummaryToBSONObj() const;
227
#include "mongo/util/descriptive_stats-inl.h"