@ -2998,6 +2998,7 @@ type groupedAggregation struct {
hasHistogram bool // Has at least 1 histogram sample aggregated.
incompatibleHistograms bool // If true, group has seen mixed exponential and custom buckets, or incompatible custom buckets.
groupAggrComplete bool // Used by LIMITK to short-cut series loop when we've reached K elem on every group.
incrementalMean bool // True after reverting to incremental calculation of the mean value.
}
// aggregation evaluates sum, avg, count, stdvar, stddev or quantile at one timestep on inputMatrix.
@ -3096,21 +3097,38 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
}
case parser . AVG :
// For the average calculation of histograms, we use
// incremental mean calculation without the help of
// Kahan summation (but this should change, see
// https://github.com/prometheus/prometheus/issues/14105
// ). For floats, we improve the accuracy with the help
// of Kahan summation. For a while, we assumed that
// incremental mean calculation combined with Kahan
// summation (see
// https://stackoverflow.com/questions/61665473/is-it-beneficial-for-precision-to-calculate-the-incremental-mean-average
// for inspiration) is generally the preferred solution,
// as it is quite accurate and only breaks in extreme
// cases (see testdata for avg_over_time).
// However, it then turned out that direct mean
// calculation (still in combination with Kahan
// summation) is often more accurate. See discussion in
// https://github.com/prometheus/prometheus/issues/16714
// . The problem with the direct mean calculation is
// that it can overflow float64 for inputs on which the
// incremental mean calculation works just fine. Our
// current approach is therefore to use direct mean
// calculation as long as we do not overflow (or
// underflow) the running sum. Once the latter would
// happen, we switch to incremental mean calculation.
// This seems to work reasonably well, but note that a
// deeper understanding would be needed to find out if
// maybe an earlier switch to incremental mean
// calculation would be better in terms of accuracy.
// Also, we could apply a number of additional means to
// improve the accuracy, like processing the values in a
// particular order. For now, we decided that the
// current implementation is accurate enough for
// practical purposes, in particular given that changing
// the order of summation would be hard, given how the
// PromQL engine implements aggregations.
group . groupCount ++
if h != nil {
group . hasHistogram = true
@ -3135,6 +3153,22 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
// point in copying the histogram in that case.
} else {
group . hasFloat = true
if ! group . incrementalMean {
newV , newC := kahanSumInc ( f , group . floatValue , group . floatKahanC )
if ! math . IsInf ( newV , 0 ) {
// The sum doesn't overflow, so we propagate it to the
// group struct and continue with the regular
// calculation of the mean value.
group . floatValue , group . floatKahanC = newV , newC
break
}
// If we are here, we know that the sum _would_ overflow. So
// instead of continuing to sum up, we revert to incremental
// calculation of the mean value from here on.
group . incrementalMean = true
group . floatMean = group . floatValue / ( group . groupCount - 1 )
group . floatKahanC /= group . groupCount - 1
}
q := ( group . groupCount - 1 ) / group . groupCount
group . floatMean , group . floatKahanC = kahanSumInc (
f / group . groupCount ,
@ -3212,8 +3246,10 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix
continue
case aggr . hasHistogram :
aggr . histogramValue = aggr . histogramValue . Compact ( 0 )
default :
case aggr . incrementalMean :
aggr . floatValue = aggr . floatMean + aggr . floatKahanC
default :
aggr . floatValue = aggr . floatValue / aggr . groupCount + aggr . floatKahanC / aggr . groupCount
}
case parser . COUNT :