mirror of https://github.com/grafana/loki
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
332 lines
8.8 KiB
332 lines
8.8 KiB
package logql
|
|
|
|
import (
|
|
"container/heap"
|
|
"fmt"
|
|
|
|
"github.com/axiomhq/hyperloglog"
|
|
|
|
"github.com/prometheus/prometheus/model/labels"
|
|
"github.com/prometheus/prometheus/promql"
|
|
promql_parser "github.com/prometheus/prometheus/promql/parser"
|
|
|
|
"github.com/grafana/loki/v3/pkg/logproto"
|
|
"github.com/grafana/loki/v3/pkg/logql/sketch"
|
|
"github.com/grafana/loki/v3/pkg/logql/syntax"
|
|
)
|
|
|
|
const (
	// CountMinSketchVectorType is the value type name returned by CountMinSketchVector.Type.
	CountMinSketchVectorType = "CountMinSketchVector"

	// epsilon is the error argument passed to sketch.NewCountMinSketchFromErrorAndProbability
	// when a HeapCountMinSketchVector is created.
	epsilon = 0.0001

	// delta is the probability argument passed to sketch.NewCountMinSketchFromErrorAndProbability.
	// delta of 0.01 results in a sketch size of 27183 * 7 * 4 bytes = 761,124 bytes, 0.05 would yield 543,660 bytes
	// NOTE(review): ToProto appends the counter rows to a []float64, so the 4-byte cell size (and
	// possibly the row count) quoted above may be stale — confirm against the sketch package.
	delta = 0.01
)
|
|
|
|
// CountMinSketchVector tracks the count or sum of values of a metric, ie list of label value pairs. It's storage for
|
|
// the values is upper bound bu delta and epsilon. To limit the storage for labels see HeapCountMinSketchVector.
|
|
// The main use case is for a topk approximation.
|
|
type CountMinSketchVector struct {
|
|
T int64
|
|
F *sketch.CountMinSketch
|
|
|
|
Metrics []labels.Labels
|
|
}
|
|
|
|
func (CountMinSketchVector) SampleVector() promql.Vector {
|
|
return promql.Vector{}
|
|
}
|
|
|
|
func (CountMinSketchVector) QuantileSketchVec() ProbabilisticQuantileVector {
|
|
return ProbabilisticQuantileVector{}
|
|
}
|
|
|
|
func (v CountMinSketchVector) CountMinSketchVec() CountMinSketchVector {
|
|
return v
|
|
}
|
|
|
|
func (v *CountMinSketchVector) Merge(right *CountMinSketchVector) (*CountMinSketchVector, error) {
|
|
// The underlying CMS implementation already merges the HLL sketches that are part of that structure.
|
|
err := v.F.Merge(right.F)
|
|
if err != nil {
|
|
return v, err
|
|
}
|
|
|
|
// Merge labels without duplication. Note: the CMS does not limit the number of labels as the
|
|
// HeapCountMinSketchVector does.
|
|
processed := map[string]struct{}{}
|
|
for _, l := range v.Metrics {
|
|
processed[l.String()] = struct{}{}
|
|
}
|
|
|
|
for _, r := range right.Metrics {
|
|
if _, duplicate := processed[r.String()]; !duplicate {
|
|
processed[r.String()] = struct{}{}
|
|
v.Metrics = append(v.Metrics, r)
|
|
}
|
|
}
|
|
|
|
return v, nil
|
|
}
|
|
|
|
func (CountMinSketchVector) String() string {
|
|
return "CountMinSketchVector()"
|
|
}
|
|
|
|
// Type reports CountMinSketchVectorType as the value type of the vector.
func (CountMinSketchVector) Type() promql_parser.ValueType {
	return CountMinSketchVectorType
}
|
|
|
|
func (v CountMinSketchVector) ToProto() (*logproto.CountMinSketchVector, error) {
|
|
p := &logproto.CountMinSketchVector{
|
|
TimestampMs: v.T,
|
|
Metrics: make([]*logproto.Labels, len(v.Metrics)),
|
|
Sketch: &logproto.CountMinSketch{
|
|
Depth: v.F.Depth,
|
|
Width: v.F.Width,
|
|
},
|
|
}
|
|
|
|
// insert the hll sketch
|
|
hllBytes, err := v.F.HyperLogLog.MarshalBinary()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
p.Sketch.Hyperloglog = hllBytes
|
|
|
|
// Serialize CMS
|
|
p.Sketch.Counters = make([]float64, 0, v.F.Depth*v.F.Width)
|
|
for row := uint32(0); row < v.F.Depth; row++ {
|
|
p.Sketch.Counters = append(p.Sketch.Counters, v.F.Counters[row]...)
|
|
}
|
|
|
|
// Serialize metric labels
|
|
for i, metric := range v.Metrics {
|
|
p.Metrics[i] = &logproto.Labels{
|
|
Metric: make([]*logproto.LabelPair, len(metric)),
|
|
}
|
|
for j, pair := range metric {
|
|
p.Metrics[i].Metric[j] = &logproto.LabelPair{
|
|
Name: pair.Name,
|
|
Value: pair.Value,
|
|
}
|
|
}
|
|
}
|
|
|
|
return p, nil
|
|
}
|
|
|
|
func CountMinSketchVectorFromProto(p *logproto.CountMinSketchVector) (CountMinSketchVector, error) {
|
|
vec := CountMinSketchVector{
|
|
T: p.TimestampMs,
|
|
Metrics: make([]labels.Labels, len(p.Metrics)),
|
|
}
|
|
|
|
// Deserialize CMS
|
|
var err error
|
|
vec.F, err = sketch.NewCountMinSketch(p.Sketch.Width, p.Sketch.Depth)
|
|
if err != nil {
|
|
return vec, err
|
|
}
|
|
|
|
hll := hyperloglog.New()
|
|
if err := hll.UnmarshalBinary(p.Sketch.Hyperloglog); err != nil {
|
|
return vec, err
|
|
}
|
|
vec.F.HyperLogLog = hll
|
|
|
|
for row := 0; row < int(vec.F.Depth); row++ {
|
|
s := row * int(vec.F.Width)
|
|
e := s + int(vec.F.Width)
|
|
copy(vec.F.Counters[row], p.Sketch.Counters[s:e])
|
|
}
|
|
|
|
// Deserialize metric labels
|
|
for i, in := range p.Metrics {
|
|
lbls := make(labels.Labels, len(in.Metric))
|
|
for j, labelPair := range in.Metric {
|
|
lbls[j].Name = labelPair.Name
|
|
lbls[j].Value = labelPair.Value
|
|
}
|
|
vec.Metrics[i] = lbls
|
|
}
|
|
|
|
return vec, nil
|
|
}
|
|
|
|
// HeapCountMinSketchVector is a CountMinSketchVector that keeps the number of metrics to a defined maximum.
|
|
type HeapCountMinSketchVector struct {
|
|
CountMinSketchVector
|
|
|
|
// internal set of observed events
|
|
observed map[string]struct{}
|
|
maxLabels int
|
|
}
|
|
|
|
func NewHeapCountMinSketchVector(ts int64, metricsLength, maxLabels int) HeapCountMinSketchVector {
|
|
f, _ := sketch.NewCountMinSketchFromErrorAndProbability(epsilon, delta)
|
|
|
|
if metricsLength >= maxLabels {
|
|
metricsLength = maxLabels
|
|
}
|
|
|
|
return HeapCountMinSketchVector{
|
|
CountMinSketchVector: CountMinSketchVector{
|
|
T: ts,
|
|
F: f,
|
|
Metrics: make([]labels.Labels, 0, metricsLength),
|
|
},
|
|
observed: make(map[string]struct{}),
|
|
maxLabels: maxLabels,
|
|
}
|
|
}
|
|
|
|
func (v *HeapCountMinSketchVector) Add(metric labels.Labels, value float64) {
|
|
// TODO: we save a lot of allocations by reusing the buffer inside metric.String
|
|
metricString := metric.String()
|
|
v.F.Add(metricString, value)
|
|
|
|
// Add our metric if we haven't seen it
|
|
if _, ok := v.observed[metricString]; !ok {
|
|
heap.Push(v, metric)
|
|
v.observed[metricString] = struct{}{}
|
|
} else if v.Metrics[0].String() == metricString {
|
|
// The smalles element has been updated to fix the heap.
|
|
heap.Fix(v, 0)
|
|
}
|
|
|
|
// The maximum number of labels has been reached, so drop the smallest element.
|
|
if len(v.Metrics) > v.maxLabels {
|
|
metric := heap.Pop(v).(labels.Labels)
|
|
delete(v.observed, metric.String())
|
|
}
|
|
}
|
|
|
|
func (v HeapCountMinSketchVector) Len() int {
|
|
return len(v.Metrics)
|
|
}
|
|
|
|
func (v HeapCountMinSketchVector) Less(i, j int) bool {
|
|
left := v.F.Count(v.Metrics[i].String())
|
|
right := v.F.Count(v.Metrics[j].String())
|
|
return left < right
|
|
}
|
|
|
|
func (v HeapCountMinSketchVector) Swap(i, j int) {
|
|
v.Metrics[i], v.Metrics[j] = v.Metrics[j], v.Metrics[i]
|
|
}
|
|
|
|
func (v *HeapCountMinSketchVector) Push(x any) {
|
|
v.Metrics = append(v.Metrics, x.(labels.Labels))
|
|
}
|
|
|
|
func (v *HeapCountMinSketchVector) Pop() any {
|
|
old := v.Metrics
|
|
n := len(old)
|
|
x := old[n-1]
|
|
v.Metrics = old[0 : n-1]
|
|
return x
|
|
}
|
|
|
|
// JoinCountMinSketchVector joins the results from stepEvaluator into a CountMinSketchVector.
|
|
func JoinCountMinSketchVector(_ bool, r StepResult, stepEvaluator StepEvaluator, params Params) (promql_parser.Value, error) {
|
|
vec := r.CountMinSketchVec()
|
|
if stepEvaluator.Error() != nil {
|
|
return nil, stepEvaluator.Error()
|
|
}
|
|
|
|
if GetRangeType(params) != InstantType {
|
|
return nil, fmt.Errorf("count min sketches are only supported on instant queries")
|
|
}
|
|
|
|
return vec, nil
|
|
}
|
|
|
|
func newCountMinSketchVectorAggEvaluator(nextEvaluator StepEvaluator, expr *syntax.VectorAggregationExpr, maxLabels int) (*countMinSketchVectorAggEvaluator, error) {
|
|
if expr.Grouping.Groups != nil {
|
|
return nil, fmt.Errorf("count min sketch vector aggregation does not support any grouping")
|
|
}
|
|
|
|
return &countMinSketchVectorAggEvaluator{
|
|
nextEvaluator: nextEvaluator,
|
|
expr: expr,
|
|
buf: make([]byte, 0, 1024),
|
|
lb: labels.NewBuilder(nil),
|
|
maxLabels: maxLabels,
|
|
}, nil
|
|
}
|
|
|
|
// countMinSketchVectorAggEvaluator processes sample vectors and aggregates them in a count min sketch with a heap.
|
|
type countMinSketchVectorAggEvaluator struct {
|
|
nextEvaluator StepEvaluator
|
|
expr *syntax.VectorAggregationExpr
|
|
buf []byte
|
|
lb *labels.Builder
|
|
maxLabels int
|
|
}
|
|
|
|
func (e *countMinSketchVectorAggEvaluator) Next() (bool, int64, StepResult) {
|
|
next, ts, r := e.nextEvaluator.Next()
|
|
|
|
if !next {
|
|
return false, 0, CountMinSketchVector{}
|
|
}
|
|
vec := r.SampleVector()
|
|
|
|
result := NewHeapCountMinSketchVector(ts, len(vec), e.maxLabels)
|
|
for _, s := range vec {
|
|
result.Add(s.Metric, s.F)
|
|
}
|
|
return next, ts, result
|
|
}
|
|
|
|
func (e *countMinSketchVectorAggEvaluator) Explain(parent Node) {
|
|
b := parent.Child("CountMinSketchVectorAgg")
|
|
e.nextEvaluator.Explain(b)
|
|
}
|
|
|
|
func (e *countMinSketchVectorAggEvaluator) Close() error {
|
|
return e.nextEvaluator.Close()
|
|
}
|
|
|
|
func (e *countMinSketchVectorAggEvaluator) Error() error {
|
|
return e.nextEvaluator.Error()
|
|
}
|
|
|
|
// CountMinSketchVectorStepEvaluator evaluates a count min sketch into a promql.Vector.
|
|
type CountMinSketchVectorStepEvaluator struct {
|
|
exhausted bool
|
|
vec *CountMinSketchVector
|
|
}
|
|
|
|
// Compile-time check that the value returned by NewQuantileSketchVectorStepEvaluator satisfies
// StepEvaluator.
// NOTE(review): this sits next to CountMinSketchVectorStepEvaluator but asserts on the quantile
// sketch evaluator; it looks like a copy-paste and presumably should read
// `var _ StepEvaluator = (*CountMinSketchVectorStepEvaluator)(nil)` — confirm that the type's
// Explain method exists elsewhere in the package before changing it.
var _ StepEvaluator = NewQuantileSketchVectorStepEvaluator(nil, 0)
|
|
|
|
func NewCountMinSketchVectorStepEvaluator(vec *CountMinSketchVector) *CountMinSketchVectorStepEvaluator {
|
|
return &CountMinSketchVectorStepEvaluator{
|
|
exhausted: false,
|
|
vec: vec,
|
|
}
|
|
}
|
|
|
|
func (e *CountMinSketchVectorStepEvaluator) Next() (bool, int64, StepResult) {
|
|
if e.exhausted {
|
|
return false, 0, SampleVector{}
|
|
}
|
|
|
|
vec := make(promql.Vector, len(e.vec.Metrics))
|
|
|
|
for i, labels := range e.vec.Metrics {
|
|
|
|
f := e.vec.F.Count(labels.String())
|
|
|
|
vec[i] = promql.Sample{
|
|
T: e.vec.T,
|
|
F: float64(f),
|
|
Metric: labels,
|
|
}
|
|
}
|
|
|
|
return true, e.vec.T, SampleVector(vec)
|
|
}
|
|
|
|
// Close is a no-op and always returns nil.
func (*CountMinSketchVectorStepEvaluator) Close() error {
	return nil
}
|
|
|
|
// Error always returns nil.
func (*CountMinSketchVectorStepEvaluator) Error() error {
	return nil
}
|
|
|