Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/engine/internal/executor/vector_aggregate.go

196 lines
5.1 KiB

package executor
import (
"context"
"errors"
"fmt"
"time"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/grafana/loki/v3/pkg/engine/internal/assertions"
"github.com/grafana/loki/v3/pkg/engine/internal/semconv"
"github.com/grafana/loki/v3/pkg/engine/internal/planner/physical"
"github.com/grafana/loki/v3/pkg/engine/internal/types"
"github.com/grafana/loki/v3/pkg/xcap"
)
// vectorAggregationOptions carries the settings that
// newVectorAggregationPipeline uses to configure a vector aggregation.
type vectorAggregationOptions struct {
// grouping is copied onto the pipeline and later handed to
// collectGroupingColumns for every input record.
grouping physical.Grouping
// operation is the key looked up in vectorAggregationOperations to pick the
// aggregation applied to each group.
operation types.VectorAggregationType
maxQuerySeries int // maximum number of unique series allowed (0 means no limit)
}
// vectorAggregationPipeline is a pipeline that performs vector aggregations.
//
// It reads from the input pipeline, groups the data by specified columns,
// and applies the aggregation function on each group.
//
// The pipeline yields a single aggregated record: the first Read drains every
// input, and later calls return EOF once inputsExhausted has been set.
type vectorAggregationPipeline struct {
// inputs are the upstream pipelines; they are opened in Open, drained in
// read, and closed in Close.
inputs []Pipeline
inputsExhausted bool // indicates if all inputs are exhausted
// aggregator accumulates the (timestamp, value, labels) samples and builds
// the output record. It is reset at the start of read and again in Close.
aggregator *aggregator
// evaluator creates tsEval and valueEval and is passed to
// collectGroupingColumns when resolving grouping columns.
evaluator *expressionEvaluator
// grouping is a copy of opts.grouping, used when collecting grouping columns.
grouping physical.Grouping
// opts holds the options the pipeline was constructed with.
opts vectorAggregationOptions
tsEval evalFunc // used to evaluate the timestamp column
valueEval evalFunc // used to evaluate the value column
// identCache is passed to collectGroupingColumns on every record.
identCache *semconv.IdentifierCache
}
// vectorAggregationOperations maps every supported vector aggregation type to
// the aggregationOperation that is handed to the aggregator. A type without an
// entry here is rejected by newVectorAggregationPipeline.
var vectorAggregationOperations = map[types.VectorAggregationType]aggregationOperation{
	types.VectorAggregationTypeSum:   aggregationOperationSum,
	types.VectorAggregationTypeCount: aggregationOperationCount,
	types.VectorAggregationTypeAvg:   aggregationOperationAvg,
	types.VectorAggregationTypeMin:   aggregationOperationMin,
	types.VectorAggregationTypeMax:   aggregationOperationMax,
}
// newVectorAggregationPipeline builds a pipeline that aggregates the records
// produced by inputs according to opts.
//
// evaluator is used to create the evaluation functions for the builtin
// timestamp column and the generated value column, and is kept on the pipeline
// for resolving grouping columns.
//
// An error is returned when inputs is empty or when opts.operation has no
// entry in vectorAggregationOperations.
func newVectorAggregationPipeline(inputs []Pipeline, evaluator *expressionEvaluator, opts vectorAggregationOptions) (*vectorAggregationPipeline, error) {
	if len(inputs) == 0 {
		return nil, fmt.Errorf("vector aggregation expects at least one input")
	}

	op, ok := vectorAggregationOperations[opts.operation]
	if !ok {
		// The function already has an error return; report the unsupported
		// operation through it rather than panicking inside the executor.
		return nil, fmt.Errorf("unknown vector aggregation operation: %v", opts.operation)
	}

	agg := newAggregator(0, op)
	agg.SetMaxSeries(opts.maxQuerySeries)

	return &vectorAggregationPipeline{
		inputs:     inputs,
		evaluator:  evaluator,
		grouping:   opts.grouping,
		opts:       opts,
		aggregator: agg,
		tsEval: evaluator.newFunc(&physical.ColumnExpr{
			Ref: types.ColumnRef{
				Column: types.ColumnNameBuiltinTimestamp,
				Type:   types.ColumnTypeBuiltin,
			},
		}),
		valueEval: evaluator.newFunc(&physical.ColumnExpr{
			Ref: types.ColumnRef{
				Column: types.ColumnNameGeneratedValue,
				Type:   types.ColumnTypeGenerated,
			},
		}),
		identCache: semconv.NewIdentifierCache(),
	}, nil
}
// Open prepares the pipeline for reading by opening every input pipeline via
// openInputsConcurrently. Any error from that call is returned unchanged.
func (v *vectorAggregationPipeline) Open(ctx context.Context) error {
	if err := openInputsConcurrently(ctx, v.inputs); err != nil {
		return err
	}
	return nil
}
// Read drains all inputs, aggregates them, and returns the aggregated record.
//
// Once the inputs have been fully drained the pipeline is marked exhausted and
// every later call returns EOF. When the underlying read fails, the error is
// returned with a nil record.
func (v *vectorAggregationPipeline) Read(ctx context.Context) (arrow.RecordBatch, error) {
	if v.inputsExhausted {
		return nil, EOF
	}

	rec, err := v.read(ctx)
	if err != nil {
		// The record is not meaningful on failure (read returns nil on most
		// error paths), so skip the assertions instead of running them on it.
		return nil, err
	}

	assertions.CheckColumnDuplicates(rec)
	assertions.CheckLabelValuesDuplicates(rec)
	return rec, nil
}
// read consumes every input pipeline until all of them report EOF, feeds each
// non-null sample into the aggregator, and builds the aggregated record.
//
// Inputs are read in repeated passes: a pass visits every input once, and the
// loop stops after a pass in which every input returned EOF. Any non-EOF read
// error, evaluation error, or aggregator error aborts the read. An error is
// also returned when the timestamp or value column does not evaluate to the
// expected Arrow array type.
//
// If an xcap region is present in ctx, the time spent here excluding the time
// spent inside input reads is recorded as the pipeline execution duration.
func (v *vectorAggregationPipeline) read(ctx context.Context) (arrow.RecordBatch, error) {
	var (
		inputReadTime    time.Duration
		startedAt        = time.Now()
		labelValuesCache = newLabelValuesCache()
		fieldsCache      = newFieldsCache()
	)

	v.aggregator.Reset() // reset before reading new inputs

	inputsExhausted := false
	for !inputsExhausted {
		// Assume this pass finds nothing; any successful read flips it back.
		inputsExhausted = true

		for _, input := range v.inputs {
			inputStart := time.Now()
			record, err := input.Read(ctx)
			inputReadTime += time.Since(inputStart)
			if err != nil {
				if errors.Is(err, EOF) {
					continue
				}
				return nil, err
			}

			inputsExhausted = false
			if record.NumRows() == 0 {
				// Nothing to process
				continue
			}

			assertions.CheckLabelValuesDuplicates(record)

			// extract timestamp column
			tsVec, err := v.tsEval(record)
			if err != nil {
				return nil, err
			}
			tsCol, ok := tsVec.(*array.Timestamp)
			if !ok {
				// Surface a type mismatch as an error instead of a panic.
				return nil, fmt.Errorf("vector aggregation: timestamp column must be *array.Timestamp, got %T", tsVec)
			}

			// extract value column
			valueVec, err := v.valueEval(record)
			if err != nil {
				return nil, err
			}
			valueArr, ok := valueVec.(*array.Float64)
			if !ok {
				return nil, fmt.Errorf("vector aggregation: value column must be *array.Float64, got %T", valueVec)
			}

			arrays, groupingFields, err := collectGroupingColumns(record, v.grouping, v.evaluator, v.identCache)
			if err != nil {
				return nil, err
			}
			v.aggregator.AddLabels(groupingFields)

			for row := range int(record.NumRows()) {
				// Rows without a value contribute nothing to the aggregation.
				if valueArr.IsNull(row) {
					continue
				}

				labelValues := labelValuesCache.getLabelValues(arrays, row)
				labels := fieldsCache.getFields(arrays, groupingFields, row)

				// Timestamps are converted using nanosecond units.
				ts := tsCol.Value(row).ToTime(arrow.Nanosecond)
				if err := v.aggregator.Add(ts, valueArr.Value(row), labels, labelValues); err != nil {
					return nil, err
				}
			}
		}
	}

	// Mark the pipeline as drained before building, so the next Read returns EOF.
	v.inputsExhausted = true

	rec, err := v.aggregator.BuildRecord()

	if region := xcap.RegionFromContext(ctx); region != nil {
		// Report only this pipeline's own work, not the time spent waiting on inputs.
		computeTime := time.Since(startedAt) - inputReadTime
		region.Record(xcap.StatPipelineExecDuration.Observe(computeTime.Seconds()))
	}

	return rec, err
}
// Close resets the aggregator and then closes each input pipeline in order.
func (v *vectorAggregationPipeline) Close() {
	v.aggregator.Reset()

	for i := range v.inputs {
		v.inputs[i].Close()
	}
}