Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/engine/internal/executor/range_aggregation_bench_tes...

190 lines
4.9 KiB

package executor
import (
"context"
"errors"
"testing"
"time"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/grafana/loki/v3/pkg/engine/internal/assertions"
"github.com/grafana/loki/v3/pkg/engine/internal/planner/physical"
"github.com/grafana/loki/v3/pkg/engine/internal/semconv"
"github.com/grafana/loki/v3/pkg/engine/internal/types"
"github.com/grafana/loki/v3/pkg/util/arrowtest"
)
// BenchmarkRangeAggregationPipeline times pipeline.Read for each window strategy
// (instant, aligned, gapped and overlapping). The input batches are built once up
// front; every subbenchmark run builds and opens its pipeline before the timer is
// reset, and each iteration drains the pipeline to EOF and then rewinds the input
// cursor and aggregator state so Read can run again.
func BenchmarkRangeAggregationPipeline(b *testing.B) {
	// Switch assertions off for the duration of the benchmark and restore the
	// previous setting once it finishes.
	prevAssertions := assertions.Enabled
	assertions.Enabled = false
	b.Cleanup(func() { assertions.Enabled = prevAssertions })

	grouping := buildRangeAggregationGrouping()
	schema, rows := buildRangeAggregationInput()
	records := buildInputRecords(b, schema, rows)
	b.Cleanup(func() {
		for i := range records {
			records[i].Release()
		}
	})

	// countOver builds the options for a count aggregation over the shared grouping;
	// the subbenchmarks differ only in their time bounds, range interval and step.
	countOver := func(startSec, endSec int64, rangeInterval, step time.Duration) rangeAggregationOptions {
		return rangeAggregationOptions{
			grouping:      grouping,
			startTs:       time.Unix(startSec, 0),
			endTs:         time.Unix(endSec, 0),
			rangeInterval: rangeInterval,
			step:          step,
			operation:     types.RangeAggregationTypeCount,
		}
	}

	benchmarks := []struct {
		name string
		opts rangeAggregationOptions
	}{
		{name: "case=instant", opts: countOver(1000, 1000, 1000*time.Second, 0)},
		{name: "case=aligned", opts: countOver(10, 40, 10*time.Second, 10*time.Second)},
		{name: "case=gapped", opts: countOver(10, 40, 5*time.Second, 10*time.Second)},
		{name: "case=overlapping", opts: countOver(10, 40, 5*time.Minute, 10*time.Second)},
	}

	ctx := context.Background()
	evaluator := newExpressionEvaluator()

	for _, bm := range benchmarks {
		b.Run(bm.name, func(b *testing.B) {
			source := NewBufferedPipeline(records...)
			agg, err := newRangeAggregationPipeline([]Pipeline{source}, evaluator, bm.opts)
			if err != nil {
				b.Fatal(err)
			}
			if err = agg.Open(ctx); err != nil {
				b.Fatal(err)
			}

			b.ReportAllocs()
			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				// Drain the pipeline completely on every iteration.
				for {
					rec, readErr := agg.Read(ctx)
					if readErr == nil {
						if rec != nil {
							rec.Release()
						}
						continue
					}
					if !errors.Is(readErr, EOF) {
						b.Fatal(readErr)
					}
					// EOF: rewind so the next iteration reads the same input again.
					resetRangeAggregationPipeline(agg, source)
					break
				}
			}
		})
	}
}
// resetRangeAggregationPipeline puts a range aggregation pipeline and its buffered
// input back into a state where Read can be driven again over the same data.
// A rangeAggregationPipeline is normally single-shot (tracked by inputsExhausted),
// so this helper is meant for benchmarks and tests only.
func resetRangeAggregationPipeline(p *rangeAggregationPipeline, input *BufferedPipeline) {
	// Rewind the input.
	input.Reset()

	// Reset the aggregator state.
	p.aggregator.Reset()

	// Clear the single-shot flag so the pipeline reads its inputs again.
	p.inputsExhausted = false
}
// buildInputRecords converts each entry of rows into an Arrow record batch with the
// given schema, allocating from memory.DefaultAllocator. The returned slice has one
// record per entry of rows, in the same order.
func buildInputRecords(b *testing.B, schema *arrow.Schema, rows []arrowtest.Rows) []arrow.RecordBatch {
	b.Helper()

	out := make([]arrow.RecordBatch, 0, len(rows))
	for _, batch := range rows {
		out = append(out, batch.Record(memory.DefaultAllocator, schema))
	}
	return out
}
// buildRangeAggregationGrouping returns the grouping used by the benchmark: the
// "env" and "service" columns, both referenced with the ambiguous column type,
// with Without set to false.
func buildRangeAggregationGrouping() physical.Grouping {
	names := []string{"env", "service"}

	columns := make([]physical.ColumnExpression, 0, len(names))
	for _, name := range names {
		columns = append(columns, &physical.ColumnExpr{
			Ref: types.ColumnRef{
				Column: name,
				Type:   types.ColumnTypeAmbiguous,
			},
		})
	}

	return physical.Grouping{
		Columns: columns,
		Without: false,
	}
}
// buildRangeAggregationInput returns the benchmark schema (timestamp, env and
// service columns) together with 8 batches of 1024 rows each. Row number n, counted
// across all batches, has the timestamp Unix second 12 (UTC) plus n seconds, an env
// value cycling through three names and a service value cycling through four.
func buildRangeAggregationInput() (*arrow.Schema, []arrowtest.Rows) {
	const (
		batches      = 8
		rowsPerBatch = 1024
	)

	var (
		envs     = []string{"prod", "dev", "staging"}
		services = []string{"app1", "app2", "app3", "app4"}
		start    = time.Unix(12, 0).UTC()
	)

	schema := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(colEnv, false),
		semconv.FieldFromFQN(colSvc, false),
	}, nil)

	all := make([]arrowtest.Rows, 0, batches)
	// first is the global row number of the first row in the current batch.
	for first := 0; first < batches*rowsPerBatch; first += rowsPerBatch {
		chunk := make(arrowtest.Rows, 0, rowsPerBatch)
		for n := first; n < first+rowsPerBatch; n++ {
			chunk = append(chunk, arrowtest.Row{
				colTs:  start.Add(time.Duration(n) * time.Second),
				colEnv: envs[n%len(envs)],
				colSvc: services[n%len(services)],
			})
		}
		all = append(all, chunk)
	}
	return schema, all
}