package executor

import (
	"slices"
	"testing"
	"time"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/v3/pkg/engine/internal/assertions"
	"github.com/grafana/loki/v3/pkg/engine/internal/planner/physical"
	"github.com/grafana/loki/v3/pkg/engine/internal/semconv"
	"github.com/grafana/loki/v3/pkg/engine/internal/types"
	"github.com/grafana/loki/v3/pkg/util/arrowtest"
)

const arrowTimestampFormat = "2006-01-02T15:04:05.000000000Z"

// Fully qualified column names shared by the tests in this file.
var (
	colTs  = "timestamp_ns.builtin.timestamp"
	colEnv = "utf8.label.env"
	colSvc = "utf8.label.service"
	colLvl = "utf8.metadata.severity"
	colVal = "float64.generated.value"
)

func init() {
	assertions.Enabled = true
}

// TestRangeAggregationPipeline_instant runs a count aggregation grouped by
// (env, service) as an instant query (startTs == endTs, no step) over two input
// pipelines and checks the per-series counts at the single output timestamp.
func TestRangeAggregationPipeline_instant(t *testing.T) {
	// input schema with timestamp, partition-by columns and non-partition columns
	fields := []arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(colEnv, false),
		semconv.FieldFromFQN(colSvc, false),
		semconv.FieldFromFQN(colLvl, true),
	}
	schema := arrow.NewSchema(fields, nil)

	rowsPipelineA := []arrowtest.Rows{
		{
			{colTs: time.Unix(20, 0).UTC(), colEnv: "prod", colSvc: "app1", colLvl: "error"}, // included
			{colTs: time.Unix(15, 0).UTC(), colEnv: "prod", colSvc: "app1", colLvl: "info"},
			{colTs: time.Unix(10, 0).UTC(), colEnv: "prod", colSvc: "app1", colLvl: "error"}, // excluded, open interval
			{colTs: time.Unix(12, 0).UTC(), colEnv: "prod", colSvc: "app2", colLvl: "error"},
			{colTs: time.Unix(12, 0).UTC(), colEnv: "dev", colSvc: "", colLvl: "error"},
		},
	}
	rowsPipelineB := []arrowtest.Rows{
		{
			{colTs: time.Unix(15, 0).UTC(), colEnv: "prod", colSvc: "app2", colLvl: "info"},
			{colTs: time.Unix(12, 0).UTC(), colEnv: "prod", colSvc: "app2", colLvl: "error"},
		},
		{
			{colTs: time.Unix(15, 0).UTC(), colEnv: "prod", colSvc: "app3", colLvl: "info"},
			{colTs: time.Unix(12, 0).UTC(), colEnv: "prod", colSvc: "app3", colLvl: "error"},
			{colTs: time.Unix(5, 0).UTC(), colEnv: "dev", colSvc: "app2", colLvl: "error"}, // excluded, out of range
		},
	}

	opts := rangeAggregationOptions{
		grouping: physical.Grouping{
			Columns: []physical.ColumnExpression{
				&physical.ColumnExpr{
					Ref: types.ColumnRef{
						Column: "env",
						Type:   types.ColumnTypeAmbiguous,
					},
				},
				&physical.ColumnExpr{
					Ref: types.ColumnRef{
						Column: "service",
						Type:   types.ColumnTypeAmbiguous,
					},
				},
			},
			Without: false,
		},
		startTs:       time.Unix(20, 0).UTC(),
		endTs:         time.Unix(20, 0).UTC(),
		rangeInterval: 10 * time.Second,
		operation:     types.RangeAggregationTypeCount,
	}

	inputA := NewArrowtestPipeline(schema, rowsPipelineA...)
	inputB := NewArrowtestPipeline(schema, rowsPipelineB...)
	pipeline, err := newRangeAggregationPipeline([]Pipeline{inputA, inputB}, newExpressionEvaluator(), opts)
	require.NoError(t, err)
	defer pipeline.Close()

	// Read the pipeline output
	record, err := pipeline.Read(t.Context())
	require.NoError(t, err)

	expect := arrowtest.Rows{
		{colTs: time.Unix(20, 0).UTC(), colVal: float64(2), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app1"},
		{colTs: time.Unix(20, 0).UTC(), colVal: float64(3), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app2"},
		{colTs: time.Unix(20, 0).UTC(), colVal: float64(2), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app3"},
		// Empty service label must be preserved in the aggregation result, not dropped or turned into NULL.
		// Pipeline stages like `| json` can produce parsed labels with empty values.
		{colTs: time.Unix(20, 0).UTC(), colVal: float64(1), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": ""},
	}

	rows, err := arrowtest.RecordRows(record)
	require.NoError(t, err, "should be able to convert record back to rows")
	require.Equal(t, len(expect), len(rows), "number of rows should match")
	require.ElementsMatch(t, expect, rows)
}

// TestRangeAggregationPipeline runs a count aggregation grouped by (env, service)
// as a range query over the same input data with three different
// rangeInterval/step combinations, one subtest per combination.
func TestRangeAggregationPipeline(t *testing.T) {
	// Test RangeAggregationPipeline for range queries (step > 0).
	// 1. Overlapping windows (range > step) - data points can appear in multiple windows
	// 2. Aligned windows (step = range) - each data point appears in exactly one window
	// 3. Non-overlapping windows (step > range) - gaps between windows
	var (
		fields = []arrow.Field{
			semconv.FieldFromFQN(colTs, false),
			semconv.FieldFromFQN(colEnv, false),
			semconv.FieldFromFQN(colSvc, false),
			semconv.FieldFromFQN(colLvl, true),
		}
		schema = arrow.NewSchema(fields, nil)

		// two records from pipeline A and one from pipeline B
		rowsPipelineA = []arrowtest.Rows{
			{
				// time.Unix(0, 0) is not part of any window, it falls on the open interval of the first window
				{colTs: time.Unix(0, 0).UTC(), colEnv: "prod", colSvc: "app1", colLvl: "info"},
				{colTs: time.Unix(2, 0).UTC(), colEnv: "prod", colSvc: "app1", colLvl: "warn"},
				{colTs: time.Unix(4, 0).UTC(), colEnv: "prod", colSvc: "app1", colLvl: "info"},
				{colTs: time.Unix(5, 0).UTC(), colEnv: "prod", colSvc: "app2", colLvl: "error"},
			},
			{
				{colTs: time.Unix(6, 0).UTC(), colEnv: "dev", colSvc: "app1", colLvl: "info"},
				{colTs: time.Unix(8, 0).UTC(), colEnv: "prod", colSvc: "app1", colLvl: "error"},
				{colTs: time.Unix(10, 0).UTC(), colEnv: "prod", colSvc: "app2", colLvl: "info"},
				{colTs: time.Unix(12, 0).UTC(), colEnv: "prod", colSvc: "app1", colLvl: "info"},
				{colTs: time.Unix(15, 0).UTC(), colEnv: "prod", colSvc: "app2", colLvl: "error"},
			},
		}
		rowsPipelineB = []arrowtest.Rows{{
			{colTs: time.Unix(20, 0).UTC(), colEnv: "dev", colSvc: "app1", colLvl: "info"},
			{colTs: time.Unix(25, 0).UTC(), colEnv: "dev", colSvc: "app2", colLvl: "error"},
			{colTs: time.Unix(28, 0).UTC(), colEnv: "dev", colSvc: "app1", colLvl: "info"},
			{colTs: time.Unix(30, 0).UTC(), colEnv: "dev", colSvc: "app2", colLvl: "info"},
		}}
	)

	groupBy := []physical.ColumnExpression{
		&physical.ColumnExpr{
			Ref: types.ColumnRef{
				Column: "env",
				Type:   types.ColumnTypeAmbiguous,
			},
		},
		&physical.ColumnExpr{
			Ref: types.ColumnRef{
				Column: "service",
				Type:   types.ColumnTypeAmbiguous,
			},
		},
	}

	t.Run("aligned windows", func(t *testing.T) {
		opts := rangeAggregationOptions{
			grouping: physical.Grouping{
				Columns: groupBy,
				Without: false,
			},
			startTs:       time.Unix(10, 0),
			endTs:         time.Unix(40, 0),
			rangeInterval: 10 * time.Second,
			step:          10 * time.Second,
			operation:     types.RangeAggregationTypeCount,
		}

		inputA := NewArrowtestPipeline(schema, rowsPipelineA...)
		inputB := NewArrowtestPipeline(schema, rowsPipelineB...)
		pipeline, err := newRangeAggregationPipeline([]Pipeline{inputA, inputB}, newExpressionEvaluator(), opts)
		require.NoError(t, err)
		defer pipeline.Close()

		record, err := pipeline.Read(t.Context())
		require.NoError(t, err)

		expect := arrowtest.Rows{
			// time.Unix(10, 0)
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app1", colVal: float64(3)},
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app2", colVal: float64(2)},
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},

			// time.Unix(20, 0)
			{colTs: time.Unix(20, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app1", colVal: float64(1)},
			{colTs: time.Unix(20, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app2", colVal: float64(1)},
			{colTs: time.Unix(20, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},

			// time.Unix(30, 0)
			{colTs: time.Unix(30, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app2", colVal: float64(2)},
			{colTs: time.Unix(30, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},
		}

		rows, err := arrowtest.RecordRows(record)
		require.NoError(t, err, "should be able to convert record back to rows")
		require.Equal(t, len(expect), len(rows), "number of rows should match")

		// rows are expected to be sorted by timestamp.
		// for a given timestamp, no ordering is enforced based on labels.
		require.True(t, slices.IsSortedFunc(rows, func(a, b arrowtest.Row) int {
			return a[colTs].(time.Time).Compare(b[colTs].(time.Time))
		}))
		require.ElementsMatch(t, expect, rows)
	})

	t.Run("overlapping windows", func(t *testing.T) {
		opts := rangeAggregationOptions{
			grouping: physical.Grouping{
				Columns: groupBy,
				Without: false,
			},
			startTs:       time.Unix(10, 0),
			endTs:         time.Unix(40, 0),
			rangeInterval: 10 * time.Second,
			step:          5 * time.Second,
			operation:     types.RangeAggregationTypeCount,
		}

		inputA := NewArrowtestPipeline(schema, rowsPipelineA...)
		inputB := NewArrowtestPipeline(schema, rowsPipelineB...)
		pipeline, err := newRangeAggregationPipeline([]Pipeline{inputA, inputB}, newExpressionEvaluator(), opts)
		require.NoError(t, err)
		defer pipeline.Close()

		record, err := pipeline.Read(t.Context())
		require.NoError(t, err)

		expect := arrowtest.Rows{
			// time.Unix(10, 0)
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app1", colVal: float64(3)},
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app2", colVal: float64(2)},
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},

			// time.Unix(15, 0)
			{colTs: time.Unix(15, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app2", colVal: float64(2)},
			{colTs: time.Unix(15, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app1", colVal: float64(2)},
			{colTs: time.Unix(15, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},

			// time.Unix(20, 0)
			{colTs: time.Unix(20, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app1", colVal: float64(1)},
			{colTs: time.Unix(20, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app2", colVal: float64(1)},
			{colTs: time.Unix(20, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},

			// time.Unix(25, 0)
			{colTs: time.Unix(25, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},
			{colTs: time.Unix(25, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app2", colVal: float64(1)},

			// time.Unix(30, 0)
			{colTs: time.Unix(30, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app2", colVal: float64(2)},
			{colTs: time.Unix(30, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},

			// time.Unix(35, 0)
			{colTs: time.Unix(35, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app2", colVal: float64(1)},
			{colTs: time.Unix(35, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},
		}

		rows, err := arrowtest.RecordRows(record)
		require.NoError(t, err, "should be able to convert record back to rows")
		require.Equal(t, len(expect), len(rows), "number of rows should match")

		// rows are expected to be sorted by timestamp.
		// for a given timestamp, no ordering is enforced based on labels.
		require.True(t, slices.IsSortedFunc(rows, func(a, b arrowtest.Row) int {
			return a[colTs].(time.Time).Compare(b[colTs].(time.Time))
		}))
		require.ElementsMatch(t, expect, rows)
	})

	t.Run("non-overlapping windows", func(t *testing.T) {
		opts := rangeAggregationOptions{
			grouping: physical.Grouping{
				Columns: groupBy,
				Without: false,
			},
			startTs:       time.Unix(10, 0),
			endTs:         time.Unix(40, 0),
			rangeInterval: 5 * time.Second,
			step:          10 * time.Second,
			operation:     types.RangeAggregationTypeCount,
		}

		inputA := NewArrowtestPipeline(schema, rowsPipelineA...)
		inputB := NewArrowtestPipeline(schema, rowsPipelineB...)
		pipeline, err := newRangeAggregationPipeline([]Pipeline{inputA, inputB}, newExpressionEvaluator(), opts)
		require.NoError(t, err)
		defer pipeline.Close()

		record, err := pipeline.Read(t.Context())
		require.NoError(t, err)

		expect := arrowtest.Rows{
			// time.Unix(10, 0)
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app1", colVal: float64(1)},
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app2", colVal: float64(1)},
			{colTs: time.Unix(10, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},

			// time.Unix(20, 0)
			{colTs: time.Unix(20, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},

			// time.Unix(30, 0)
			{colTs: time.Unix(30, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app2", colVal: float64(1)},
			{colTs: time.Unix(30, 0).UTC(), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": "app1", colVal: float64(1)},
		}

		rows, err := arrowtest.RecordRows(record)
		require.NoError(t, err, "should be able to convert record back to rows")
		require.Equal(t, len(expect), len(rows), "number of rows should match")

		// rows are expected to be sorted by timestamp.
		// for a given timestamp, no ordering is enforced based on labels.
		require.True(t, slices.IsSortedFunc(rows, func(a, b arrowtest.Row) int {
			return a[colTs].(time.Time).Compare(b[colTs].(time.Time))
		}))
		require.ElementsMatch(t, expect, rows)
	})
}

// TestMatcher checks the window matchers created by the matcher factory
// (exact, aligned, gapped and overlapping), focusing on boundary handling:
// a window's start is treated as exclusive and its end as inclusive.
func TestMatcher(t *testing.T) {
	t.Run("exactMatcher", func(t *testing.T) {
		opts := rangeAggregationOptions{
			startTs:       time.Unix(1000, 0),
			endTs:         time.Unix(1000, 0),
			rangeInterval: 1000 * time.Second, // covers time range from 0 - 1000
			step:          0,                  // instant query
			operation:     types.RangeAggregationTypeCount,
		}

		// Create a single window for instant query
		windows := []window{
			{start: time.Unix(0, 0), end: time.Unix(1000, 0)},
		}

		f := newMatcherFactoryFromOpts(opts)
		matcher := f.createExactMatcher(windows)

		tests := []struct {
			name      string
			timestamp time.Time
			expected  []window
		}{
			{
				name:      "timestamp exactly at lowerbound (exclusive boundary)",
				timestamp: f.bounds.start,
				expected:  nil, // should return nil as lowerbound is exclusive
			},
			{
				name:      "timestamp greater than upperbound",
				timestamp: f.bounds.end.Add(1 * time.Nanosecond),
				expected:  nil, // should return nil as the timestamp lies beyond the upperbound
			},
			{
				name:      "timestamp exactly at upperbound (inclusive boundary)",
				timestamp: f.bounds.end,
				expected:  []window{windows[0]}, // should return window as upperbound is inclusive
			},
			{
				name:      "timestamp just after lowerbound",
				timestamp: f.bounds.start.Add(1 * time.Nanosecond),
				expected:  []window{windows[0]},
			},
			{
				name:      "timestamp just before upperbound",
				timestamp: f.bounds.end.Add(-1 * time.Nanosecond),
				expected:  []window{windows[0]},
			},
			{
				name:      "timestamp within range",
				timestamp: time.Unix(500, 0),
				expected:  []window{windows[0]},
			},
		}

		for _, tt := range tests {
			t.Run(tt.name, func(t *testing.T) {
				result := matcher(tt.timestamp)
				if tt.expected == nil {
					require.Nil(t, result, "timestamp %v should not match any window", tt.timestamp)
				} else {
					requireEqualWindows(t, tt.expected, result)
				}
			})
		}

		t.Run("empty windows slice", func(t *testing.T) {
			windows := []window{}
			matcher := f.createExactMatcher(windows)

			// Should return nil for any timestamp when windows is empty
			result := matcher(time.Unix(998, 0))
			require.Nil(t, result)
		})

		t.Run("multiple windows (should return first)", func(t *testing.T) {
			windows := []window{
				{start: f.bounds.start, end: f.start},
				{start: f.bounds.start.Add(100 * time.Second), end: f.start.Add(100 * time.Second)},
				{start: f.bounds.start.Add(200 * time.Second), end: f.start.Add(200 * time.Second)},
			}
			matcher := f.createExactMatcher(windows)

			// Should always return the first window for valid timestamps
			result := matcher(time.Unix(998, 0))
			require.Equal(t, []window{windows[0]}, result)
		})
	})

	t.Run("alignedMatcher", func(t *testing.T) {
		opts := rangeAggregationOptions{
			startTs:       time.Unix(100, 0),
			endTs:         time.Unix(300, 0),
			rangeInterval: 100 * time.Second,
			step:          100 * time.Second, // step == rangeInterval
			operation:     types.RangeAggregationTypeCount,
		}

		// Create windows that align with lower/upper bounds and step
		windows := []window{
			{start: time.Unix(0, 0), end: time.Unix(100, 0)},
			{start: time.Unix(100, 0), end: time.Unix(200, 0)},
			{start: time.Unix(200, 0), end: time.Unix(300, 0)},
		}

		f := newMatcherFactoryFromOpts(opts)
		matcher := f.createAlignedMatcher(windows)

		tests := []struct {
			name      string
			timestamp time.Time
			expected  []window
		}{
			{
				name:      "timestamp exactly at lowerbound (exclusive boundary)",
				timestamp: f.bounds.start,
				expected:  nil, // should return nil as lowerbound is exclusive
			},
			{
				name:      "timestamp greater than upperbound",
				timestamp: f.bounds.end.Add(1 * time.Nanosecond),
				expected:  nil, // should return nil as the timestamp lies beyond the upperbound
			},
			{
				name:      "timestamp exactly at upperbound (inclusive boundary)",
				timestamp: f.bounds.end,
				expected:  []window{windows[2]}, // should return window as upperbound is inclusive
			},
			{
				name:      "timestamp just after lowerbound",
				timestamp: f.bounds.start.Add(1 * time.Nanosecond),
				expected:  []window{windows[0]},
			},
			{
				name:      "timestamp just before upperbound",
				timestamp: f.bounds.end.Add(-1 * time.Nanosecond),
				expected:  []window{windows[2]},
			},
			{
				name:      "timestamp within range of window 1 (100-200]",
				timestamp: time.Unix(150, 0),
				expected:  []window{windows[1]},
			},
		}

		for _, tt := range tests {
			t.Run(tt.name, func(t *testing.T) {
				result := matcher(tt.timestamp)
				if tt.expected == nil {
					require.Nil(t, result, "timestamp %v should not match any window", tt.timestamp)
				} else {
					requireEqualWindows(t, tt.expected, result)
				}
			})
		}
	})

	t.Run("gappedMatcher", func(t *testing.T) {
		opts := rangeAggregationOptions{
			startTs:       time.Unix(100, 0),
			endTs:         time.Unix(360, 0),
			rangeInterval: 80 * time.Second,
			step:          100 * time.Second, // step > rangeInterval
			operation:     types.RangeAggregationTypeCount,
		}

		// Create windows that align with lower bound and step, but not upper bound
		windows := []window{
			{start: time.Unix(20, 0), end: time.Unix(100, 0)},
			{start: time.Unix(120, 0), end: time.Unix(200, 0)},
			{start: time.Unix(220, 0), end: time.Unix(300, 0)},
		}

		f := newMatcherFactoryFromOpts(opts)
		matcher := f.createGappedMatcher(windows)

		tests := []struct {
			name      string
			timestamp time.Time
			expected  []window
		}{
			{
				name:      "timestamp exactly at lowerbound (exclusive boundary)",
				timestamp: f.bounds.start,
				expected:  nil, // should return nil as lowerbound is exclusive
			},
			{
				name:      "timestamp greater than upperbound",
				timestamp: f.bounds.end.Add(1 * time.Nanosecond),
				expected:  nil, // should return nil as the timestamp lies beyond the upperbound
			},
			{
				name:      "timestamp in gap",
				timestamp: time.Unix(110, 0),
				expected:  nil, // should return nil as the timestamp is in the "gap" between windows[0].end and windows[1].start
			},
			{
				name:      "timestamp in gap at exactly start of window 1",
				timestamp: time.Unix(120, 0),
				expected:  nil, // lower bound is exclusive
			},
			{
				name:      "timestamp outside of the last window but within bounds",
				timestamp: time.Unix(320, 0),
				expected:  nil,
			},
			{
				name:      "timestamp exactly at end of window 0",
				timestamp: time.Unix(100, 0),
				expected:  []window{windows[0]}, // should return window as upperbound is inclusive
			},
			{
				name:      "timestamp exactly at end of window 1",
				timestamp: time.Unix(200, 0),
				expected:  []window{windows[1]}, // should return window as upperbound is inclusive
			},
			{
				name:      "timestamp exactly at end of window 2",
				timestamp: time.Unix(300, 0),
				expected:  []window{windows[2]}, // should return window as upperbound is inclusive
			},
			{
				name:      "timestamp just before upperbound",
				timestamp: f.bounds.end.Add(-1 * time.Nanosecond),
				expected:  nil,
			},
			{
				name:      "timestamp just before the last window end",
				timestamp: time.Unix(300, 0).Add(-1 * time.Nanosecond),
				expected:  []window{windows[2]},
			},
			{
				name:      "timestamp within range of window 1 (100-200]",
				timestamp: time.Unix(150, 0),
				expected:  []window{windows[1]},
			},
		}

		for _, tt := range tests {
			t.Run(tt.name, func(t *testing.T) {
				result := matcher(tt.timestamp)
				if tt.expected == nil {
					require.Nil(t, result, "timestamp %v should not match any window", tt.timestamp)
				} else {
					requireEqualWindows(t, tt.expected, result)
				}
			})
		}
	})

	t.Run("overlappingMatcher", func(t *testing.T) {
		opts := rangeAggregationOptions{
			startTs:       time.Unix(100, 0),
			endTs:         time.Unix(300, 0),
			rangeInterval: 120 * time.Second,
			step:          100 * time.Second, // step < rangeInterval
			operation:     types.RangeAggregationTypeCount,
		}

		// Create windows that align with lower/upper bounds and step
		windows := []window{
			{start: time.Unix(-20, 0), end: time.Unix(100, 0)},
			{start: time.Unix(80, 0), end: time.Unix(200, 0)},
			{start: time.Unix(180, 0), end: time.Unix(300, 0)},
		}

		f := newMatcherFactoryFromOpts(opts)
		matcher := f.createOverlappingMatcher(windows)

		tests := []struct {
			name      string
			timestamp time.Time
			expected  []window
		}{
			{
				name:      "timestamp exactly at lowerbound (exclusive boundary)",
				timestamp: f.bounds.start,
				expected:  nil, // should return nil as lowerbound is exclusive
			},
			{
				name:      "timestamp exactly at upperbound (inclusive boundary)",
				timestamp: f.bounds.end,
				expected:  []window{windows[2]}, // should return window as upperbound is inclusive
			},
			{
				name:      "timestamp exactly at start of window 1",
				timestamp: time.Unix(80, 0),
				expected:  []window{windows[0]},
			},
			{
				name:      "timestamp exactly at overlap of window 0 and window 1",
				timestamp: time.Unix(90, 0),
				expected:  []window{windows[0], windows[1]},
			},
			{
				name:      "timestamp exactly at end of window 0",
				timestamp: time.Unix(100, 0),
				expected:  []window{windows[0], windows[1]},
			},
			{
				name:      "timestamp just before upperbound",
				timestamp: f.bounds.end.Add(-1 * time.Nanosecond),
				expected:  []window{windows[2]},
			},
		}

		for _, tt := range tests {
			t.Run(tt.name, func(t *testing.T) {
				result := matcher(tt.timestamp)
				if tt.expected == nil {
					require.Nil(t, result, "timestamp %v should not match any window", tt.timestamp)
				} else {
					requireEqualWindows(t, tt.expected, result)
				}
			})
		}
	})
}

// TestRangeAggregationPipeline_EmptyLabelValues is a regression test: empty parsed label values must be
// preserved in the aggregation output so the downstream result builder can include them in the metric
// label set (matching classic Loki engine behaviour).
func TestRangeAggregationPipeline_EmptyLabelValues(t *testing.T) {
	fields := []arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(colEnv, false),
		semconv.FieldFromFQN(colSvc, false),
	}
	schema := arrow.NewSchema(fields, nil)

	rows := []arrowtest.Rows{
		{
			{colTs: time.Unix(20, 0).UTC(), colEnv: "prod", colSvc: "app1"},
			{colTs: time.Unix(15, 0).UTC(), colEnv: "prod", colSvc: "app1"},
			// svc="" — a parsed label with empty value; must be preserved, not collapsed with absent svc.
			{colTs: time.Unix(18, 0).UTC(), colEnv: "dev", colSvc: ""},
		},
	}

	opts := rangeAggregationOptions{
		grouping: physical.Grouping{
			Columns: []physical.ColumnExpression{
				&physical.ColumnExpr{Ref: types.ColumnRef{Column: "env", Type: types.ColumnTypeAmbiguous}},
				&physical.ColumnExpr{Ref: types.ColumnRef{Column: "service", Type: types.ColumnTypeAmbiguous}},
			},
		},
		startTs:       time.Unix(20, 0).UTC(),
		endTs:         time.Unix(20, 0).UTC(),
		rangeInterval: 10 * time.Second,
		operation:     types.RangeAggregationTypeCount,
	}

	input := NewArrowtestPipeline(schema, rows...)
	pipeline, err := newRangeAggregationPipeline([]Pipeline{input}, newExpressionEvaluator(), opts)
	require.NoError(t, err)
	defer pipeline.Close()

	record, err := pipeline.Read(t.Context())
	require.NoError(t, err)

	result, err := arrowtest.RecordRows(record)
	require.NoError(t, err)

	expect := arrowtest.Rows{
		{colTs: time.Unix(20, 0).UTC(), colVal: float64(2), "utf8.ambiguous.env": "prod", "utf8.ambiguous.service": "app1"},
		// svc="" must appear as empty string in the output row, not nil (NULL).
		{colTs: time.Unix(20, 0).UTC(), colVal: float64(1), "utf8.ambiguous.env": "dev", "utf8.ambiguous.service": ""},
	}
	require.Equal(t, len(expect), len(result))
	require.ElementsMatch(t, expect, result)
}

// TestRangeAggregationPipeline_MissingGroupingColumn verifies that a "by" grouping column
// absent from the Arrow schema is eliminated from the series.
//
// Before the fix, an absent column fell back to a scalar "" (non-null), which was included
// in the aggregation label set with empty value causing a mismatch in response compared to chunks engine.
func TestRangeAggregationPipeline_MissingGroupingColumn(t *testing.T) {
	colDetectedLevel := "utf8.metadata.detected_level"
	colLevel := "utf8.label.level"

	// schemaWithLevel simulates a data object whose Arrow schema includes a "level" index
	// label (because other streams stored in the same object use it). For the stream under
	// test, level is absent for every row, so all values are null.
	schemaWithLevel := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(colDetectedLevel, true),
		semconv.FieldFromFQN(colLevel, true),
	}, nil)

	// schemaWithoutLevel simulates a data object that has no "level" column at all
	// (none of the streams stored there carry that label).
	schemaWithoutLevel := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(colDetectedLevel, true),
	}, nil)

	rowsWithLevel := arrowtest.Rows{
		// level is null — the schema has the column, but this stream never sets it.
		{colTs: time.Unix(20, 0).UTC(), colDetectedLevel: "info", colLevel: nil},
		{colTs: time.Unix(18, 0).UTC(), colDetectedLevel: "info", colLevel: nil},
	}
	rowsWithoutLevel := arrowtest.Rows{
		// level column is absent entirely from this batch's schema.
		{colTs: time.Unix(20, 0).UTC(), colDetectedLevel: "info"},
		{colTs: time.Unix(15, 0).UTC(), colDetectedLevel: "info"},
	}

	opts := rangeAggregationOptions{
		grouping: physical.Grouping{
			Columns: []physical.ColumnExpression{
				// "by (level, detected_level)" — both referenced as ambiguous because the
				// LogQL planner does not know the column type at planning time.
				&physical.ColumnExpr{Ref: types.ColumnRef{Column: "level", Type: types.ColumnTypeAmbiguous}},
				&physical.ColumnExpr{Ref: types.ColumnRef{Column: "detected_level", Type: types.ColumnTypeAmbiguous}},
			},
		},
		startTs:       time.Unix(20, 0).UTC(),
		endTs:         time.Unix(20, 0).UTC(),
		rangeInterval: 10 * time.Second,
		operation:     types.RangeAggregationTypeCount,
	}

	inputA := NewArrowtestPipeline(schemaWithLevel, rowsWithLevel)
	inputB := NewArrowtestPipeline(schemaWithoutLevel, rowsWithoutLevel)
	pipeline, err := newRangeAggregationPipeline([]Pipeline{inputA, inputB}, newExpressionEvaluator(), opts)
	require.NoError(t, err)
	defer pipeline.Close()

	record, err := pipeline.Read(t.Context())
	require.NoError(t, err)

	result, err := arrowtest.RecordRows(record)
	require.NoError(t, err)

	// All four rows belong to the same logical series — detected_level="info" with level
	// absent. They must be merged into a single aggregation bucket, not split into
	// {detected_level="info"} and {detected_level="info", level=""}.
	expect := arrowtest.Rows{
		{
			colTs:  time.Unix(20, 0).UTC(),
			colVal: float64(4),
			// detected_level is present with value "info".
			"utf8.ambiguous.detected_level": "info",
			// level is absent in the aggregation key, so the aggregator emits NULL here.
			"utf8.ambiguous.level": nil,
		},
	}
	require.Equal(t, len(expect), len(result), "absent grouping column must not split series into separate streams")
	require.ElementsMatch(t, expect, result)
}

// TestRangeAggregationPipeline_WithoutGroupsByShortName verifies that "without"
// grouping works correctly when columns across records or multiple columns within
// a record have the same short name but different FQNs.
//
// Take the following records with the same short name "status" column:
//
//	Record 1: `utf8.label.status`
//	Record 2: `utf8.metadata.status`
//	Record 3: both `utf8.label.status` and `utf8.metadata.status`
//
// These should be considered as `utf8.ambiguous.status` when grouping.
// For record 3, [NewCoalesce] is used to resolve precedence between the two columns.
func TestRangeAggregationPipeline_WithoutGroupsByShortName(t *testing.T) {
	const (
		fqnEnv            = "utf8.label.env"
		fqnService        = "utf8.label.service"
		fqnLabelStatus    = "utf8.label.status"
		fqnMetadataStatus = "utf8.metadata.status"
	)

	startTs := time.Unix(20, 0).UTC()
	endTs := time.Unix(30, 0).UTC()

	schemaA := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(fqnService, true),
		semconv.FieldFromFQN(fqnLabelStatus, true),
		semconv.FieldFromFQN(fqnEnv, true),
	}, nil)
	schemaB := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(fqnEnv, true),
		semconv.FieldFromFQN(fqnService, true),
		semconv.FieldFromFQN(fqnMetadataStatus, true),
	}, nil)
	schemaC := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(fqnMetadataStatus, true),
		semconv.FieldFromFQN(fqnService, true),
		semconv.FieldFromFQN(fqnLabelStatus, true),
		semconv.FieldFromFQN(fqnEnv, true),
	}, nil)

	rowsA := arrowtest.Rows{
		{colTs: time.Unix(19, 0).UTC(), fqnService: nil, fqnLabelStatus: "200", fqnEnv: "prod"},   // out ts: 20
		{colTs: time.Unix(20, 0).UTC(), fqnService: "api", fqnLabelStatus: "500", fqnEnv: "prod"}, // out ts: 20
		{colTs: time.Unix(29, 0).UTC(), fqnService: nil, fqnLabelStatus: "200", fqnEnv: "prod"},   // out ts: 30
		{colTs: time.Unix(30, 0).UTC(), fqnService: "api", fqnLabelStatus: "500", fqnEnv: "prod"}, // out ts: 30
	}
	rowsB := arrowtest.Rows{
		{colTs: time.Unix(18, 0).UTC(), fqnEnv: "prod", fqnService: nil, fqnMetadataStatus: "200"},   // out ts: 20
		{colTs: time.Unix(20, 0).UTC(), fqnEnv: "prod", fqnService: "api", fqnMetadataStatus: "500"}, // out ts: 20
		{colTs: time.Unix(28, 0).UTC(), fqnEnv: "prod", fqnService: nil, fqnMetadataStatus: "200"},   // out ts: 30
		{colTs: time.Unix(30, 0).UTC(), fqnEnv: "prod", fqnService: "api", fqnMetadataStatus: "500"}, // out ts: 30
	}
	rowsC := arrowtest.Rows{
		{colTs: time.Unix(17, 0).UTC(), fqnMetadataStatus: "200", fqnService: nil, fqnLabelStatus: nil, fqnEnv: "prod"},   // out ts: 20
		{colTs: time.Unix(20, 0).UTC(), fqnMetadataStatus: nil, fqnService: "api", fqnLabelStatus: "500", fqnEnv: "prod"}, // out ts: 20
		{colTs: time.Unix(27, 0).UTC(), fqnMetadataStatus: nil, fqnService: nil, fqnLabelStatus: "200", fqnEnv: "prod"},   // out ts: 30
		{colTs: time.Unix(30, 0).UTC(), fqnMetadataStatus: "500", fqnService: "api", fqnLabelStatus: nil, fqnEnv: "prod"}, // out ts: 30
	}

	opts := rangeAggregationOptions{
		grouping: physical.Grouping{
			Columns: []physical.ColumnExpression{
				&physical.ColumnExpr{Ref: types.ColumnRef{Column: "env", Type: types.ColumnTypeAmbiguous}},
			},
			Without: true,
		},
		startTs:       startTs,
		endTs:         endTs,
		rangeInterval: 10 * time.Second,
		step:          10 * time.Second,
		operation:     types.RangeAggregationTypeCount,
	}

	inputA := NewArrowtestPipeline(schemaA, rowsA)
	inputB := NewArrowtestPipeline(schemaB, rowsB)
	inputC := NewArrowtestPipeline(schemaC, rowsC)
	pipeline, err := newRangeAggregationPipeline([]Pipeline{inputA, inputB, inputC}, newExpressionEvaluator(), opts)
	require.NoError(t, err)
	defer pipeline.Close()

	record, err := pipeline.Read(t.Context())
	require.NoError(t, err)

	result, err := arrowtest.RecordRows(record)
	require.NoError(t, err)

	expect := arrowtest.Rows{
		{
			colTs:                    startTs,
			colVal:                   float64(3),
			"utf8.ambiguous.service": nil,
			"utf8.ambiguous.status":  "200",
		},
		{
			colTs:                    startTs,
			colVal:                   float64(3),
			"utf8.ambiguous.service": "api",
			"utf8.ambiguous.status":  "500",
		},
		{
			colTs:                    endTs,
			colVal:                   float64(3),
			"utf8.ambiguous.service": nil,
			"utf8.ambiguous.status":  "200",
		},
		{
			colTs:                    endTs,
			colVal:                   float64(3),
			"utf8.ambiguous.service": "api",
			"utf8.ambiguous.status":  "500",
		},
	}
	require.Equal(t, len(expect), len(result))
	require.ElementsMatch(t, expect, result)
}

// TestRangeAggregationPipeline_WithoutSortsColumns verifies that "without" grouping
// works correctly when columns are in different order across records.
// They should be sorted by short name before calling the aggregator.
func TestRangeAggregationPipeline_WithoutSortsColumns(t *testing.T) {
	const (
		fqnEnv     = "utf8.label.env"
		fqnService = "utf8.label.service"
		fqnStatus  = "utf8.label.status"
	)

	startTs := time.Unix(20, 0).UTC()
	endTs := time.Unix(30, 0).UTC()

	schemaA := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(fqnService, true),
		semconv.FieldFromFQN(fqnStatus, true),
		semconv.FieldFromFQN(fqnEnv, true),
	}, nil)
	// different column order to schemaA
	schemaB := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(fqnEnv, true),
		semconv.FieldFromFQN(fqnStatus, true),
		semconv.FieldFromFQN(fqnService, true),
	}, nil)
	// missing "service" column
	schemaC := arrow.NewSchema([]arrow.Field{
		semconv.FieldFromFQN(colTs, false),
		semconv.FieldFromFQN(fqnStatus, true),
		semconv.FieldFromFQN(fqnEnv, true),
	}, nil)

	rowsA := arrowtest.Rows{
		{colTs: time.Unix(19, 0).UTC(), fqnService: nil, fqnStatus: "200", fqnEnv: "prod"},   // out ts: 20
		{colTs: time.Unix(20, 0).UTC(), fqnService: "api", fqnStatus: "500", fqnEnv: "prod"}, // out ts: 20
		{colTs: time.Unix(29, 0).UTC(), fqnService: nil, fqnStatus: "200", fqnEnv: "prod"},   // out ts: 30
		{colTs: time.Unix(30, 0).UTC(), fqnService: "api", fqnStatus: "500", fqnEnv: "prod"}, // out ts: 30
	}
	rowsB := arrowtest.Rows{
		{colTs: time.Unix(18, 0).UTC(), fqnEnv: "prod", fqnStatus: "200", fqnService: nil},   // out ts: 20
		{colTs: time.Unix(20, 0).UTC(), fqnEnv: "prod", fqnStatus: "500", fqnService: "api"}, // out ts: 20
		{colTs: time.Unix(28, 0).UTC(), fqnEnv: "prod", fqnStatus: "200", fqnService: nil},   // out ts: 30
		{colTs: time.Unix(30, 0).UTC(), fqnEnv: "prod", fqnStatus: "500", fqnService: "api"}, // out ts: 30
	}
	rowsC := arrowtest.Rows{
		{colTs: time.Unix(17, 0).UTC(), fqnStatus: "200", fqnEnv: "prod"}, // out ts: 20
		{colTs: time.Unix(27, 0).UTC(), fqnStatus: "200", fqnEnv: "prod"}, // out ts: 30
	}

	opts := rangeAggregationOptions{
		grouping: physical.Grouping{
			Columns: []physical.ColumnExpression{
				&physical.ColumnExpr{Ref: types.ColumnRef{Column: "env", Type: types.ColumnTypeAmbiguous}},
			},
			Without: true,
		},
		startTs:       startTs,
		endTs:         endTs,
		rangeInterval: 10 * time.Second,
		step:          10 * time.Second,
		operation:     types.RangeAggregationTypeCount,
	}

	inputA := NewArrowtestPipeline(schemaA, rowsA)
	inputB := NewArrowtestPipeline(schemaB, rowsB)
	inputC := NewArrowtestPipeline(schemaC, rowsC)
	pipeline, err := newRangeAggregationPipeline([]Pipeline{inputA, inputB, inputC}, newExpressionEvaluator(), opts)
	require.NoError(t, err)
	defer pipeline.Close()

	record, err := pipeline.Read(t.Context())
	require.NoError(t, err)

	result, err := arrowtest.RecordRows(record)
	require.NoError(t, err)

	expect := arrowtest.Rows{
		{
			colTs:                    startTs,
			colVal:                   float64(3),
			"utf8.ambiguous.service": nil,
			"utf8.ambiguous.status":  "200",
		},
		{
			colTs:                    startTs,
			colVal:                   float64(2),
			"utf8.ambiguous.service": "api",
			"utf8.ambiguous.status":  "500",
		},
		{
			colTs:                    endTs,
			colVal:                   float64(3),
			"utf8.ambiguous.service": nil,
			"utf8.ambiguous.status":  "200",
		},
		{
			colTs:                    endTs,
			colVal:                   float64(2),
			"utf8.ambiguous.service": "api",
			"utf8.ambiguous.status":  "500",
		},
	}
	require.Equal(t, len(expect), len(result))
	require.ElementsMatch(t, expect, result)
}

// requireEqualWindows asserts that two slices of window structs contain the same elements.
func requireEqualWindows(t *testing.T, expected, actual []window) { t.Helper() slices.SortStableFunc(expected, func(a, b window) int { return a.end.Compare(b.end) }) slices.SortStableFunc(actual, func(a, b window) int { return a.end.Compare(b.end) }) require.Equal(t, len(expected), len(actual), "window slices should have the same length") for i := 0; i < len(expected); i++ { require.Equal(t, expected[i].start.UnixNano(), actual[i].start.UnixNano(), "window[%d] start time mismatch: expected %s, actual %s", i, expected[i].start, actual[i].start) require.Equal(t, expected[i].end.UnixNano(), actual[i].end.UnixNano(), "window[%d] end time mismatch: expected %s, actual %s", i, expected[i].end, actual[i].end) } }