From a523685dbdacec06a2ade0fe728964d8fc3bb241 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Thu, 9 Apr 2026 09:34:46 -0400 Subject: [PATCH] chore(compute): change selection vector behaviour (#21462) --- pkg/columnar/columnartest/equality.go | 36 +++++--- pkg/columnar/concat_test.go | 16 ++-- pkg/columnar/mask.go | 30 ++++++ pkg/columnar/recordbatch_test.go | 4 +- pkg/compute/ARCHITECTURE.md | 13 +-- pkg/compute/compute_test.go | 11 ++- pkg/compute/equality_bool.go | 14 +-- pkg/compute/equality_null.go | 14 +-- pkg/compute/equality_numeric.go | 14 +-- pkg/compute/equality_utf8.go | 14 +-- pkg/compute/filter.go | 101 +++++++++++++++++++++ pkg/compute/filter_bench_test.go | 84 +++++++++++++++++ pkg/compute/internal/computetest/parser.go | 82 +++++++++++++++-- pkg/compute/logical.go | 19 ++-- pkg/compute/selection.go | 19 ---- pkg/compute/set.go | 35 +++++-- pkg/compute/testdata/README.md | 3 +- pkg/compute/testdata/equality.test | 68 +++++++------- pkg/compute/testdata/filter.test | 36 ++++++++ pkg/compute/testdata/logical.test | 34 +++---- pkg/compute/testdata/selection.test | 36 ++++---- pkg/compute/testdata/set.test | 28 +++--- pkg/compute/testdata/utf8.test | 26 +++--- pkg/compute/utf8.go | 36 ++++++-- pkg/expr/evaluate_test.go | 12 +-- pkg/ingester/stream.go | 3 +- 26 files changed, 557 insertions(+), 231 deletions(-) create mode 100644 pkg/columnar/mask.go create mode 100644 pkg/compute/filter.go create mode 100644 pkg/compute/filter_bench_test.go delete mode 100644 pkg/compute/selection.go create mode 100644 pkg/compute/testdata/filter.test diff --git a/pkg/columnar/columnartest/equality.go b/pkg/columnar/columnartest/equality.go index ef74f875fc..916d2e51d3 100644 --- a/pkg/columnar/columnartest/equality.go +++ b/pkg/columnar/columnartest/equality.go @@ -6,11 +6,16 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/columnar" + "github.com/grafana/loki/v3/pkg/memory" ) // RequireDatumsEqual asserts that the provided datum 
matches the expected // datum, otherwise t fails. -func RequireDatumsEqual(t testing.TB, expect, actual columnar.Datum) { +// +// When mask is non-empty, only positions where the mask bit is set are +// compared. This is useful when some positions have undefined values (e.g., +// unselected rows in a selection vector). +func RequireDatumsEqual(t testing.TB, expect, actual columnar.Datum, mask memory.Bitmap) { t.Helper() if expectScalar, ok := expect.(columnar.Scalar); ok { @@ -18,7 +23,7 @@ func RequireDatumsEqual(t testing.TB, expect, actual columnar.Datum) { RequireScalarsEqual(t, expectScalar, actual.(columnar.Scalar)) } else { require.Implements(t, (*columnar.Array)(nil), actual) - RequireArraysEqual(t, expect.(columnar.Array), actual.(columnar.Array)) + RequireArraysEqual(t, expect.(columnar.Array), actual.(columnar.Array), mask) } } @@ -41,32 +46,34 @@ func RequireScalarsEqual(t testing.TB, expect, actual columnar.Scalar) { // RequireArraysEqual asserts that the provided array matches the expected // array, otherwise t fails. -func RequireArraysEqual(t testing.TB, expect, actual columnar.Array) { +// +// When mask is non-empty, only positions where the mask bit is set are +// compared. 
+func RequireArraysEqual(t testing.TB, expect, actual columnar.Array, mask memory.Bitmap) { t.Helper() require.Equal(t, expect.Kind(), actual.Kind(), "kind mismatch") require.Equal(t, expect.Len(), actual.Len(), "length mismatch") - require.Equal(t, expect.Nulls(), actual.Nulls(), "null count mismatch") switch expect.Kind() { case columnar.KindNull: - requireNullArraysEqual(t, expect.(*columnar.Null), actual.(*columnar.Null)) + requireNullArraysEqual(t, expect.(*columnar.Null), actual.(*columnar.Null), mask) case columnar.KindBool: - requireArraysEqual(t, expect.(*columnar.Bool), actual.(*columnar.Bool)) + requireArraysEqual(t, expect.(*columnar.Bool), actual.(*columnar.Bool), mask) case columnar.KindInt32: - requireArraysEqual(t, expect.(*columnar.Number[int32]), actual.(*columnar.Number[int32])) + requireArraysEqual(t, expect.(*columnar.Number[int32]), actual.(*columnar.Number[int32]), mask) case columnar.KindInt64: - requireArraysEqual(t, expect.(*columnar.Number[int64]), actual.(*columnar.Number[int64])) + requireArraysEqual(t, expect.(*columnar.Number[int64]), actual.(*columnar.Number[int64]), mask) case columnar.KindUint32: - requireArraysEqual(t, expect.(*columnar.Number[uint32]), actual.(*columnar.Number[uint32])) + requireArraysEqual(t, expect.(*columnar.Number[uint32]), actual.(*columnar.Number[uint32]), mask) case columnar.KindUint64: - requireArraysEqual(t, expect.(*columnar.Number[uint64]), actual.(*columnar.Number[uint64])) + requireArraysEqual(t, expect.(*columnar.Number[uint64]), actual.(*columnar.Number[uint64]), mask) case columnar.KindUTF8: - requireArraysEqual(t, expect.(*columnar.UTF8), actual.(*columnar.UTF8)) + requireArraysEqual(t, expect.(*columnar.UTF8), actual.(*columnar.UTF8), mask) } } -func requireNullArraysEqual(t testing.TB, left, right *columnar.Null) { +func requireNullArraysEqual(t testing.TB, left, right *columnar.Null, mask memory.Bitmap) { // Nothing to do here; the base checks in RequireArraysEqual covers // everything that 
could differ between two null arrays. t.Helper() @@ -80,10 +87,13 @@ type valueArray[T any] interface { Get(i int) T } -func requireArraysEqual[T any](t testing.TB, left, right valueArray[T]) { +func requireArraysEqual[T any](t testing.TB, left, right valueArray[T], mask memory.Bitmap) { t.Helper() for i := range left.Len() { + if mask.Len() > 0 && !mask.Get(i) { + continue // Skip positions not in the mask. + } require.Equal(t, left.IsNull(i), right.IsNull(i), "null mismatch at index %d", i) if left.IsNull(i) || right.IsNull(i) { continue diff --git a/pkg/columnar/concat_test.go b/pkg/columnar/concat_test.go index c6461691ee..0453bc64ae 100644 --- a/pkg/columnar/concat_test.go +++ b/pkg/columnar/concat_test.go @@ -23,7 +23,7 @@ func TestConcat_Null(t *testing.T) { expect := columnartest.Array(t, columnar.KindNull, &alloc, make([]any, 10+5+32)...) actual, err := columnar.Concat(&alloc, in) require.NoError(t, err) - columnartest.RequireArraysEqual(t, expect, actual) + columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{}) } func TestConcat_Bool(t *testing.T) { @@ -38,7 +38,7 @@ func TestConcat_Bool(t *testing.T) { expect := columnartest.Array(t, columnar.KindBool, &alloc, true, false, false, true, false, nil) actual, err := columnar.Concat(&alloc, in) require.NoError(t, err) - columnartest.RequireArraysEqual(t, expect, actual) + columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{}) } func TestConcat_Int32(t *testing.T) { @@ -53,7 +53,7 @@ func TestConcat_Int32(t *testing.T) { expect := columnartest.Array(t, columnar.KindInt32, &alloc, 1, 2, 3, 4, 5, nil) actual, err := columnar.Concat(&alloc, in) require.NoError(t, err) - columnartest.RequireArraysEqual(t, expect, actual) + columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{}) } func TestConcat_Int64(t *testing.T) { @@ -68,7 +68,7 @@ func TestConcat_Int64(t *testing.T) { expect := columnartest.Array(t, columnar.KindInt64, &alloc, 1, 2, 3, 4, 5, nil) actual, err := 
columnar.Concat(&alloc, in) require.NoError(t, err) - columnartest.RequireArraysEqual(t, expect, actual) + columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{}) } func TestConcat_Uint32(t *testing.T) { @@ -83,7 +83,7 @@ func TestConcat_Uint32(t *testing.T) { expect := columnartest.Array(t, columnar.KindUint32, &alloc, 1, 2, 3, 4, 5, nil) actual, err := columnar.Concat(&alloc, in) require.NoError(t, err) - columnartest.RequireArraysEqual(t, expect, actual) + columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{}) } func TestConcat_Uint64(t *testing.T) { @@ -98,7 +98,7 @@ func TestConcat_Uint64(t *testing.T) { expect := columnartest.Array(t, columnar.KindUint64, &alloc, 1, 2, 3, 4, 5, nil) actual, err := columnar.Concat(&alloc, in) require.NoError(t, err) - columnartest.RequireArraysEqual(t, expect, actual) + columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{}) } func TestConcat_UTF8(t *testing.T) { @@ -117,7 +117,7 @@ func TestConcat_UTF8(t *testing.T) { actual, err := columnar.Concat(&alloc, in) require.NoError(t, err) - columnartest.RequireArraysEqual(t, expect, actual) + columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{}) } func TestConcat_UTF8_Slices(t *testing.T) { @@ -141,7 +141,7 @@ func TestConcat_UTF8_Slices(t *testing.T) { actual, err := columnar.Concat(&alloc, in) require.NoError(t, err) - columnartest.RequireArraysEqual(t, expect, actual) + columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{}) } func BenchmarkConcat(b *testing.B) { diff --git a/pkg/columnar/mask.go b/pkg/columnar/mask.go new file mode 100644 index 0000000000..65341f8486 --- /dev/null +++ b/pkg/columnar/mask.go @@ -0,0 +1,30 @@ +package columnar + +import ( + "fmt" + + "github.com/grafana/loki/v3/pkg/memory" +) + +type invalidMaskError struct { + maskLength int + inputLength int +} + +func (e *invalidMaskError) Error() string { + return fmt.Sprintf("mask length %d does not match input length %d", e.maskLength, 
e.inputLength) +} + +// AllSelected returns true if all elements in the array are selected by the +// mask, or if the mask is the zero value. +// +// If the mask is non-zero, it must have the same length as the array. +func AllSelected(arr Array, mask memory.Bitmap) (bool, error) { + if mask.Len() == 0 { + return true, nil + } + if mask.Len() != arr.Len() { + return false, &invalidMaskError{maskLength: mask.Len(), inputLength: arr.Len()} + } + return mask.SetCount() == arr.Len(), nil +} diff --git a/pkg/columnar/recordbatch_test.go b/pkg/columnar/recordbatch_test.go index a66805ebaa..50a5a17c75 100644 --- a/pkg/columnar/recordbatch_test.go +++ b/pkg/columnar/recordbatch_test.go @@ -31,6 +31,6 @@ func TestRecordBatch_Slice(t *testing.T) { expectAges = columnartest.Array(t, columnar.KindUint64, &alloc, 25, 43) ) - columnartest.RequireArraysEqual(t, expectNames, slice.Column(0)) - columnartest.RequireArraysEqual(t, expectAges, slice.Column(1)) + columnartest.RequireArraysEqual(t, expectNames, slice.Column(0), memory.Bitmap{}) + columnartest.RequireArraysEqual(t, expectAges, slice.Column(1), memory.Bitmap{}) } diff --git a/pkg/compute/ARCHITECTURE.md b/pkg/compute/ARCHITECTURE.md index 54154502bd..d6a35a8e08 100644 --- a/pkg/compute/ARCHITECTURE.md +++ b/pkg/compute/ARCHITECTURE.md @@ -41,17 +41,8 @@ corresponding bit is true are selected. Unselected rows are treated as null. ## Null-Marking Behavior -When a selection vector is applied, unselected rows are marked as null in -the output. 
This is implemented by ANDing the data's validity bitmap with -the selection bitmap: - - result_validity = data_validity AND selection - -This approach has important properties: - - Already-null rows remain null (null AND true = null) - - Unselected rows become null (valid AND false = null) - - Selected valid rows remain valid (valid AND true = valid) - - No data (array values) copying or array resizing required +When a selection vector is applied, unselected rows are left undefined in +the output. ## Dispatch Pattern diff --git a/pkg/compute/compute_test.go b/pkg/compute/compute_test.go index 614905057c..932985ba1e 100644 --- a/pkg/compute/compute_test.go +++ b/pkg/compute/compute_test.go @@ -41,7 +41,13 @@ func TestCompute(t *testing.T) { result, err := evalCaseFunction(t, &alloc, tc) require.NoError(t, err) - columnartest.RequireDatumsEqual(t, tc.Expect, result) + mask := tc.Selection + if tc.Function == "FILTER" { + // Filter materializes the mask, so we don't want to pass + // the mask back down to RequireDatumsEqual. 
+ mask = memory.Bitmap{} + } + columnartest.RequireDatumsEqual(t, tc.Expect, result, mask) }) } @@ -100,6 +106,9 @@ func evalCaseFunction(t *testing.T, alloc *memory.Allocator, tc computetest.Case } return compute.RegexpMatch(alloc, tc.Arguments[0], re, tc.Selection) + case "FILTER": + require.Len(t, tc.Arguments, 1, "FILTER function requires one argument") + return compute.Filter(alloc, tc.Arguments[0], tc.Selection) case "ISMEMBER": require.Len(t, tc.Arguments, 2, "ISMEMBER function requires two arguments") // Second argument should be an array that we convert to a Set diff --git a/pkg/compute/equality_bool.go b/pkg/compute/equality_bool.go index e78f8dead5..b236f1ad0b 100644 --- a/pkg/compute/equality_bool.go +++ b/pkg/compute/equality_bool.go @@ -7,7 +7,7 @@ import ( "github.com/grafana/loki/v3/pkg/memory" ) -func dispatchBoolEquality(alloc *memory.Allocator, kernel boolEqualityKernel, left, right columnar.Datum, selection memory.Bitmap) (columnar.Datum, error) { +func dispatchBoolEquality(alloc *memory.Allocator, kernel boolEqualityKernel, left, right columnar.Datum, _ memory.Bitmap) (columnar.Datum, error) { _, leftScalar := left.(columnar.Scalar) _, rightScalar := right.(columnar.Scalar) @@ -15,17 +15,11 @@ func dispatchBoolEquality(alloc *memory.Allocator, kernel boolEqualityKernel, le case leftScalar && rightScalar: return boolEqualitySS(kernel, left.(*columnar.BoolScalar), right.(*columnar.BoolScalar)), nil case leftScalar && !rightScalar: - out := boolEqualitySA(alloc, kernel, left.(*columnar.BoolScalar), right.(*columnar.Bool)) - return applySelectionToBoolArray(alloc, out, selection) + return boolEqualitySA(alloc, kernel, left.(*columnar.BoolScalar), right.(*columnar.Bool)), nil case !leftScalar && rightScalar: - out := boolEqualityAS(alloc, kernel, left.(*columnar.Bool), right.(*columnar.BoolScalar)) - return applySelectionToBoolArray(alloc, out, selection) + return boolEqualityAS(alloc, kernel, left.(*columnar.Bool), right.(*columnar.BoolScalar)), 
nil case !leftScalar && !rightScalar: - out, err := boolEqualityAA(alloc, kernel, left.(*columnar.Bool), right.(*columnar.Bool)) - if err != nil { - return nil, err - } - return applySelectionToBoolArray(alloc, out, selection) + return boolEqualityAA(alloc, kernel, left.(*columnar.Bool), right.(*columnar.Bool)) } panic("unreachable") diff --git a/pkg/compute/equality_null.go b/pkg/compute/equality_null.go index ba8aaf7fcb..c696041979 100644 --- a/pkg/compute/equality_null.go +++ b/pkg/compute/equality_null.go @@ -7,7 +7,7 @@ import ( "github.com/grafana/loki/v3/pkg/memory" ) -func dispatchNullEquality(alloc *memory.Allocator, left, right columnar.Datum, selection memory.Bitmap) (columnar.Datum, error) { +func dispatchNullEquality(alloc *memory.Allocator, left, right columnar.Datum, _ memory.Bitmap) (columnar.Datum, error) { _, leftScalar := left.(columnar.Scalar) _, rightScalar := right.(columnar.Scalar) @@ -15,17 +15,11 @@ func dispatchNullEquality(alloc *memory.Allocator, left, right columnar.Datum, s case leftScalar && rightScalar: return nullEqualitySS(left.(*columnar.NullScalar), right.(*columnar.NullScalar)), nil case leftScalar && !rightScalar: - out := nullEqualitySA(alloc, left.(*columnar.NullScalar), right.(*columnar.Null)) - return applySelectionToBoolArray(alloc, out, selection) + return nullEqualitySA(alloc, left.(*columnar.NullScalar), right.(*columnar.Null)), nil case !leftScalar && rightScalar: - out := nullEqualityAS(alloc, left.(*columnar.Null), right.(*columnar.NullScalar)) - return applySelectionToBoolArray(alloc, out, selection) + return nullEqualityAS(alloc, left.(*columnar.Null), right.(*columnar.NullScalar)), nil case !leftScalar && !rightScalar: - out, err := nullEqualityAA(alloc, left.(*columnar.Null), right.(*columnar.Null)) - if err != nil { - return nil, err - } - return applySelectionToBoolArray(alloc, out, selection) + return nullEqualityAA(alloc, left.(*columnar.Null), right.(*columnar.Null)) } panic("unreachable") diff --git 
a/pkg/compute/equality_numeric.go b/pkg/compute/equality_numeric.go index 1b77fadd4c..1f3dd13dfa 100644 --- a/pkg/compute/equality_numeric.go +++ b/pkg/compute/equality_numeric.go @@ -7,7 +7,7 @@ import ( "github.com/grafana/loki/v3/pkg/memory" ) -func dispatchNumericEquality[T columnar.Numeric](alloc *memory.Allocator, kernel numericEqualityKernel[T], left, right columnar.Datum, selection memory.Bitmap) (columnar.Datum, error) { +func dispatchNumericEquality[T columnar.Numeric](alloc *memory.Allocator, kernel numericEqualityKernel[T], left, right columnar.Datum, _ memory.Bitmap) (columnar.Datum, error) { _, leftScalar := left.(columnar.Scalar) _, rightScalar := right.(columnar.Scalar) @@ -15,17 +15,11 @@ func dispatchNumericEquality[T columnar.Numeric](alloc *memory.Allocator, kernel case leftScalar && rightScalar: return numericEqualitySS(kernel, left.(*columnar.NumberScalar[T]), right.(*columnar.NumberScalar[T])), nil case leftScalar && !rightScalar: - out := numericEqualitySA(alloc, kernel, left.(*columnar.NumberScalar[T]), right.(*columnar.Number[T])) - return applySelectionToBoolArray(alloc, out, selection) + return numericEqualitySA(alloc, kernel, left.(*columnar.NumberScalar[T]), right.(*columnar.Number[T])), nil case !leftScalar && rightScalar: - out := numericEqualityAS(alloc, kernel, left.(*columnar.Number[T]), right.(*columnar.NumberScalar[T])) - return applySelectionToBoolArray(alloc, out, selection) + return numericEqualityAS(alloc, kernel, left.(*columnar.Number[T]), right.(*columnar.NumberScalar[T])), nil case !leftScalar && !rightScalar: - out, err := numericEqualityAA(alloc, kernel, left.(*columnar.Number[T]), right.(*columnar.Number[T])) - if err != nil { - return nil, err - } - return applySelectionToBoolArray(alloc, out, selection) + return numericEqualityAA(alloc, kernel, left.(*columnar.Number[T]), right.(*columnar.Number[T])) } panic("unreachable") diff --git a/pkg/compute/equality_utf8.go b/pkg/compute/equality_utf8.go index 
30b5cfee99..4b536f9720 100644 --- a/pkg/compute/equality_utf8.go +++ b/pkg/compute/equality_utf8.go @@ -7,7 +7,7 @@ import ( "github.com/grafana/loki/v3/pkg/memory" ) -func dispatchUTF8Equality(alloc *memory.Allocator, kernel utf8EqualityKernel, left, right columnar.Datum, selection memory.Bitmap) (columnar.Datum, error) { +func dispatchUTF8Equality(alloc *memory.Allocator, kernel utf8EqualityKernel, left, right columnar.Datum, _ memory.Bitmap) (columnar.Datum, error) { _, leftScalar := left.(columnar.Scalar) _, rightScalar := right.(columnar.Scalar) @@ -15,17 +15,11 @@ func dispatchUTF8Equality(alloc *memory.Allocator, kernel utf8EqualityKernel, le case leftScalar && rightScalar: return utf8EqualitySS(kernel, left.(*columnar.UTF8Scalar), right.(*columnar.UTF8Scalar)), nil case leftScalar && !rightScalar: - out := utf8EqualitySA(alloc, kernel, left.(*columnar.UTF8Scalar), right.(*columnar.UTF8)) - return applySelectionToBoolArray(alloc, out, selection) + return utf8EqualitySA(alloc, kernel, left.(*columnar.UTF8Scalar), right.(*columnar.UTF8)), nil case !leftScalar && rightScalar: - out := utf8EqualityAS(alloc, kernel, left.(*columnar.UTF8), right.(*columnar.UTF8Scalar)) - return applySelectionToBoolArray(alloc, out, selection) + return utf8EqualityAS(alloc, kernel, left.(*columnar.UTF8), right.(*columnar.UTF8Scalar)), nil case !leftScalar && !rightScalar: - out, err := utf8EqualityAA(alloc, kernel, left.(*columnar.UTF8), right.(*columnar.UTF8)) - if err != nil { - return nil, err - } - return applySelectionToBoolArray(alloc, out, selection) + return utf8EqualityAA(alloc, kernel, left.(*columnar.UTF8), right.(*columnar.UTF8)) } panic("unreachable") diff --git a/pkg/compute/filter.go b/pkg/compute/filter.go new file mode 100644 index 0000000000..3e62ae5a22 --- /dev/null +++ b/pkg/compute/filter.go @@ -0,0 +1,101 @@ +package compute + +import ( + "fmt" + + "github.com/grafana/loki/v3/pkg/columnar" + "github.com/grafana/loki/v3/pkg/memory" +) + +// Filter selects rows 
from the input datum where the corresponding bit in mask +// is set, returning a new compacted array containing only the selected rows. +// +// The input must be an [columnar.Array]; Filter returns an error if a +// [columnar.Scalar] is provided. +// +// If mask is empty (Len == 0), all rows are selected and the input is returned +// unchanged. Filter returns an error if the mask length does not match the +// input array length. +func Filter(alloc *memory.Allocator, input columnar.Datum, mask memory.Bitmap) (columnar.Datum, error) { + arr, ok := input.(columnar.Array) + if !ok { + return nil, fmt.Errorf("Filter requires an Array input, got %T", input) + } + + if ok, err := columnar.AllSelected(arr, mask); ok { + return arr, nil + } else if err != nil { + return nil, err + } + + switch src := arr.(type) { + case *columnar.Bool: + return filterBool(alloc, src, mask), nil + case *columnar.Number[int32]: + return filterNumber(alloc, src, mask), nil + case *columnar.Number[int64]: + return filterNumber(alloc, src, mask), nil + case *columnar.Number[uint32]: + return filterNumber(alloc, src, mask), nil + case *columnar.Number[uint64]: + return filterNumber(alloc, src, mask), nil + case *columnar.UTF8: + return filterUTF8(alloc, src, mask), nil + case *columnar.Null: + return filterNull(alloc, mask), nil + default: + return nil, fmt.Errorf("Filter: unsupported array type %T", input) + } +} + +func filterBool(alloc *memory.Allocator, src *columnar.Bool, mask memory.Bitmap) *columnar.Bool { + builder := columnar.NewBoolBuilder(alloc) + builder.Grow(mask.SetCount()) + for i := range mask.IterValues(true) { + if src.IsNull(i) { + builder.AppendNull() + } else { + builder.AppendValue(src.Get(i)) + } + } + return builder.Build() +} + +func filterNumber[T columnar.Numeric](alloc *memory.Allocator, src *columnar.Number[T], mask memory.Bitmap) *columnar.Number[T] { + builder := columnar.NewNumberBuilder[T](alloc) + builder.Grow(mask.SetCount()) + for i := range 
mask.IterValues(true) { + if src.IsNull(i) { + builder.AppendNull() + } else { + builder.AppendValue(src.Get(i)) + } + } + return builder.Build() +} + +func filterUTF8(alloc *memory.Allocator, src *columnar.UTF8, mask memory.Bitmap) *columnar.UTF8 { + n := mask.SetCount() + builder := columnar.NewUTF8Builder(alloc) + builder.Grow(n) + // Estimate data bytes from the source average. A two-pass approach to + // compute the exact size is ~30% slower due to the extra iteration over + // variable-length values; the estimate avoids most reallocations without + // that cost. + builder.GrowData(src.Size() * n / max(src.Len(), 1)) + for i := range mask.IterValues(true) { + if src.IsNull(i) { + builder.AppendNull() + } else { + builder.AppendValue(src.Get(i)) + } + } + return builder.Build() +} + +func filterNull(alloc *memory.Allocator, mask memory.Bitmap) *columnar.Null { + n := mask.SetCount() + validity := memory.NewBitmap(alloc, n) + validity.AppendCount(false, n) + return columnar.NewNull(validity) +} diff --git a/pkg/compute/filter_bench_test.go b/pkg/compute/filter_bench_test.go new file mode 100644 index 0000000000..1d431b191a --- /dev/null +++ b/pkg/compute/filter_bench_test.go @@ -0,0 +1,84 @@ +package compute_test + +import ( + "testing" + + "github.com/grafana/loki/v3/pkg/compute" + "github.com/grafana/loki/v3/pkg/memory" +) + +var filterSelectivities = map[string]func(*testing.B, *memory.Allocator) memory.Bitmap{ + "selectivity=100": func(*testing.B, *memory.Allocator) memory.Bitmap { return memory.Bitmap{} }, + "selectivity=50": func(b *testing.B, alloc *memory.Allocator) memory.Bitmap { + return makeAlternatingSelection(b, alloc, benchmarkSize) + }, + "selectivity=05": func(b *testing.B, alloc *memory.Allocator) memory.Bitmap { + return makeSparseSelection(b, alloc, benchmarkSize, 0.05) + }, +} + +func BenchmarkFilter_Bool(b *testing.B) { + for name, maskFunc := range filterSelectivities { + b.Run(name, func(b *testing.B) { + var alloc memory.Allocator + 
input := makeBoolArray(b, &alloc, benchmarkSize) + mask := maskFunc(b, &alloc) + + benchAlloc := memory.NewAllocator(nil) + for b.Loop() { + benchAlloc.Reclaim() + result, err := compute.Filter(benchAlloc, input, mask) + if err != nil { + b.Fatal(err) + } + _ = result + } + + reportArrayBenchMetrics(b, input) + }) + } +} + +func BenchmarkFilter_Int64(b *testing.B) { + for name, maskFunc := range filterSelectivities { + b.Run(name, func(b *testing.B) { + var alloc memory.Allocator + input := makeInt64Array(b, &alloc, benchmarkSize) + mask := maskFunc(b, &alloc) + + benchAlloc := memory.NewAllocator(nil) + for b.Loop() { + benchAlloc.Reclaim() + result, err := compute.Filter(benchAlloc, input, mask) + if err != nil { + b.Fatal(err) + } + _ = result + } + + reportArrayBenchMetrics(b, input) + }) + } +} + +func BenchmarkFilter_UTF8(b *testing.B) { + for name, maskFunc := range filterSelectivities { + b.Run(name, func(b *testing.B) { + var alloc memory.Allocator + input := makeUTF8Array(b, &alloc, benchmarkSize) + mask := maskFunc(b, &alloc) + + benchAlloc := memory.NewAllocator(nil) + for b.Loop() { + benchAlloc.Reclaim() + result, err := compute.Filter(benchAlloc, input, mask) + if err != nil { + b.Fatal(err) + } + _ = result + } + + reportArrayBenchMetrics(b, input) + }) + } +} diff --git a/pkg/compute/internal/computetest/parser.go b/pkg/compute/internal/computetest/parser.go index 888da40a86..6c179c6d6b 100644 --- a/pkg/compute/internal/computetest/parser.go +++ b/pkg/compute/internal/computetest/parser.go @@ -8,7 +8,10 @@ import ( "github.com/grafana/loki/v3/pkg/memory" ) -const nullLit = "null" +const ( + nullLit = "null" + undefinedLit = "_" +) type parser struct { alloc *memory.Allocator @@ -28,6 +31,16 @@ func (p *parser) next() { p.pos, p.tok, p.lit = p.scanner.Scan() } +// isUndefined checks whether the current token is [undefinedLit], representing an +// undefined slot. If so it advances past the token and returns true. 
+func (p *parser) isUndefined() bool { + if p.tok == tokenIdent && p.lit == undefinedLit { + p.next() + return true + } + return false +} + // expect consumes the next token and returns an error if it doesn't match expected. func (p *parser) expect(expected token) error { if p.tok != expected { @@ -149,6 +162,10 @@ func (p *parser) parseNullArray() (columnar.Datum, error) { builder := columnar.NewNullBuilder(p.alloc) for p.tok != tokenRBrack && p.tok != tokenEOF { + if p.isUndefined() { + builder.AppendNull() + continue + } _, err := p.parseNullScalar() if err != nil { return nil, err @@ -163,6 +180,9 @@ func (p *parser) parseNullArray() (columnar.Datum, error) { } func (p *parser) parseNullScalar() (columnar.Datum, error) { + if p.isUndefined() { + return &columnar.NullScalar{}, nil + } if p.tok != tokenIdent || p.lit != nullLit { return nil, fmt.Errorf("line %d:%d: expected 'null', got %s", p.pos.Line, p.pos.Col, p.lit) } @@ -178,8 +198,11 @@ func (p *parser) parseBoolDatum() (columnar.Datum, error) { return p.parseBoolScalar() } -// parseBoolScalar := "true" | "false" | "null" +// parseBoolScalar := "true" | "false" | "null" | [undefinedLit] func (p *parser) parseBoolScalar() (columnar.Datum, error) { + if p.isUndefined() { + return &columnar.BoolScalar{}, nil + } if p.tok != tokenIdent { return nil, fmt.Errorf("line %d:%d: expected bool value, got %s", p.pos.Line, p.pos.Col, p.tok) } @@ -209,6 +232,11 @@ func (p *parser) parseBoolArray() (*columnar.Bool, error) { builder := columnar.NewBoolBuilder(p.alloc) for p.tok != tokenRBrack && p.tok != tokenEOF { + if p.isUndefined() { + builder.AppendValue(false) + continue + } + scalar, err := p.parseBoolScalar() if err != nil { return nil, err @@ -236,8 +264,11 @@ func (p *parser) parseInt32Datum() (columnar.Datum, error) { return p.parseInt32Scalar() } -// parseInt32Scalar := | "null" +// parseInt32Scalar := | "null" | [undefinedLit] func (p *parser) parseInt32Scalar() (columnar.Datum, error) { + if p.isUndefined() { 
+ return &columnar.NumberScalar[int32]{}, nil + } if p.tok == tokenIdent && p.lit == nullLit { p.next() return &columnar.NumberScalar[int32]{Null: true}, nil @@ -274,6 +305,11 @@ func (p *parser) parseInt32Array() (columnar.Datum, error) { builder := columnar.NewNumberBuilder[int32](p.alloc) for p.tok != tokenRBrack && p.tok != tokenEOF { + if p.isUndefined() { + builder.AppendValue(0) + continue + } + scalar, err := p.parseInt32Scalar() if err != nil { return nil, err @@ -302,8 +338,11 @@ func (p *parser) parseInt64Datum() (columnar.Datum, error) { return p.parseInt64Scalar() } -// parseInt64Scalar := | "null" +// parseInt64Scalar := | "null" | [undefinedLit] func (p *parser) parseInt64Scalar() (columnar.Datum, error) { + if p.isUndefined() { + return &columnar.NumberScalar[int64]{}, nil + } if p.tok == tokenIdent && p.lit == nullLit { p.next() return &columnar.NumberScalar[int64]{Null: true}, nil @@ -340,6 +379,11 @@ func (p *parser) parseInt64Array() (columnar.Datum, error) { builder := columnar.NewNumberBuilder[int64](p.alloc) for p.tok != tokenRBrack && p.tok != tokenEOF { + if p.isUndefined() { + builder.AppendValue(0) + continue + } + scalar, err := p.parseInt64Scalar() if err != nil { return nil, err @@ -368,8 +412,11 @@ func (p *parser) parseUint32Datum() (columnar.Datum, error) { return p.parseUint32Scalar() } -// parseUint32Scalar := | "null" +// parseUint32Scalar := | "null" | [undefinedLit] func (p *parser) parseUint32Scalar() (columnar.Datum, error) { + if p.isUndefined() { + return &columnar.NumberScalar[uint32]{}, nil + } if p.tok == tokenIdent && p.lit == nullLit { p.next() return &columnar.NumberScalar[uint32]{Null: true}, nil @@ -397,6 +444,11 @@ func (p *parser) parseUint32Array() (columnar.Datum, error) { builder := columnar.NewNumberBuilder[uint32](p.alloc) for p.tok != tokenRBrack && p.tok != tokenEOF { + if p.isUndefined() { + builder.AppendValue(0) + continue + } + scalar, err := p.parseUint32Scalar() if err != nil { return nil, err @@ 
-424,8 +476,11 @@ func (p *parser) parseUint64Datum() (columnar.Datum, error) { return p.parseUint64Scalar() } -// parseUint64Scalar := | "null" +// parseUint64Scalar := | "null" | [undefinedLit] func (p *parser) parseUint64Scalar() (columnar.Datum, error) { + if p.isUndefined() { + return &columnar.NumberScalar[uint64]{}, nil + } if p.tok == tokenIdent && p.lit == nullLit { p.next() return &columnar.NumberScalar[uint64]{Null: true}, nil @@ -453,6 +508,11 @@ func (p *parser) parseUint64Array() (columnar.Datum, error) { builder := columnar.NewNumberBuilder[uint64](p.alloc) for p.tok != tokenRBrack && p.tok != tokenEOF { + if p.isUndefined() { + builder.AppendValue(0) + continue + } + scalar, err := p.parseUint64Scalar() if err != nil { return nil, err @@ -480,8 +540,11 @@ func (p *parser) parseUTF8Datum() (columnar.Datum, error) { return p.parseUTF8Scalar() } -// parseUTF8Scalar := | "null" +// parseUTF8Scalar := | "null" | [undefinedLit] func (p *parser) parseUTF8Scalar() (columnar.Datum, error) { + if p.isUndefined() { + return &columnar.UTF8Scalar{}, nil + } if p.tok == tokenIdent && p.lit == nullLit { p.next() return &columnar.UTF8Scalar{Null: true}, nil @@ -505,6 +568,11 @@ func (p *parser) parseUTF8Array() (columnar.Datum, error) { builder := columnar.NewUTF8Builder(p.alloc) for p.tok != tokenRBrack && p.tok != tokenEOF { + if p.isUndefined() { + builder.AppendValue(nil) + continue + } + scalar, err := p.parseUTF8Scalar() if err != nil { return nil, err diff --git a/pkg/compute/logical.go b/pkg/compute/logical.go index 0b802fe288..92e3429f44 100644 --- a/pkg/compute/logical.go +++ b/pkg/compute/logical.go @@ -15,7 +15,7 @@ import ( // Special cases: // // - The negation of null is null. 
-func Not(alloc *memory.Allocator, input columnar.Datum, selection memory.Bitmap) (columnar.Datum, error) { +func Not(alloc *memory.Allocator, input columnar.Datum, _ memory.Bitmap) (columnar.Datum, error) { if got, want := input.Kind(), columnar.KindBool; got != want { return nil, fmt.Errorf("invalid input kind %s, expected %s", got, want) } @@ -24,8 +24,7 @@ func Not(alloc *memory.Allocator, input columnar.Datum, selection memory.Bitmap) case *columnar.BoolScalar: return notScalar(input), nil case *columnar.Bool: - out := notArray(alloc, input) - return applySelectionToBoolArray(alloc, out, selection) + return notArray(alloc, input), nil default: panic(fmt.Sprintf("unexpected input type %T", input)) } @@ -84,7 +83,7 @@ func Or(alloc *memory.Allocator, left, right columnar.Datum, selection memory.Bi return dispatchLogical(alloc, logicalOrKernel, left, right, selection) } -func dispatchLogical(alloc *memory.Allocator, kernel logicalKernel, left, right columnar.Datum, selection memory.Bitmap) (columnar.Datum, error) { +func dispatchLogical(alloc *memory.Allocator, kernel logicalKernel, left, right columnar.Datum, _ memory.Bitmap) (columnar.Datum, error) { if got, want := left.Kind(), columnar.KindBool; got != want { return nil, fmt.Errorf("invalid input kind %s, expected %s", got, want) } else if left.Kind() != right.Kind() { @@ -98,17 +97,11 @@ func dispatchLogical(alloc *memory.Allocator, kernel logicalKernel, left, right case leftScalar && rightScalar: return logicalSS(kernel, left.(*columnar.BoolScalar), right.(*columnar.BoolScalar)), nil case leftScalar && !rightScalar: - out := logicalSA(alloc, kernel, left.(*columnar.BoolScalar), right.(*columnar.Bool)) - return applySelectionToBoolArray(alloc, out, selection) + return logicalSA(alloc, kernel, left.(*columnar.BoolScalar), right.(*columnar.Bool)), nil case !leftScalar && rightScalar: - out := logicalAS(alloc, kernel, left.(*columnar.Bool), right.(*columnar.BoolScalar)) - return applySelectionToBoolArray(alloc, 
out, selection) + return logicalAS(alloc, kernel, left.(*columnar.Bool), right.(*columnar.BoolScalar)), nil case !leftScalar && !rightScalar: - out, err := logicalAA(alloc, kernel, left.(*columnar.Bool), right.(*columnar.Bool)) - if err != nil { - return nil, err - } - return applySelectionToBoolArray(alloc, out, selection) + return logicalAA(alloc, kernel, left.(*columnar.Bool), right.(*columnar.Bool)) } panic("unreachable") diff --git a/pkg/compute/selection.go b/pkg/compute/selection.go deleted file mode 100644 index 0f7b7e2af9..0000000000 --- a/pkg/compute/selection.go +++ /dev/null @@ -1,19 +0,0 @@ -package compute - -import ( - "github.com/grafana/loki/v3/pkg/columnar" - "github.com/grafana/loki/v3/pkg/memory" -) - -// applySelectionToBoolArray applies a selection bitmap to a boolean array, -// marking unselected rows as null in the result. -func applySelectionToBoolArray(alloc *memory.Allocator, arr *columnar.Bool, selection memory.Bitmap) (*columnar.Bool, error) { - if selection.Len() == 0 { - return arr, nil - } - validity, err := computeValidityAA(alloc, arr.Validity(), selection) - if err != nil { - return nil, err - } - return columnar.NewBool(arr.Values(), validity), nil -} diff --git a/pkg/compute/set.go b/pkg/compute/set.go index aca11da4b3..21f8bb70d0 100644 --- a/pkg/compute/set.go +++ b/pkg/compute/set.go @@ -8,9 +8,8 @@ import ( ) // IsMember checks if each item in datum is a member of the values set. -// The selection parameter controls which rows are evaluated: -// - If selection.Len(), all rows are evaluated -// - Otherwise, only rows where selection bit is true are evaluated, non-selected rows result in null +// The selection parameter defines which rows are evaluated. Unselected rows +// have undefined values in the output. 
func IsMember(alloc *memory.Allocator, datum columnar.Datum, values *columnar.Set, selection memory.Bitmap) (columnar.Datum, error) { if values.Kind() != datum.Kind() { return nil, fmt.Errorf("values set and datum must be the same kind, got %s and %s", values.Kind(), datum.Kind()) @@ -46,7 +45,11 @@ func isMemberUTF8(alloc *memory.Allocator, datum columnar.Datum, values *columna } func isMemberUTF8A(alloc *memory.Allocator, haystack *columnar.UTF8, set *columnar.Set, selection memory.Bitmap) (columnar.Datum, error) { - validity, err := computeValidityAA(alloc, haystack.Validity(), selection) + // Merge selection with validity to determine which valid rows are selected. + // + // This makes the iterTrue loop below faster as we don't have to keep poking + // at bitmaps for each row to check. + rowMask, err := computeValidityAA(alloc, haystack.Validity(), selection) if err != nil { return nil, fmt.Errorf("apply selection to validity: %w", err) } @@ -54,11 +57,18 @@ func isMemberUTF8A(alloc *memory.Allocator, haystack *columnar.UTF8, set *column values := memory.NewBitmap(alloc, haystack.Len()) values.Resize(haystack.Len()) - for i := range iterTrue(validity, haystack.Len()) { + for i := range iterTrue(rowMask, haystack.Len()) { found := set.Has(string(haystack.Get(i))) values.Set(i, found) } + var validity memory.Bitmap + if haystack.Nulls() > 0 { + // Output validity is always based purely on input validity, not + // selection. + validity = memory.NewBitmap(alloc, haystack.Len()) + validity.AppendBitmap(haystack.Validity()) + } return columnar.NewBool(values, validity), nil } @@ -85,7 +95,11 @@ func isMemberNumber[T columnar.Numeric](alloc *memory.Allocator, datum columnar. 
} func isMemberNumberA[T columnar.Numeric](alloc *memory.Allocator, haystack *columnar.Number[T], set *columnar.Set, selection memory.Bitmap) (columnar.Datum, error) { - validity, err := computeValidityAA(alloc, haystack.Validity(), selection) + // Merge selection with validity to determine which valid rows are selected. + // + // This makes the iterTrue loop below faster as we don't have to keep poking + // at bitmaps for each row to check. + rowMask, err := computeValidityAA(alloc, haystack.Validity(), selection) if err != nil { return nil, fmt.Errorf("apply selection to validity: %w", err) } @@ -93,11 +107,18 @@ func isMemberNumberA[T columnar.Numeric](alloc *memory.Allocator, haystack *colu values := memory.NewBitmap(alloc, haystack.Len()) values.Resize(haystack.Len()) - for i := range iterTrue(validity, haystack.Len()) { + for i := range iterTrue(rowMask, haystack.Len()) { found := set.Has(haystack.Get(i)) values.Set(i, found) } + var validity memory.Bitmap + if haystack.Nulls() > 0 { + // Output validity is always based purely on input validity, not + // selection. + validity = memory.NewBitmap(alloc, haystack.Len()) + validity.AppendBitmap(haystack.Validity()) + } return columnar.NewBool(values, validity), nil } diff --git a/pkg/compute/testdata/README.md b/pkg/compute/testdata/README.md index 0d428dfc97..43c4ec6648 100644 --- a/pkg/compute/testdata/README.md +++ b/pkg/compute/testdata/README.md @@ -18,7 +18,7 @@ TypedValue := Type ":" Value Type := "bool" | "int32" | "int64" | "uint32" | "uint64" | "utf8" | "null" Value := Scalar | Array -Scalar := | "null" +Scalar := | "null" | "_" Array := "[" Scalar* "]" TERMINATOR := "\n" @@ -36,6 +36,7 @@ ignored. - **uint64**: Unsigned 64-bit integers (e.g., `0`, `456`) - **utf8**: UTF-8 strings, must be quoted (e.g., `"hello"`, `"test string"`). Escape sequences are supported. - **null**: Explicit null type for null-only values +- **`_`**: Undefined value. 
Represents a position where the value is undefined, such as an unselected row in a selection vector. ## Adding new compute functions diff --git a/pkg/compute/testdata/equality.test b/pkg/compute/testdata/equality.test index 5f8cb75bf9..c24adae788 100644 --- a/pkg/compute/testdata/equality.test +++ b/pkg/compute/testdata/equality.test @@ -607,46 +607,46 @@ GTE utf8:["b" "b" "c" "z" null null null] utf8:["a" "b" "z" "z" "foo" "bar" nu # # (Array, Array) -EQ bool:[true false true false] bool:[true true true true] select:[true false true false] -> bool:[true null true null] # Partial -EQ bool:[true false true] bool:[true true false] select:[false false false] -> bool:[null null null] # No selection -EQ bool:[true false true] bool:[true false false] select:[false true false] -> bool:[null true null] # Single selection +EQ bool:[true false true false] bool:[true true true true] select:[true false true false] -> bool:[true _ true _] # Partial +EQ bool:[true false true] bool:[true true false] select:[false false false] -> bool:[_ _ _] # No selection +EQ bool:[true false true] bool:[true false false] select:[false true false] -> bool:[_ true _] # Single selection EQ bool:[true false] bool:[true true] select:[true true] -> bool:[true false] # Full selection -EQ int32:[10 20 30 40] int32:[10 21 30 41] select:[true false true false] -> bool:[true null true null] -EQ int32:[1 2] int32:[1 3] select:[false false] -> bool:[null null] -EQ int64:[10 20 30 40] int64:[10 21 30 41] select:[true false true false] -> bool:[true null true null] -EQ int64:[1 2] int64:[1 3] select:[false false] -> bool:[null null] -EQ uint32:[10 20 30 40] uint32:[10 21 30 41] select:[true false true false] -> bool:[true null true null] -EQ uint64:[10 20 30 40] uint64:[10 21 30 41] select:[true false true false] -> bool:[true null true null] -EQ utf8:["foo" "bar" "baz" "qux"] utf8:["foo" "BAR" "baz" "QUX"] select:[true false true false] -> bool:[true null true null] -EQ utf8:["a" "b" "c"] utf8:["a" "x" "c"] 
select:[false false false] -> bool:[null null null] -EQ null:[null null null] null:[null null null] select:[true false true] -> bool:[null null null] +EQ int32:[10 20 30 40] int32:[10 21 30 41] select:[true false true false] -> bool:[true _ true _] +EQ int32:[1 2] int32:[1 3] select:[false false] -> bool:[_ _] +EQ int64:[10 20 30 40] int64:[10 21 30 41] select:[true false true false] -> bool:[true _ true _] +EQ int64:[1 2] int64:[1 3] select:[false false] -> bool:[_ _] +EQ uint32:[10 20 30 40] uint32:[10 21 30 41] select:[true false true false] -> bool:[true _ true _] +EQ uint64:[10 20 30 40] uint64:[10 21 30 41] select:[true false true false] -> bool:[true _ true _] +EQ utf8:["foo" "bar" "baz" "qux"] utf8:["foo" "BAR" "baz" "QUX"] select:[true false true false] -> bool:[true _ true _] +EQ utf8:["a" "b" "c"] utf8:["a" "x" "c"] select:[false false false] -> bool:[_ _ _] +EQ null:[null null null] null:[null null null] select:[true false true] -> bool:[null _ null] # (Scalar, Array) -EQ int32:10 int32:[10 20 10 30] select:[true false true false] -> bool:[true null true null] -EQ int64:10 int64:[10 20 10 30] select:[true false true false] -> bool:[true null true null] -EQ utf8:"test" utf8:["test" "foo" "test"] select:[false false false] -> bool:[null null null] +EQ int32:10 int32:[10 20 10 30] select:[true false true false] -> bool:[true _ true _] +EQ int64:10 int64:[10 20 10 30] select:[true false true false] -> bool:[true _ true _] +EQ utf8:"test" utf8:["test" "foo" "test"] select:[false false false] -> bool:[_ _ _] # (Array, Scalar) -EQ int32:[5 10 5 15] int32:5 select:[true false true false] -> bool:[true null true null] -EQ int64:[5 10 5 15] int64:5 select:[true false true false] -> bool:[true null true null] +EQ int32:[5 10 5 15] int32:5 select:[true false true false] -> bool:[true _ true _] +EQ int64:[5 10 5 15] int64:5 select:[true false true false] -> bool:[true _ true _] # Other operators -NEQ int32:[10 20 30] int32:[10 21 30] select:[true false true] -> 
bool:[false null false] -NEQ int64:[10 20 30] int64:[10 21 30] select:[true false true] -> bool:[false null false] -NEQ bool:[true false true] bool:[false false false] select:[false true false] -> bool:[null false null] -LT int32:[5 10 15 20] int32:[10 10 10 10] select:[true false true false] -> bool:[true null false null] -LT int64:[5 10 15 20] int64:[10 10 10 10] select:[true false true false] -> bool:[true null false null] -LT uint32:[15 25] uint32:[20 20] select:[false false] -> bool:[null null] -LT uint64:[15 25] uint64:[20 20] select:[false false] -> bool:[null null] -LT utf8:["a" "b" "c" "d"] utf8:["b" "b" "b" "b"] select:[true false true false] -> bool:[true null false null] -LTE int32:[10 20 10] int32:[10 15 10] select:[true false true] -> bool:[true null true] -LTE int64:[10 20 10] int64:[10 15 10] select:[true false true] -> bool:[true null true] +NEQ int32:[10 20 30] int32:[10 21 30] select:[true false true] -> bool:[false _ false] +NEQ int64:[10 20 30] int64:[10 21 30] select:[true false true] -> bool:[false _ false] +NEQ bool:[true false true] bool:[false false false] select:[false true false] -> bool:[_ false _] +LT int32:[5 10 15 20] int32:[10 10 10 10] select:[true false true false] -> bool:[true _ false _] +LT int64:[5 10 15 20] int64:[10 10 10 10] select:[true false true false] -> bool:[true _ false _] +LT uint32:[15 25] uint32:[20 20] select:[false false] -> bool:[_ _] +LT uint64:[15 25] uint64:[20 20] select:[false false] -> bool:[_ _] +LT utf8:["a" "b" "c" "d"] utf8:["b" "b" "b" "b"] select:[true false true false] -> bool:[true _ false _] +LTE int32:[10 20 10] int32:[10 15 10] select:[true false true] -> bool:[true _ true] +LTE int64:[10 20 10] int64:[10 15 10] select:[true false true] -> bool:[true _ true] LTE uint32:[10 20] uint32:[10 30] select:[true true] -> bool:[true true] LTE uint64:[10 20] uint64:[10 30] select:[true true] -> bool:[true true] -GT int32:[15 5 20] int32:[10 10 10] select:[true false true] -> bool:[true null true] -GT 
int64:[15 5 20] int64:[10 10 10] select:[true false true] -> bool:[true null true] -GT utf8:["x" "y" "z"] utf8:["y" "y" "y"] select:[false true false] -> bool:[null false null] -GTE int32:[10 5 10] int32:[10 10 5] select:[true false true] -> bool:[true null true] -GTE int64:[10 5 10] int64:[10 10 5] select:[true false true] -> bool:[true null true] -GTE uint32:[10 5] uint32:[5 5] select:[false false] -> bool:[null null] -GTE uint64:[10 5] uint64:[5 5] select:[false false] -> bool:[null null] +GT int32:[15 5 20] int32:[10 10 10] select:[true false true] -> bool:[true _ true] +GT int64:[15 5 20] int64:[10 10 10] select:[true false true] -> bool:[true _ true] +GT utf8:["x" "y" "z"] utf8:["y" "y" "y"] select:[false true false] -> bool:[_ false _] +GTE int32:[10 5 10] int32:[10 10 5] select:[true false true] -> bool:[true _ true] +GTE int64:[10 5 10] int64:[10 10 5] select:[true false true] -> bool:[true _ true] +GTE uint32:[10 5] uint32:[5 5] select:[false false] -> bool:[_ _] +GTE uint64:[10 5] uint64:[5 5] select:[false false] -> bool:[_ _] diff --git a/pkg/compute/testdata/filter.test b/pkg/compute/testdata/filter.test new file mode 100644 index 0000000000..df79130943 --- /dev/null +++ b/pkg/compute/testdata/filter.test @@ -0,0 +1,36 @@ +# +# FILTER (compute.Filter) +# +# FILTER takes an array and a selection mask, returning only the rows where +# the mask is true. 
+# + +# Bool arrays +FILTER bool:[true false true false] select:[true false true false] -> bool:[true true] +FILTER bool:[true false true false] select:[true true true true] -> bool:[true false true false] +FILTER bool:[true false true false] select:[false false false false] -> bool:[] +FILTER bool:[true false null true] select:[true false true false] -> bool:[true null] +FILTER bool:[null null null] select:[true true true] -> bool:[null null null] + +# Int32 arrays +FILTER int32:[1 2 3 4 5] select:[true false true false true] -> int32:[1 3 5] +FILTER int32:[10 20 30] select:[false false false] -> int32:[] +FILTER int32:[10 null 30] select:[true true false] -> int32:[10 null] + +# Int64 arrays +FILTER int64:[100 200 300] select:[true false true] -> int64:[100 300] +FILTER int64:[null 200 300] select:[true true false] -> int64:[null 200] + +# Uint32 arrays +FILTER uint32:[1 2 3] select:[false true false] -> uint32:[2] + +# Uint64 arrays +FILTER uint64:[10 20 30] select:[true true false] -> uint64:[10 20] + +# UTF8 arrays +FILTER utf8:["hello" "world" "foo"] select:[true false true] -> utf8:["hello" "foo"] +FILTER utf8:["a" "b" "c" "d"] select:[false true true false] -> utf8:["b" "c"] +FILTER utf8:[null "hello" "world"] select:[true true false] -> utf8:[null "hello"] + +# Null arrays +FILTER null:[null null null] select:[true false true] -> null:[null null] diff --git a/pkg/compute/testdata/logical.test b/pkg/compute/testdata/logical.test index bfe516405b..c8e7cb9c3a 100644 --- a/pkg/compute/testdata/logical.test +++ b/pkg/compute/testdata/logical.test @@ -73,35 +73,35 @@ OR bool:[true true false false] bool:[false true false true] # # NOT with selection -NOT bool:[true false true false] select:[true false true false] -> bool:[false null false null] # Partial +NOT bool:[true false true false] select:[true false true false] -> bool:[false _ false _] # Partial NOT bool:[true false null true] select:[true true true true] -> bool:[false true null false] # Full selection 
-NOT bool:[true false true] select:[false true false] -> bool:[null true null] # Single selection -NOT bool:[true null false null] select:[true true false false] -> bool:[false null null null] # With nulls +NOT bool:[true false true] select:[false true false] -> bool:[_ true _] # Single selection +NOT bool:[true null false null] select:[true true false false] -> bool:[false null _ _] # With nulls # AND with selection - (Array, Array) -AND bool:[true false true false] bool:[true true false false] select:[true false true false] -> bool:[true null false null] -AND bool:[true false true false] bool:[true true false false] select:[false false false false] -> bool:[null null null null] +AND bool:[true false true false] bool:[true true false false] select:[true false true false] -> bool:[true _ false _] +AND bool:[true false true false] bool:[true true false false] select:[false false false false] -> bool:[_ _ _ _] AND bool:[true false true] bool:[false true true] select:[true true true] -> bool:[false false true] -AND bool:[true null true false] bool:[true true false null] select:[true true false false] -> bool:[true null null null] +AND bool:[true null true false] bool:[true true false null] select:[true true false false] -> bool:[true null _ _] # AND with selection - (Scalar, Array) -AND bool:true bool:[true false true false] select:[true false true false] -> bool:[true null true null] -AND bool:false bool:[true false true false] select:[true false true false] -> bool:[false null false null] +AND bool:true bool:[true false true false] select:[true false true false] -> bool:[true _ true _] +AND bool:false bool:[true false true false] select:[true false true false] -> bool:[false _ false _] # AND with selection - (Array, Scalar) -AND bool:[true false true false] bool:true select:[false true false true] -> bool:[null false null false] -AND bool:[true false true false] bool:false select:[false true false true] -> bool:[null false null false] +AND bool:[true false true 
false] bool:true select:[false true false true] -> bool:[_ false _ false] +AND bool:[true false true false] bool:false select:[false true false true] -> bool:[_ false _ false] # OR with selection - (Array, Array) -OR bool:[true false true false] bool:[false false true true] select:[true false true false] -> bool:[true null true null] -OR bool:[true false true false] bool:[false false true true] select:[false false false false] -> bool:[null null null null] +OR bool:[true false true false] bool:[false false true true] select:[true false true false] -> bool:[true _ true _] +OR bool:[true false true false] bool:[false false true true] select:[false false false false] -> bool:[_ _ _ _] OR bool:[true false false] bool:[false true false] select:[true true true] -> bool:[true true false] -OR bool:[true null false false] bool:[false true true null] select:[true true false false] -> bool:[true null null null] +OR bool:[true null false false] bool:[false true true null] select:[true true false false] -> bool:[true null _ _] # OR with selection - (Scalar, Array) -OR bool:false bool:[true false true false] select:[true false true false] -> bool:[true null true null] -OR bool:true bool:[true false true false] select:[true false true false] -> bool:[true null true null] +OR bool:false bool:[true false true false] select:[true false true false] -> bool:[true _ true _] +OR bool:true bool:[true false true false] select:[true false true false] -> bool:[true _ true _] # OR with selection - (Array, Scalar) -OR bool:[true false true false] bool:false select:[false true false true] -> bool:[null false null false] -OR bool:[true false true false] bool:true select:[false true false true] -> bool:[null true null true] +OR bool:[true false true false] bool:false select:[false true false true] -> bool:[_ false _ false] +OR bool:[true false true false] bool:true select:[false true false true] -> bool:[_ true _ true] diff --git a/pkg/compute/testdata/selection.test 
b/pkg/compute/testdata/selection.test index e0d4e5eba2..869a5645d1 100644 --- a/pkg/compute/testdata/selection.test +++ b/pkg/compute/testdata/selection.test @@ -3,29 +3,29 @@ # # Selection masking - alternating pattern [true, false, true, false] -NOT bool:[true false true false] select:[true false true false] -> bool:[false null false null] -EQ int32:[1 2 3 4] int32:[1 0 3 0] select:[true false true false] -> bool:[true null true null] -EQ int64:[1 2 3 4] int64:[1 0 3 0] select:[true false true false] -> bool:[true null true null] -EQ uint32:[1 2 3 4] uint32:[1 0 3 0] select:[true false true false] -> bool:[true null true null] -EQ uint64:[1 2 3 4] uint64:[1 0 3 0] select:[true false true false] -> bool:[true null true null] -AND bool:[true true false false] bool:[true false true false] select:[true false true false] -> bool:[true null false null] -SUBSTRI utf8:["a" "b" "A" "c"] utf8:"a" select:[true false true false] -> bool:[true null true null] +NOT bool:[true false true false] select:[true false true false] -> bool:[false _ false _] +EQ int32:[1 2 3 4] int32:[1 0 3 0] select:[true false true false] -> bool:[true _ true _] +EQ int64:[1 2 3 4] int64:[1 0 3 0] select:[true false true false] -> bool:[true _ true _] +EQ uint32:[1 2 3 4] uint32:[1 0 3 0] select:[true false true false] -> bool:[true _ true _] +EQ uint64:[1 2 3 4] uint64:[1 0 3 0] select:[true false true false] -> bool:[true _ true _] +AND bool:[true true false false] bool:[true false true false] select:[true false true false] -> bool:[true _ false _] +SUBSTRI utf8:["a" "b" "A" "c"] utf8:"a" select:[true false true false] -> bool:[true _ true _] # Single element with selection NOT bool:[true] select:[true] -> bool:[false] EQ int32:[42] int32:[42] select:[true] -> bool:[true] -EQ int32:[42] int32:[42] select:[false] -> bool:[null] +EQ int32:[42] int32:[42] select:[false] -> bool:[_] EQ uint32:[42] uint32:[42] select:[true] -> bool:[true] -EQ uint32:[42] uint32:[42] select:[false] -> bool:[null] +EQ 
uint32:[42] uint32:[42] select:[false] -> bool:[_] EQ uint64:[42] uint64:[42] select:[true] -> bool:[true] -EQ uint64:[42] uint64:[42] select:[false] -> bool:[null] +EQ uint64:[42] uint64:[42] select:[false] -> bool:[_] # All false selection - no rows selected -NOT bool:[true false true false] select:[false false false false] -> bool:[null null null null] -EQ int32:[1 2 3 4] int32:[1 0 3 0] select:[false false false false] -> bool:[null null null null] -EQ int64:[1 2 3 4] int64:[1 0 3 0] select:[false false false false] -> bool:[null null null null] -EQ uint32:[1 2 3 4] uint32:[1 0 3 0] select:[false false false false] -> bool:[null null null null] -EQ uint64:[1 2 3 4] uint64:[1 0 3 0] select:[false false false false] -> bool:[null null null null] -AND bool:[true true false false] bool:[true false true false] select:[false false false false] -> bool:[null null null null] -OR bool:[true true false false] bool:[true false true false] select:[false false false false] -> bool:[null null null null] -SUBSTRI utf8:["a" "b" "A" "c"] utf8:"a" select:[false false false false] -> bool:[null null null null] +NOT bool:[true false true false] select:[false false false false] -> bool:[_ _ _ _] +EQ int32:[1 2 3 4] int32:[1 0 3 0] select:[false false false false] -> bool:[_ _ _ _] +EQ int64:[1 2 3 4] int64:[1 0 3 0] select:[false false false false] -> bool:[_ _ _ _] +EQ uint32:[1 2 3 4] uint32:[1 0 3 0] select:[false false false false] -> bool:[_ _ _ _] +EQ uint64:[1 2 3 4] uint64:[1 0 3 0] select:[false false false false] -> bool:[_ _ _ _] +AND bool:[true true false false] bool:[true false true false] select:[false false false false] -> bool:[_ _ _ _] +OR bool:[true true false false] bool:[true false true false] select:[false false false false] -> bool:[_ _ _ _] +SUBSTRI utf8:["a" "b" "A" "c"] utf8:"a" select:[false false false false] -> bool:[_ _ _ _] diff --git a/pkg/compute/testdata/set.test b/pkg/compute/testdata/set.test index 4f9ab528b4..3bba3703b5 100644 --- 
a/pkg/compute/testdata/set.test +++ b/pkg/compute/testdata/set.test @@ -63,40 +63,40 @@ ISMEMBER uint64:[null] uint64:[] -> bool:[null] # # ISMEMBER with selection - UTF8 -ISMEMBER utf8:["test1" "test2" "test3" "test4"] utf8:["test1" "test3"] select:[true false true false] -> bool:[true null true null] # Partial +ISMEMBER utf8:["test1" "test2" "test3" "test4"] utf8:["test1" "test3"] select:[true false true false] -> bool:[true _ true _] # Partial ISMEMBER utf8:["test1" "test2" "test3" "test4"] utf8:["test1" "test3"] select:[true true true true] -> bool:[true false true false] # Full selection -ISMEMBER utf8:["test1" "test2" "test3"] utf8:["test1" "test3"] select:[false false false] -> bool:[null null null] # No selection +ISMEMBER utf8:["test1" "test2" "test3"] utf8:["test1" "test3"] select:[false false false] -> bool:[_ _ _] # No selection ISMEMBER utf8:["apple" "banana" "cherry" "date"] utf8:["apple" "cherry"] select:[true true true true] -> bool:[true false true false] -ISMEMBER utf8:["apple" "banana" "cherry" "date" "elderberry"] utf8:["apple" "cherry" "elderberry"] select:[true false true false true] -> bool:[true null true null true] -ISMEMBER utf8:["apple" "banana" "cherry" "date" "elderberry"] utf8:["cherry"] select:[false false true false false] -> bool:[null null true null null] +ISMEMBER utf8:["apple" "banana" "cherry" "date" "elderberry"] utf8:["apple" "cherry" "elderberry"] select:[true false true false true] -> bool:[true _ true _ true] +ISMEMBER utf8:["apple" "banana" "cherry" "date" "elderberry"] utf8:["cherry"] select:[false false true false false] -> bool:[_ _ true _ _] ISMEMBER utf8:["apple" null "cherry" null] utf8:["apple" "cherry"] select:[true true true true] -> bool:[true null true null] -ISMEMBER utf8:["apple" null "cherry" null] utf8:["apple" "cherry"] select:[true false true false] -> bool:[true null true null] -ISMEMBER utf8:[null "banana" null "date" null "fig"] utf8:["banana" "fig"] select:[true true true true false false] -> 
bool:[null true null false null null] +ISMEMBER utf8:["apple" null "cherry" null] utf8:["apple" "cherry"] select:[true false true false] -> bool:[true _ true _] +ISMEMBER utf8:[null "banana" null "date" null "fig"] utf8:["banana" "fig"] select:[true true true true false false] -> bool:[null true null false _ _] # ISMEMBER with selection - int32 -ISMEMBER int32:[1 2 3 4] int32:[1 3] select:[true false true false] -> bool:[true null true null] # Partial +ISMEMBER int32:[1 2 3 4] int32:[1 3] select:[true false true false] -> bool:[true _ true _] # Partial ISMEMBER int32:[1 2 3 4] int32:[1 3] select:[true true true true] -> bool:[true false true false] # Full selection ISMEMBER int32:[10 20 30 40] int32:[10 30] select:[true true true true] -> bool:[true false true false] ISMEMBER int32:[10 null 30 null] int32:[10 30] select:[true true true true] -> bool:[true null true null] -ISMEMBER int32:[10 null 30 null] int32:[10 30] select:[true false true false] -> bool:[true null true null] +ISMEMBER int32:[10 null 30 null] int32:[10 30] select:[true false true false] -> bool:[true _ true _] # ISMEMBER with selection - int64 -ISMEMBER int64:[1 2 3 4] int64:[1 3] select:[true false true false] -> bool:[true null true null] # Partial +ISMEMBER int64:[1 2 3 4] int64:[1 3] select:[true false true false] -> bool:[true _ true _] # Partial ISMEMBER int64:[1 2 3 4] int64:[1 3] select:[true true true true] -> bool:[true false true false] # Full selection ISMEMBER int64:[10 20 30 40] int64:[10 30] select:[true true true true] -> bool:[true false true false] ISMEMBER int64:[10 null 30 null] int64:[10 30] select:[true true true true] -> bool:[true null true null] -ISMEMBER int64:[10 null 30 null] int64:[10 30] select:[true false true false] -> bool:[true null true null] +ISMEMBER int64:[10 null 30 null] int64:[10 30] select:[true false true false] -> bool:[true _ true _] # ISMEMBER with selection - uint32 -ISMEMBER uint32:[1 2 3 4] uint32:[1 3] select:[true false true false] -> bool:[true 
null true null] # Partial +ISMEMBER uint32:[1 2 3 4] uint32:[1 3] select:[true false true false] -> bool:[true _ true _] # Partial ISMEMBER uint32:[1 2 3 4] uint32:[1 3] select:[true true true true] -> bool:[true false true false] # Full selection ISMEMBER uint32:[10 20 30 40] uint32:[10 30] select:[true true true true] -> bool:[true false true false] ISMEMBER uint32:[10 null 30 null] uint32:[10 30] select:[true true true true] -> bool:[true null true null] -ISMEMBER uint32:[10 null 30 null] uint32:[10 30] select:[true false true false] -> bool:[true null true null] +ISMEMBER uint32:[10 null 30 null] uint32:[10 30] select:[true false true false] -> bool:[true _ true _] # ISMEMBER with selection - uint64 -ISMEMBER uint64:[1 2 3 4] uint64:[1 3] select:[true false true false] -> bool:[true null true null] # Partial +ISMEMBER uint64:[1 2 3 4] uint64:[1 3] select:[true false true false] -> bool:[true _ true _] # Partial ISMEMBER uint64:[1 2 3 4] uint64:[1 3] select:[true true true true] -> bool:[true false true false] # Full selection ISMEMBER uint64:[10 20 30 40] uint64:[10 30] select:[true true true true] -> bool:[true false true false] ISMEMBER uint64:[10 null 30 null] uint64:[10 30] select:[true true true true] -> bool:[true null true null] -ISMEMBER uint64:[10 null 30 null] uint64:[10 30] select:[true false true false] -> bool:[true null true null] +ISMEMBER uint64:[10 null 30 null] uint64:[10 30] select:[true false true false] -> bool:[true _ true _] diff --git a/pkg/compute/testdata/utf8.test b/pkg/compute/testdata/utf8.test index a59a8debd1..87e9d4400d 100644 --- a/pkg/compute/testdata/utf8.test +++ b/pkg/compute/testdata/utf8.test @@ -42,26 +42,26 @@ REGEXP utf8:[null] utf8:null -> bool:[null] # REGEXP with selection REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." select:[true true true true true] -> bool:[false true true false false] # Full selection -REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." 
select:[true true true false false] -> bool:[false true true null null] # Partial (first three) -REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." select:[false true true true false] -> bool:[null true true false null] # Partial (middle) -REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." select:[false false false false false] -> bool:[null null null null null] # No selection -REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." select:[false false true false false] -> bool:[null null true null null] # Single row +REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." select:[true true true false false] -> bool:[false true true _ _] # Partial (first three) +REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." select:[false true true true false] -> bool:[_ true true false _] # Partial (middle) +REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." select:[false false false false false] -> bool:[_ _ _ _ _] # No selection +REGEXP utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba." 
select:[false false true false false] -> bool:[_ _ true _ _] # Single row # SUBSTRI with selection - case-insensitive SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"ba" select:[true true true true true] -> bool:[false true true false false] # Full selection -SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"ba" select:[false false true true true] -> bool:[null null true false false] # Partial (last three) -SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"ba" select:[false false false false false] -> bool:[null null null null null] # No selection -SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"ba" select:[false true false false false] -> bool:[null true null null null] # Single row +SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"ba" select:[false false true true true] -> bool:[_ _ true false false] # Partial (last three) +SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"ba" select:[false false false false false] -> bool:[_ _ _ _ _] # No selection +SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"ba" select:[false true false false false] -> bool:[_ true _ _ _] # Single row -SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"" select:[true false true false true] -> bool:[true null true null true] # Empty needle +SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:"" select:[true false true false true] -> bool:[true _ true _ true] # Empty needle SUBSTRI utf8:["FOO" "BAR" "BAZ" "QUX" "TEST"] utf8:null select:[true true false false true] -> bool:[null null null null null] # Null needle # SUBSTR with selection - case-sensitive SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba" select:[true true true true true] -> bool:[false true true false false] # Full selection -SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba" select:[true false true false true] -> bool:[false null true null false] # Partial (alternating) -SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba" select:[false false false false false] -> bool:[null null null null null] 
# No selection -SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba" select:[false true false false false] -> bool:[null true null null null] # Single row +SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba" select:[true false true false true] -> bool:[false _ true _ false] # Partial (alternating) +SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba" select:[false false false false false] -> bool:[_ _ _ _ _] # No selection +SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"ba" select:[false true false false false] -> bool:[_ true _ _ _] # Single row -SUBSTR utf8:["FOO" "BAR" "BAZ" "qux" "test"] utf8:"ba" select:[true true true false false] -> bool:[false false false null null] # Case-sensitive no match -SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"" select:[false true false true false] -> bool:[null true null true null] # Empty needle +SUBSTR utf8:["FOO" "BAR" "BAZ" "qux" "test"] utf8:"ba" select:[true true true false false] -> bool:[false false false _ _] # Case-sensitive no match +SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:"" select:[false true false true false] -> bool:[_ true _ true _] # Empty needle SUBSTR utf8:["foo" "bar" "baz" "qux" "test"] utf8:null select:[true false true false true] -> bool:[null null null null null] # Null needle diff --git a/pkg/compute/utf8.go b/pkg/compute/utf8.go index 74450652d2..b806608d0f 100644 --- a/pkg/compute/utf8.go +++ b/pkg/compute/utf8.go @@ -42,7 +42,8 @@ func regexpMatchAS(alloc *memory.Allocator, haystack *columnar.UTF8, regexp *reg return builder.Build(), nil } - validity, err := computeValidityAA(alloc, haystack.Validity(), selection) + // Compute rows to look at by merging selection with input validity. 
+ fullSelection, err := computeValidityAA(alloc, haystack.Validity(), selection) if err != nil { return nil, fmt.Errorf("apply selection to validity: %w", err) } @@ -50,10 +51,17 @@ func regexpMatchAS(alloc *memory.Allocator, haystack *columnar.UTF8, regexp *reg values := memory.NewBitmap(alloc, haystack.Len()) values.Resize(haystack.Len()) - for i := range iterTrue(validity, haystack.Len()) { + for i := range iterTrue(fullSelection, haystack.Len()) { values.Set(i, regexp.Match(haystack.Get(i))) } + var validity memory.Bitmap + if haystack.Nulls() > 0 { + // Output validity is always based purely on input validity, not + // selection. + validity = memory.NewBitmap(alloc, haystack.Len()) + validity.AppendBitmap(haystack.Validity()) + } return columnar.NewBool(values, validity), nil } @@ -100,7 +108,8 @@ func substrInsensitiveAS(alloc *memory.Allocator, haystack *columnar.UTF8, needl return builder.Build(), nil } - validity, err := computeValidityAA(alloc, haystack.Validity(), selection) + // Compute rows to look at by merging selection with input validity. + fullSelection, err := computeValidityAA(alloc, haystack.Validity(), selection) if err != nil { return nil, fmt.Errorf("apply selection to validity: %w", err) } @@ -110,11 +119,18 @@ func substrInsensitiveAS(alloc *memory.Allocator, haystack *columnar.UTF8, needl values := memory.NewBitmap(alloc, haystack.Len()) values.Resize(haystack.Len()) - for i := range iterTrue(validity, haystack.Len()) { + for i := range iterTrue(fullSelection, haystack.Len()) { haystackValueUpper := bytes.ToUpper(haystack.Get(i)) values.Set(i, bytes.Contains(haystackValueUpper, needleUpper)) } + var validity memory.Bitmap + if haystack.Nulls() > 0 { + // Output validity is always based purely on input validity, not + // selection. 
+ validity = memory.NewBitmap(alloc, haystack.Len()) + validity.AppendBitmap(haystack.Validity()) + } return columnar.NewBool(values, validity), nil } @@ -164,7 +180,8 @@ func substrAS(alloc *memory.Allocator, haystack *columnar.UTF8, needle *columnar return builder.Build(), nil } - validity, err := computeValidityAA(alloc, haystack.Validity(), selection) + // Compute rows to look at by merging selection with input validity. + fullSelection, err := computeValidityAA(alloc, haystack.Validity(), selection) if err != nil { return nil, fmt.Errorf("apply selection to validity: %w", err) } @@ -172,10 +189,17 @@ func substrAS(alloc *memory.Allocator, haystack *columnar.UTF8, needle *columnar values := memory.NewBitmap(alloc, haystack.Len()) values.Resize(haystack.Len()) - for i := range iterTrue(validity, haystack.Len()) { + for i := range iterTrue(fullSelection, haystack.Len()) { values.Set(i, bytes.Contains(haystack.Get(i), needle.Value)) } + var validity memory.Bitmap + if haystack.Nulls() > 0 { + // Output validity is always based purely on input validity, not + // selection. 
+ validity = memory.NewBitmap(alloc, haystack.Len()) + validity.AppendBitmap(haystack.Validity()) + } return columnar.NewBool(values, validity), nil } diff --git a/pkg/expr/evaluate_test.go b/pkg/expr/evaluate_test.go index 32a49021c8..5ec7930bd5 100644 --- a/pkg/expr/evaluate_test.go +++ b/pkg/expr/evaluate_test.go @@ -47,7 +47,7 @@ func TestEvaluate(t *testing.T) { result, err := expr.Evaluate(&alloc, e, record) require.NoError(t, err) - columnartest.RequireDatumsEqual(t, expect, result) + columnartest.RequireDatumsEqual(t, expect, result, memory.Bitmap{}) } func TestEvaluate_Constant(t *testing.T) { @@ -59,7 +59,7 @@ func TestEvaluate_Constant(t *testing.T) { result, err := expr.Evaluate(&alloc, e, nil) require.NoError(t, err) - columnartest.RequireDatumsEqual(t, expect, result) + columnartest.RequireDatumsEqual(t, expect, result, memory.Bitmap{}) } func TestEvaluate_Column(t *testing.T) { @@ -86,7 +86,7 @@ func TestEvaluate_Column(t *testing.T) { result, err := expr.Evaluate(&alloc, e, record) require.NoError(t, err) - columnartest.RequireDatumsEqual(t, expect, result) + columnartest.RequireDatumsEqual(t, expect, result, memory.Bitmap{}) }) t.Run("non-existing column", func(t *testing.T) { @@ -96,7 +96,7 @@ func TestEvaluate_Column(t *testing.T) { result, err := expr.Evaluate(&alloc, e, record) require.NoError(t, err) - columnartest.RequireDatumsEqual(t, expect, result) + columnartest.RequireDatumsEqual(t, expect, result, memory.Bitmap{}) }) } @@ -122,7 +122,7 @@ func TestEvaluate_Unary(t *testing.T) { result, err := expr.Evaluate(&alloc, e, record) require.NoError(t, err) - columnartest.RequireDatumsEqual(t, expect, result) + columnartest.RequireDatumsEqual(t, expect, result, memory.Bitmap{}) } func TestEvaluate_Binary(t *testing.T) { @@ -226,7 +226,7 @@ func TestEvaluate_Binary(t *testing.T) { result, err := expr.Evaluate(&alloc, e, record) require.NoError(t, err) - columnartest.RequireDatumsEqual(t, tt.expect, result) + columnartest.RequireDatumsEqual(t, 
tt.expect, result, memory.Bitmap{}) }) } } diff --git a/pkg/ingester/stream.go b/pkg/ingester/stream.go index 81db783bcf..c14f0d94cf 100644 --- a/pkg/ingester/stream.go +++ b/pkg/ingester/stream.go @@ -16,7 +16,6 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" - pushtypes "github.com/grafana/loki/pkg/push" "github.com/grafana/loki/v3/pkg/chunkenc" "github.com/grafana/loki/v3/pkg/distributor/writefailures" "github.com/grafana/loki/v3/pkg/ingester/wal" @@ -30,6 +29,8 @@ import ( "github.com/grafana/loki/v3/pkg/util/flagext" util_log "github.com/grafana/loki/v3/pkg/util/log" "github.com/grafana/loki/v3/pkg/validation" + + pushtypes "github.com/grafana/loki/pkg/push" ) var ErrEntriesExist = errors.New("duplicate push - entries already exist")