mirror of https://github.com/grafana/loki
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
301 lines
8.0 KiB
301 lines
8.0 KiB
package array_test
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"math/rand"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/grafana/loki/v3/pkg/columnar"
|
|
"github.com/grafana/loki/v3/pkg/columnar/columnartest"
|
|
"github.com/grafana/loki/v3/pkg/columnar/types"
|
|
"github.com/grafana/loki/v3/pkg/dataset/array"
|
|
"github.com/grafana/loki/v3/pkg/dataset/buffer"
|
|
"github.com/grafana/loki/v3/pkg/memory"
|
|
)
|
|
|
|
func TestZstdCodec_Validation(t *testing.T) {
|
|
tt := []struct {
|
|
name string
|
|
spec array.Spec
|
|
typ types.Type
|
|
|
|
expectError bool
|
|
}{
|
|
{
|
|
name: "accepts valid non-nullable utf8",
|
|
spec: &array.SpecZstd{Offsets: &array.SpecPlain{}},
|
|
typ: &types.UTF8{Nullable: false},
|
|
expectError: false,
|
|
},
|
|
{
|
|
name: "rejects missing offsets spec",
|
|
spec: &array.SpecZstd{Offsets: nil},
|
|
typ: &types.UTF8{Nullable: false},
|
|
expectError: true,
|
|
},
|
|
{
|
|
name: "rejects non-nullable utf8 with validity spec",
|
|
spec: &array.SpecZstd{Offsets: &array.SpecPlain{}, Validity: &array.SpecBool{}},
|
|
typ: &types.UTF8{Nullable: false},
|
|
expectError: true,
|
|
},
|
|
{
|
|
name: "rejects nullable utf8 with no validity spec",
|
|
spec: &array.SpecZstd{Offsets: &array.SpecPlain{}},
|
|
typ: &types.UTF8{Nullable: true},
|
|
expectError: true,
|
|
},
|
|
{
|
|
name: "accepts nullable utf8 with validity spec",
|
|
spec: &array.SpecZstd{Offsets: &array.SpecPlain{}, Validity: &array.SpecBool{}},
|
|
typ: &types.UTF8{Nullable: true},
|
|
expectError: false,
|
|
},
|
|
{
|
|
name: "rejects unsupported type bool",
|
|
spec: &array.SpecZstd{Offsets: &array.SpecPlain{}},
|
|
typ: &types.Bool{Nullable: false},
|
|
expectError: true,
|
|
},
|
|
{
|
|
name: "rejects unsupported type int64",
|
|
spec: &array.SpecZstd{Offsets: &array.SpecPlain{}},
|
|
typ: &types.Int64{Nullable: false},
|
|
expectError: true,
|
|
},
|
|
}
|
|
|
|
for _, tc := range tt {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
var alloc memory.Allocator
|
|
_, err := array.NewWriter(&alloc, tc.spec, tc.typ)
|
|
|
|
if tc.expectError {
|
|
require.Error(t, err)
|
|
} else {
|
|
require.NoError(t, err)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestZstdCodec_NonNullable(t *testing.T) {
|
|
var alloc memory.Allocator
|
|
var store buffer.MemoryStore
|
|
|
|
var (
|
|
spec = &array.SpecZstd{Offsets: &array.SpecPlain{}}
|
|
typ = &types.UTF8{Nullable: false}
|
|
)
|
|
|
|
w, err := array.NewWriter(&alloc, spec, typ)
|
|
require.NoError(t, err)
|
|
|
|
input := []columnar.Array{
|
|
columnartest.Array(t, types.KindUTF8, &alloc, "hello", "world", "", "foo", "bar"),
|
|
columnartest.Array(t, types.KindUTF8, &alloc, "baz", "qux", "quux", "corge", "grault"),
|
|
}
|
|
for _, arr := range input {
|
|
require.NoError(t, w.Append(arr))
|
|
}
|
|
|
|
result, err := w.Flush(t.Context(), &store)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 10, result.RowCount)
|
|
require.Equal(t, 0, result.Stats.NullCount)
|
|
|
|
r, err := array.NewReader(&alloc, result, &store)
|
|
require.NoError(t, err)
|
|
|
|
expect := columnartest.Array(
|
|
t, types.KindUTF8, &alloc,
|
|
"hello", "world", "", "foo", "bar",
|
|
"baz", "qux", "quux", "corge", "grault",
|
|
)
|
|
|
|
actual := readBatches(t, &alloc, r, 2) // Read in a small batch size to test reading multiple times
|
|
columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{})
|
|
|
|
// Reading again should produce a EOF.
|
|
_, err = r.Read(t.Context(), &alloc, 1)
|
|
require.ErrorIs(t, err, io.EOF)
|
|
}
|
|
|
|
func TestZstdCodec_Nullable(t *testing.T) {
|
|
var alloc memory.Allocator
|
|
var store buffer.MemoryStore
|
|
|
|
var (
|
|
spec = &array.SpecZstd{Offsets: &array.SpecPlain{}, Validity: &array.SpecBool{}}
|
|
typ = &types.UTF8{Nullable: true}
|
|
)
|
|
|
|
w, err := array.NewWriter(&alloc, spec, typ)
|
|
require.NoError(t, err)
|
|
|
|
input := []columnar.Array{
|
|
columnartest.Array(t, types.KindUTF8, &alloc, "hello", nil, "world", nil, "foo"),
|
|
columnartest.Array(t, types.KindUTF8, &alloc, "bar", "baz", "qux", "quux", "corge"),
|
|
columnartest.Array(t, types.KindUTF8, &alloc, nil),
|
|
}
|
|
for _, arr := range input {
|
|
require.NoError(t, w.Append(arr))
|
|
}
|
|
|
|
result, err := w.Flush(t.Context(), &store)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 11, result.RowCount)
|
|
require.Equal(t, 3, result.Stats.NullCount)
|
|
|
|
r, err := array.NewReader(&alloc, result, &store)
|
|
require.NoError(t, err)
|
|
|
|
expect := columnartest.Array(
|
|
t, types.KindUTF8, &alloc,
|
|
"hello", nil, "world", nil, "foo",
|
|
"bar", "baz", "qux", "quux", "corge",
|
|
nil,
|
|
)
|
|
|
|
actual := readBatches(t, &alloc, r, 2) // Read in a small batch size to test reading multiple times
|
|
columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{})
|
|
|
|
// Reading again should produce a EOF.
|
|
_, err = r.Read(t.Context(), &alloc, 1)
|
|
require.ErrorIs(t, err, io.EOF)
|
|
}
|
|
|
|
func TestZstdCodec_SlicedInput(t *testing.T) {
|
|
var alloc memory.Allocator
|
|
var store buffer.MemoryStore
|
|
|
|
var (
|
|
spec = &array.SpecZstd{Offsets: &array.SpecPlain{}}
|
|
typ = &types.UTF8{Nullable: false}
|
|
)
|
|
|
|
// Build a full array then slice out a middle section, so the input has
|
|
// non-zero-based offsets and a shared data buffer.
|
|
full := columnartest.Array(t, types.KindUTF8, &alloc,
|
|
"alpha", "bravo", "charlie", "delta", "echo",
|
|
)
|
|
sliced := full.Slice(1, 4) // ["bravo", "charlie", "delta"]
|
|
|
|
w, err := array.NewWriter(&alloc, spec, typ)
|
|
require.NoError(t, err)
|
|
require.NoError(t, w.Append(sliced))
|
|
|
|
result, err := w.Flush(t.Context(), &store)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 3, result.RowCount)
|
|
|
|
r, err := array.NewReader(&alloc, result, &store)
|
|
require.NoError(t, err)
|
|
|
|
expect := columnartest.Array(t, types.KindUTF8, &alloc,
|
|
"bravo", "charlie", "delta",
|
|
)
|
|
|
|
actual := readBatches(t, &alloc, r, 2) // Read in a small batch size to test reading multiple times
|
|
columnartest.RequireArraysEqual(t, expect, actual, memory.Bitmap{})
|
|
}
|
|
|
|
func BenchmarkZstdCodec(b *testing.B) {
|
|
var (
|
|
store buffer.MemoryStore
|
|
|
|
spec = &array.SpecZstd{Offsets: &array.SpecPlain{}}
|
|
typ = &types.UTF8{Nullable: false}
|
|
)
|
|
|
|
const valuesPerPage = 1 << 16
|
|
|
|
type scenario struct {
|
|
name string
|
|
valueCount int
|
|
encoded array.Array
|
|
}
|
|
|
|
build := func(name string, valueCount int, valueAt func(i int) []byte) scenario {
|
|
var alloc memory.Allocator
|
|
w, err := array.NewWriter(&alloc, spec, typ)
|
|
require.NoError(b, err)
|
|
|
|
builder := columnar.NewUTF8Builder(&alloc)
|
|
builder.Grow(valueCount)
|
|
|
|
for i := range valueCount {
|
|
builder.AppendValue(valueAt(i))
|
|
}
|
|
require.NoError(b, w.Append(builder.Build()))
|
|
|
|
arr, err := w.Flush(b.Context(), &store)
|
|
require.NoError(b, err)
|
|
return scenario{name: name, valueCount: valueCount, encoded: arr}
|
|
}
|
|
|
|
scenarios := []scenario{
|
|
build("variance=constant", valuesPerPage, func(int) []byte { return []byte("hello") }),
|
|
func() scenario {
|
|
rnd := rand.New(rand.NewSource(0))
|
|
return build("variance=random", valuesPerPage, func(int) []byte {
|
|
buf := make([]byte, 5+rnd.Intn(20))
|
|
rnd.Read(buf)
|
|
return buf
|
|
})
|
|
}(),
|
|
}
|
|
|
|
batchSizes := []int{256, 1024, 4096}
|
|
|
|
for _, sc := range scenarios {
|
|
b.Run(sc.name, func(b *testing.B) {
|
|
b.Run(fmt.Sprintf("values_per_page=%d", sc.valueCount), func(b *testing.B) {
|
|
for _, batchSize := range batchSizes {
|
|
b.Run(fmt.Sprintf("batch_size=%d", batchSize), func(b *testing.B) {
|
|
var alloc memory.Allocator
|
|
|
|
b.ReportAllocs()
|
|
// Approximate bytes: data + offsets
|
|
decodedBytesPerOp := int64(sc.encoded.RowCount) * 15 // ~15 bytes avg string
|
|
b.SetBytes(decodedBytesPerOp)
|
|
|
|
for b.Loop() {
|
|
alloc.Reset()
|
|
|
|
r, _ := array.NewReader(&alloc, sc.encoded, &store)
|
|
|
|
var decoded int
|
|
for {
|
|
arr, err := r.Read(b.Context(), &alloc, batchSize)
|
|
if arr != nil {
|
|
decoded += arr.Len()
|
|
}
|
|
|
|
if errors.Is(err, io.EOF) {
|
|
break
|
|
} else if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
|
|
if decoded != sc.valueCount {
|
|
b.Fatalf("decoded %d values, expected %d", decoded, sc.valueCount)
|
|
}
|
|
}
|
|
|
|
elapsed := b.Elapsed()
|
|
if elapsed > 0 {
|
|
totalDecoded := int64(sc.valueCount) * int64(b.N)
|
|
b.ReportMetric(float64(totalDecoded)/elapsed.Seconds(), "rows/s")
|
|
}
|
|
})
|
|
}
|
|
})
|
|
})
|
|
}
|
|
}
|
|
|