Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/dataobj/sections/pointers/reader_test.go

692 lines
27 KiB

package pointers_test
import (
"bytes"
"context"
"errors"
"io"
"testing"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
"github.com/apache/arrow-go/v18/arrow/memory"
"github.com/apache/arrow-go/v18/arrow/scalar"
"github.com/stretchr/testify/require"
"github.com/grafana/loki/v3/pkg/dataobj"
"github.com/grafana/loki/v3/pkg/dataobj/sections/pointers"
"github.com/grafana/loki/v3/pkg/util/arrowtest"
)
// TestReader does a basic end-to-end test over a reader with a predicate applied.
func TestReader(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindColumnIndex, ColumnName: "col1", ColumnIndex: 0, ValuesBloomFilter: []byte{1, 2, 3}},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindColumnIndex, ColumnName: "col2", ColumnIndex: 1, ValuesBloomFilter: []byte{4, 5, 6}},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
pointerKindCol = sec.Columns()[2]
streamIDCol = sec.Columns()[3]
streamIDRefCol = sec.Columns()[4]
minTimestampCol = sec.Columns()[5]
maxTimestampCol = sec.Columns()[6]
rowCountCol = sec.Columns()[7]
uncompressedCol = sec.Columns()[8]
columnNameCol = sec.Columns()[9]
columnIndexCol = sec.Columns()[10]
valuesBloomCol = sec.Columns()[11]
)
require.Equal(t, "path", pathCol.Name)
require.Equal(t, pointers.ColumnTypePath, pathCol.Type)
require.Equal(t, "", sectionCol.Name)
require.Equal(t, pointers.ColumnTypeSection, sectionCol.Type)
require.Equal(t, "", pointerKindCol.Name)
require.Equal(t, pointers.ColumnTypePointerKind, pointerKindCol.Type)
require.Equal(t, "", streamIDCol.Name)
require.Equal(t, pointers.ColumnTypeStreamID, streamIDCol.Type)
require.Equal(t, "", streamIDRefCol.Name)
require.Equal(t, pointers.ColumnTypeStreamIDRef, streamIDRefCol.Type)
require.Equal(t, "", minTimestampCol.Name)
require.Equal(t, pointers.ColumnTypeMinTimestamp, minTimestampCol.Type)
require.Equal(t, "", maxTimestampCol.Name)
require.Equal(t, pointers.ColumnTypeMaxTimestamp, maxTimestampCol.Type)
require.Equal(t, "", rowCountCol.Name)
require.Equal(t, pointers.ColumnTypeRowCount, rowCountCol.Type)
require.Equal(t, "", uncompressedCol.Name)
require.Equal(t, pointers.ColumnTypeUncompressedSize, uncompressedCol.Type)
require.Equal(t, "column_name", columnNameCol.Name)
require.Equal(t, pointers.ColumnTypeColumnName, columnNameCol.Type)
require.Equal(t, "", columnIndexCol.Name)
require.Equal(t, pointers.ColumnTypeColumnIndex, columnIndexCol.Type)
require.Equal(t, "values_bloom_filter", valuesBloomCol.Name)
require.Equal(t, pointers.ColumnTypeValuesBloomFilter, valuesBloomCol.Type)
for _, tt := range []struct {
name string
columns []*pointers.Column
expected arrowtest.Rows
}{
{
name: "basic reads with predicate",
columns: []*pointers.Column{pathCol, sectionCol, pointerKindCol, streamIDCol, streamIDRefCol},
expected: arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1), "pointer_kind.int64": int64(pointers.PointerKindStreamIndex), "stream_id.int64": int64(10), "stream_id_ref.int64": int64(100)},
{"path.path.utf8": "path2", "section.int64": int64(2), "pointer_kind.int64": int64(pointers.PointerKindStreamIndex), "stream_id.int64": int64(20), "stream_id_ref.int64": int64(200)},
},
},
// tests that the reader evaluates predicates correctly even when only some columns are selected for output
{
name: "reads with subset of columns",
columns: []*pointers.Column{pathCol, sectionCol, pointerKindCol, streamIDCol},
expected: arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1), "pointer_kind.int64": int64(pointers.PointerKindStreamIndex), "stream_id.int64": int64(10)},
{"path.path.utf8": "path2", "section.int64": int64(2), "pointer_kind.int64": int64(pointers.PointerKindStreamIndex), "stream_id.int64": int64(20)},
},
},
// tests reading all columns
{
name: "read all columns for stream pointers",
columns: []*pointers.Column{
pathCol, sectionCol, pointerKindCol, streamIDCol, streamIDRefCol,
minTimestampCol, maxTimestampCol, rowCountCol, uncompressedCol,
},
expected: arrowtest.Rows{
{
"path.path.utf8": "path1",
"section.int64": int64(1),
"pointer_kind.int64": int64(pointers.PointerKindStreamIndex),
"stream_id.int64": int64(10),
"stream_id_ref.int64": int64(100),
"min_timestamp.timestamp": unixTime(10).UTC(),
"max_timestamp.timestamp": unixTime(20).UTC(),
"row_count.int64": int64(2),
"uncompressed_size.int64": int64(1024),
},
{
"path.path.utf8": "path2",
"section.int64": int64(2),
"pointer_kind.int64": int64(pointers.PointerKindStreamIndex),
"stream_id.int64": int64(20),
"stream_id_ref.int64": int64(200),
"min_timestamp.timestamp": unixTime(30).UTC(),
"max_timestamp.timestamp": unixTime(40).UTC(),
"row_count.int64": int64(2),
"uncompressed_size.int64": int64(2048),
},
},
},
} {
t.Run(tt.name, func(t *testing.T) {
r := pointers.NewReader(pointers.ReaderOptions{
Columns: tt.columns,
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.InPredicate{
Column: streamIDCol,
Values: []scalar.Scalar{
scalar.NewInt64Scalar(10),
scalar.NewInt64Scalar(20),
},
},
pointers.EqualPredicate{
Column: pointerKindCol,
Value: scalar.NewInt64Scalar(int64(pointers.PointerKindStreamIndex)),
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err, "failed to get rows from table")
require.Equal(t, tt.expected, actual)
})
}
}
// TestReaderWithEqualPredicate tests reading with an EqualPredicate.
func TestReaderWithEqualPredicate(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
streamIDCol = sec.Columns()[3]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, streamIDCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.EqualPredicate{
Column: streamIDCol,
Value: scalar.NewInt64Scalar(20),
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path2", "section.int64": int64(2), "stream_id.int64": int64(20)},
}
require.Equal(t, expected, actual)
}
// TestReaderWithInPredicate tests reading with an InPredicate.
func TestReaderWithInPredicate(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
{Path: "path4", Section: 4, PointerKind: pointers.PointerKindStreamIndex, StreamID: 40, StreamIDRef: 400, StartTs: unixTime(70), EndTs: unixTime(80), LineCount: 20, UncompressedSize: 4096},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
streamIDCol = sec.Columns()[3]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, streamIDCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.InPredicate{
Column: streamIDCol,
Values: []scalar.Scalar{
scalar.NewInt64Scalar(10),
scalar.NewInt64Scalar(30),
},
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1), "stream_id.int64": int64(10)},
{"path.path.utf8": "path3", "section.int64": int64(3), "stream_id.int64": int64(30)},
}
require.Equal(t, expected, actual)
}
// TestReaderWithGreaterThanPredicate tests reading with a GreaterThanPredicate.
func TestReaderWithGreaterThanPredicate(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
streamIDCol = sec.Columns()[3]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, streamIDCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.GreaterThanPredicate{
Column: streamIDCol,
Value: scalar.NewInt64Scalar(15),
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path2", "section.int64": int64(2), "stream_id.int64": int64(20)},
{"path.path.utf8": "path3", "section.int64": int64(3), "stream_id.int64": int64(30)},
}
require.Equal(t, expected, actual)
}
// TestReaderWithLessThanPredicate tests reading with a LessThanPredicate.
func TestReaderWithLessThanPredicate(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
streamIDCol = sec.Columns()[3]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, streamIDCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.LessThanPredicate{
Column: streamIDCol,
Value: scalar.NewInt64Scalar(25),
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1), "stream_id.int64": int64(10)},
{"path.path.utf8": "path2", "section.int64": int64(2), "stream_id.int64": int64(20)},
}
require.Equal(t, expected, actual)
}
// TestReaderWithTimestampPredicates tests reading with timestamp predicates.
func TestReaderWithTimestampPredicates(t *testing.T) {
var (
t10 = unixTime(10)
t20 = unixTime(20)
t25 = unixTime(25)
t25s = scalar.NewTimestampScalar(arrow.Timestamp(t25.UnixNano()), &arrow.TimestampType{Unit: arrow.Nanosecond})
t30 = unixTime(30)
t40 = unixTime(40)
t50 = unixTime(50)
t55 = unixTime(55)
t55s = scalar.NewTimestampScalar(arrow.Timestamp(t55.UnixNano()), &arrow.TimestampType{Unit: arrow.Nanosecond})
t60 = unixTime(60)
)
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: t10, EndTs: t20, LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: t30, EndTs: t40, LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: t50, EndTs: t60, LineCount: 15, UncompressedSize: 3072},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
minTimestampCol = sec.Columns()[5]
maxTimestampCol = sec.Columns()[6]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, minTimestampCol, maxTimestampCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.WhereTimeRangeOverlapsWith(minTimestampCol, maxTimestampCol, t25s, t55s),
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{
"path.path.utf8": "path2",
"section.int64": int64(2),
"min_timestamp.timestamp": t30.UTC(),
"max_timestamp.timestamp": t40.UTC(),
},
{
"path.path.utf8": "path3",
"section.int64": int64(3),
"min_timestamp.timestamp": t50.UTC(),
"max_timestamp.timestamp": t60.UTC(),
},
}
require.Equal(t, expected, actual)
}
// TestReaderWithFuncPredicate tests reading with a FuncPredicate.
func TestReaderWithFuncPredicate(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.FuncPredicate{
Column: pathCol,
Keep: func(_ *pointers.Column, value scalar.Scalar) bool {
if !value.IsValid() {
return false
}
bb := value.(*scalar.String).Value.Bytes()
return bytes.Equal(bb, []byte("path1")) || bytes.Equal(bb, []byte("path3"))
},
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1)},
{"path.path.utf8": "path3", "section.int64": int64(3)},
}
require.Equal(t, expected, actual)
}
// TestReaderWithAndPredicate tests reading with an AndPredicate.
func TestReaderWithAndPredicate(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
streamIDCol = sec.Columns()[3]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, streamIDCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.AndPredicate{
Left: pointers.GreaterThanPredicate{
Column: streamIDCol,
Value: scalar.NewInt64Scalar(5),
},
Right: pointers.LessThanPredicate{
Column: streamIDCol,
Value: scalar.NewInt64Scalar(25),
},
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1), "stream_id.int64": int64(10)},
{"path.path.utf8": "path2", "section.int64": int64(2), "stream_id.int64": int64(20)},
}
require.Equal(t, expected, actual)
}
// TestReaderWithOrPredicate tests reading with an OrPredicate.
func TestReaderWithOrPredicate(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
streamIDCol = sec.Columns()[3]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, streamIDCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.OrPredicate{
Left: pointers.EqualPredicate{
Column: streamIDCol,
Value: scalar.NewInt64Scalar(10),
},
Right: pointers.EqualPredicate{
Column: streamIDCol,
Value: scalar.NewInt64Scalar(30),
},
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1), "stream_id.int64": int64(10)},
{"path.path.utf8": "path3", "section.int64": int64(3), "stream_id.int64": int64(30)},
}
require.Equal(t, expected, actual)
}
// TestReaderWithNotPredicate tests reading with a NotPredicate.
func TestReaderWithNotPredicate(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 30, StreamIDRef: 300, StartTs: unixTime(50), EndTs: unixTime(60), LineCount: 15, UncompressedSize: 3072},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
streamIDCol = sec.Columns()[3]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, streamIDCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.NotPredicate{
Inner: pointers.EqualPredicate{
Column: streamIDCol,
Value: scalar.NewInt64Scalar(20),
},
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1), "stream_id.int64": int64(10)},
{"path.path.utf8": "path3", "section.int64": int64(3), "stream_id.int64": int64(30)},
}
require.Equal(t, expected, actual)
}
// TestReaderWithColumnIndexPointers tests reading column index pointers.
func TestReaderWithColumnIndexPointers(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindColumnIndex, ColumnName: "col1", ColumnIndex: 0, ValuesBloomFilter: []byte{1, 2, 3}},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindColumnIndex, ColumnName: "col2", ColumnIndex: 1, ValuesBloomFilter: []byte{4, 5, 6}},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindColumnIndex, ColumnName: "col3", ColumnIndex: 2, ValuesBloomFilter: []byte{7, 8, 9}},
})
cols := sec.Columns()
require.Len(t, cols, 6, "expected 6 columns for column index pointers")
var (
pathCol = cols[0]
sectionCol = cols[1]
pointerKindCol = cols[2]
columnNameCol = cols[3]
columnIndexCol = cols[4]
valuesBloomCol = cols[5]
)
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{
pathCol, sectionCol, pointerKindCol, columnNameCol, columnIndexCol, valuesBloomCol,
},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{
pointers.EqualPredicate{
Column: pointerKindCol,
Value: scalar.NewInt64Scalar(int64(pointers.PointerKindColumnIndex)),
},
},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{
"path.path.utf8": "path1",
"section.int64": int64(1),
"pointer_kind.int64": int64(pointers.PointerKindColumnIndex),
"column_name.column_name.utf8": "col1",
"column_index.int64": int64(0),
"values_bloom_filter.values_bloom_filter.binary": []byte{1, 2, 3},
},
{
"path.path.utf8": "path2",
"section.int64": int64(2),
"pointer_kind.int64": int64(pointers.PointerKindColumnIndex),
"column_name.column_name.utf8": "col2",
"column_index.int64": int64(1),
"values_bloom_filter.values_bloom_filter.binary": []byte{4, 5, 6},
},
{
"path.path.utf8": "path3",
"section.int64": int64(3),
"pointer_kind.int64": int64(pointers.PointerKindColumnIndex),
"column_name.column_name.utf8": "col3",
"column_index.int64": int64(2),
"values_bloom_filter.values_bloom_filter.binary": []byte{7, 8, 9},
},
}
require.Equal(t, expected, actual)
}
// TestReaderWithMixedPointers tests reading both stream and column index pointers.
func TestReaderWithMixedPointers(t *testing.T) {
sec := buildSection(t, []pointers.SectionPointer{
{Path: "path1", Section: 1, PointerKind: pointers.PointerKindStreamIndex, StreamID: 10, StreamIDRef: 100, StartTs: unixTime(10), EndTs: unixTime(20), LineCount: 5, UncompressedSize: 1024},
{Path: "path2", Section: 2, PointerKind: pointers.PointerKindColumnIndex, ColumnName: "col1", ColumnIndex: 0, ValuesBloomFilter: []byte{1, 2, 3}},
{Path: "path3", Section: 3, PointerKind: pointers.PointerKindStreamIndex, StreamID: 20, StreamIDRef: 200, StartTs: unixTime(30), EndTs: unixTime(40), LineCount: 10, UncompressedSize: 2048},
})
var (
pathCol = sec.Columns()[0]
sectionCol = sec.Columns()[1]
pointerKindCol = sec.Columns()[2]
)
// Read all pointer kinds without filtering
r := pointers.NewReader(pointers.ReaderOptions{
Columns: []*pointers.Column{pathCol, sectionCol, pointerKindCol},
Allocator: memory.DefaultAllocator,
Predicates: []pointers.Predicate{},
})
actualTable, err := readTable(context.Background(), r)
require.NoError(t, err)
actual, err := arrowtest.TableRows(memory.DefaultAllocator, actualTable)
require.NoError(t, err)
expected := arrowtest.Rows{
{"path.path.utf8": "path1", "section.int64": int64(1), "pointer_kind.int64": int64(pointers.PointerKindStreamIndex)},
{"path.path.utf8": "path3", "section.int64": int64(3), "pointer_kind.int64": int64(pointers.PointerKindStreamIndex)},
{"path.path.utf8": "path2", "section.int64": int64(2), "pointer_kind.int64": int64(pointers.PointerKindColumnIndex)},
}
require.Equal(t, expected, actual)
}
func buildSection(t *testing.T, ptrData []pointers.SectionPointer) *pointers.Section {
t.Helper()
sectionBuilder := pointers.NewBuilder(nil, 0, 2)
for _, ptr := range ptrData {
if ptr.PointerKind == pointers.PointerKindStreamIndex {
sectionBuilder.ObserveStream(ptr.Path, ptr.Section, ptr.StreamIDRef, ptr.StreamID, ptr.StartTs, ptr.UncompressedSize)
sectionBuilder.ObserveStream(ptr.Path, ptr.Section, ptr.StreamIDRef, ptr.StreamID, ptr.EndTs, 0)
} else {
sectionBuilder.RecordColumnIndex(ptr.Path, ptr.Section, ptr.ColumnName, ptr.ColumnIndex, ptr.ValuesBloomFilter)
}
}
objectBuilder := dataobj.NewBuilder(nil)
require.NoError(t, objectBuilder.Append(sectionBuilder))
obj, closer, err := objectBuilder.Flush()
require.NoError(t, err)
t.Cleanup(func() { closer.Close() })
sec, err := pointers.Open(t.Context(), obj.Sections()[0])
require.NoError(t, err)
return sec
}
func readTable(ctx context.Context, r *pointers.Reader) (arrow.Table, error) {
var recs []arrow.RecordBatch
for {
rec, err := r.Read(ctx, 128)
if rec != nil {
if rec.NumRows() > 0 {
recs = append(recs, rec)
}
}
if err != nil && errors.Is(err, io.EOF) {
break
} else if err != nil {
return nil, err
}
}
if len(recs) == 0 {
return nil, io.EOF
}
return array.NewTableFromRecords(recs[0].Schema(), recs), nil
}