// loki/pkg/dataobj/internal/dataset/reader_test.go

package dataset

import (
	"context"
	"errors"
	"io"
	"slices"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd"
	"github.com/grafana/loki/v3/pkg/dataobj/internal/result"
	"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
)
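
// Test_Reader_ReadAll asserts that a Reader with no predicate returns every
// row in the dataset.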
func Test_Reader_ReadAll(t *testing.T) {
	dset, columns := buildTestDataset(t)

	r := NewReader(ReaderOptions{Dataset: dset, Columns: columns})
	defer r.Close()

	actualRows, err := readDataset(r, 3)
	require.NoError(t, err)
	require.Equal(t, basicReaderTestData, convertToTestPersons(actualRows))
}
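
// Test_Reader_ReadWithPredicate asserts that a Reader only returns rows
// matching its predicate.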
func Test_Reader_ReadWithPredicate(t *testing.T) {
	dset, columns := buildTestDataset(t)

	// Create a predicate that only returns people born after 1985.
	r := NewReader(ReaderOptions{
		Dataset: dset,
		Columns: columns,
		Predicate: GreaterThanPredicate{
			Column: columns[3], // birth_year column
			Value:  Int64Value(1985),
		},
	})
	defer r.Close()

	actualRows, err := readDataset(r, 3)
	require.NoError(t, err)

	// Filter the expected data manually to verify.
	var expected []testPerson
	for _, p := range basicReaderTestData {
		if p.birthYear > 1985 {
			expected = append(expected, p)
		}
	}
	require.Equal(t, expected, convertToTestPersons(actualRows))
}

// Test_Reader_ReadWithPageFiltering tests that a Reader can filter rows based
// on a predicate that has filtered out pages.
func Test_Reader_ReadWithPageFiltering(t *testing.T) {
	dset, columns := buildTestDataset(t)

	r := NewReader(ReaderOptions{
		Dataset: dset,
		Columns: columns,

		// Henry is out of range of every page except the first and the last,
		// so the other pages should be filtered out during reading.
		//
		// TODO(rfratto): make it easier to prove that a predicate includes a value
		// which is out of range of at least one page.
		Predicate: EqualPredicate{
			Column: columns[0], // first_name column
			Value:  ByteArrayValue([]byte("Henry")),
		},
	})
	defer r.Close()

	actualRows, err := readDataset(r, 3)
	require.NoError(t, err)

	// Filter the expected data manually to verify.
	var expected []testPerson
	for _, p := range basicReaderTestData {
		if p.firstName == "Henry" {
			expected = append(expected, p)
		}
	}
	require.Equal(t, expected, convertToTestPersons(actualRows))
}
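
// Test_Reader_ReadWithPredicate_NoSecondary asserts that predicates still
// apply when the predicate column is the only column being read, leaving no
// secondary columns to fill after filtering.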
func Test_Reader_ReadWithPredicate_NoSecondary(t *testing.T) {
	dset, columns := buildTestDataset(t)

	// Create a predicate that only returns people born after 1985.
	r := NewReader(ReaderOptions{
		Dataset: dset,
		Columns: []Column{columns[3]},
		Predicate: GreaterThanPredicate{
			Column: columns[3], // birth_year column
			Value:  Int64Value(1985),
		},
	})
	defer r.Close()

	actualRows, err := readDataset(r, 3)
	require.NoError(t, err)

	// Filter the expected data manually to verify.
	var expected []int
	for _, p := range basicReaderTestData {
		if p.birthYear > 1985 {
			expected = append(expected, int(p.birthYear))
		}
	}

	var actual []int
	for _, row := range actualRows {
		actual = append(actual, int(row.Values[0].Int64()))
	}
	require.Equal(t, expected, actual)
}
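
// Test_Reader_Reset asserts that a Reader can be reused after a call to
// Reset.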
func Test_Reader_Reset(t *testing.T) {
	dset, columns := buildTestDataset(t)

	r := NewReader(ReaderOptions{Dataset: dset, Columns: columns})
	defer r.Close()

	// First read everything.
	_, err := readDataset(r, 3)
	require.NoError(t, err)

	// Reset and read again.
	r.Reset(ReaderOptions{Dataset: dset, Columns: columns})
	actualRows, err := readDataset(r, 3)
	require.NoError(t, err)
	require.Equal(t, basicReaderTestData, convertToTestPersons(actualRows))
}
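
// Test_Reader_Stats asserts that a Reader records row and byte statistics
// from before and after its predicate is applied.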
func Test_Reader_Stats(t *testing.T) {
	dset, columns := buildTestDataset(t)

	// Create a predicate that only returns people born after 1985.
	r := NewReader(ReaderOptions{
		Dataset: dset,
		Columns: columns,
		Predicate: GreaterThanPredicate{
			Column: columns[3], // birth_year column
			Value:  Int64Value(1985),
		},
	})
	defer r.Close()

	statsCtx, ctx := stats.NewContext(context.Background())
	actualRows, err := readDatasetWithContext(ctx, r, 3)
	require.NoError(t, err)

	// Filter the expected data manually to verify.
	var expected []testPerson
	for _, p := range basicReaderTestData {
		if p.birthYear > 1985 {
			expected = append(expected, p)
		}
	}
	require.Equal(t, expected, convertToTestPersons(actualRows))

	primaryColumnBytes := int64(Int64Value(0).Size()) * int64(len(basicReaderTestData)) // size of an Int64Value times all rows

	var totalBytesToFill int64
	for _, row := range actualRows {
		totalBytesToFill += row.Size()
	}
	totalBytesToFill -= int64(Int64Value(0).Size()) * int64(len(expected)) // remove the already-filled primary column values

	// Verify statistics.
	result := statsCtx.Result(0, 0, len(actualRows))
	require.Equal(t, int64(len(basicReaderTestData)), result.Querier.Store.Dataobj.PrePredicateDecompressedRows)
	require.Equal(t, int64(len(expected)), result.Querier.Store.Dataobj.PostPredicateRows)
	require.Equal(t, primaryColumnBytes, result.Querier.Store.Dataobj.PrePredicateDecompressedBytes)
	require.Equal(t, totalBytesToFill, result.Querier.Store.Dataobj.PostPredicateDecompressedBytes)
}
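
// Test_buildMask asserts that buildMask yields the sub-ranges of fullRange
// that are not covered by the given rows.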
func Test_buildMask(t *testing.T) {
	tt := []struct {
		name      string
		fullRange rowRange
		rows      []Row
		expect    []rowRange
	}{
		{
			name:      "no rows",
			fullRange: rowRange{1, 10},
			rows:      nil,
			expect:    []rowRange{{1, 10}},
		},
		{
			name:      "full coverage",
			fullRange: rowRange{1, 10},
			rows:      makeRows(1, 10, 1),
			expect:    nil,
		},
		{
			name:      "full coverage - split",
			fullRange: rowRange{1, 10},
			rows:      mergeRows(makeRows(1, 5, 1), makeRows(6, 10, 1)),
			expect:    nil,
		},
		{
			name:      "partial coverage - front",
			fullRange: rowRange{1, 10},
			rows:      makeRows(1, 5, 1),
			expect:    []rowRange{{6, 10}},
		},
		{
			name:      "partial coverage - middle",
			fullRange: rowRange{1, 10},
			rows:      makeRows(5, 7, 1),
			expect:    []rowRange{{1, 4}, {8, 10}},
		},
		{
			name:      "partial coverage - end",
			fullRange: rowRange{1, 10},
			rows:      makeRows(6, 10, 1),
			expect:    []rowRange{{1, 5}},
		},
		{
			name:      "partial coverage - gaps",
			fullRange: rowRange{1, 10},
			rows:      []Row{{Index: 3}, {Index: 5}, {Index: 7}, {Index: 9}},
			expect:    []rowRange{{1, 2}, {4, 4}, {6, 6}, {8, 8}, {10, 10}},
		},
	}

	for _, tc := range tt {
		t.Run(tc.name, func(t *testing.T) {
			actual := slices.Collect(buildMask(tc.fullRange, tc.rows))
			require.Equal(t, tc.expect, actual)
		})
	}
}
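
// makeRows returns a Row for each index from from to to (inclusive), stepping
// by inc.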
func makeRows(from, to, inc int) []Row {
	var rows []Row
	for i := from; i <= to; i += inc {
		rows = append(rows, Row{Index: i})
	}
	return rows
}
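
// mergeRows concatenates multiple slices of Rows into one.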
func mergeRows(rows ...[]Row) []Row {
	var res []Row
	for _, r := range rows {
		res = append(res, r...)
	}
	return res
}
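
// As a hedged sketch (not part of the original tests): one way a caller might
// use buildMask is to count how many rows of a range remain unread.
// countMaskedRows is a hypothetical helper for illustration only; it assumes
// just the buildMask, rowRange, and Row definitions from this package.
func countMaskedRows(full rowRange, rows []Row) int {
	var n int
	for gap := range buildMask(full, rows) {
		// Each masked rowRange is inclusive on both ends.
		n += int(gap.End-gap.Start) + 1
	}
	return n
}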

// readDataset reads all rows from a Reader using the given batch size.
func readDataset(br *Reader, batchSize int) ([]Row, error) {
	return readDatasetWithContext(context.Background(), br, batchSize)
}

// readDatasetWithContext reads all rows from a Reader using the given batch
// size and context.
func readDatasetWithContext(ctx context.Context, br *Reader, batchSize int) ([]Row, error) {
	var (
		all   []Row
		batch = make([]Row, batchSize)
	)

	for {
		// Clear the batch for each read to ensure that any memory in Row and
		// Value doesn't get reused. See the comment in the implementation of
		// [readBasicReader] for more information.
		clear(batch)

		n, err := br.Read(ctx, batch)
		all = append(all, batch[:n]...)
		if errors.Is(err, io.EOF) {
			return all, nil
		} else if err != nil {
			return all, err
		}
	}
}
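
// Test_BuildPredicateRanges asserts that buildPredicateRanges maps a
// predicate to the set of row ranges whose page statistics may contain
// matching values.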
func Test_BuildPredicateRanges(t *testing.T) {
	ds, cols := buildMemDatasetWithStats(t)

	tt := []struct {
		name      string
		predicate Predicate
		want      rowRanges
	}{
		{
			name:      "nil predicate returns full range",
			predicate: nil,
			want:      rowRanges{{Start: 0, End: 999}}, // Full dataset range
		},
		{
			name:      "equal predicate in range",
			predicate: EqualPredicate{Column: cols[1], Value: Int64Value(50)},
			want:      rowRanges{{Start: 0, End: 249}}, // Page 1 of timestamp column
		},
		{
			name:      "equal predicate not in any range",
			predicate: EqualPredicate{Column: cols[1], Value: Int64Value(1500)},
			want:      nil, // No ranges should match
		},
		{
			name:      "greater than predicate",
			predicate: GreaterThanPredicate{Column: cols[1], Value: Int64Value(400)},
			want:      rowRanges{{Start: 250, End: 749}, {Start: 750, End: 999}}, // Pages 2 and 3 of timestamp column
		},
		{
			name:      "less than predicate",
			predicate: LessThanPredicate{Column: cols[1], Value: Int64Value(300)},
			want:      rowRanges{{Start: 0, End: 249}, {Start: 250, End: 749}}, // Pages 1 and 2 of timestamp column
		},
		{
			name: "and predicate",
			predicate: AndPredicate{
				Left:  EqualPredicate{Column: cols[0], Value: Int64Value(1)},      // Rows 0-299 of stream column
				Right: LessThanPredicate{Column: cols[1], Value: Int64Value(600)}, // Rows 0-249, 250-749 of timestamp column
			},
			want: rowRanges{{Start: 0, End: 249}, {Start: 250, End: 299}},
		},
		{
			name: "or predicate",
			predicate: OrPredicate{
				Left:  EqualPredicate{Column: cols[0], Value: Int64Value(1)},        // Rows 0-299 of stream column
				Right: GreaterThanPredicate{Column: cols[1], Value: Int64Value(800)}, // Rows 750-999 of timestamp column
			},
			want: rowRanges{{Start: 0, End: 299}, {Start: 750, End: 999}}, // Rows 0-299, 750-999
		},
		{
			name: "InPredicate with values inside and outside page ranges",
			predicate: InPredicate{
				Column: cols[1], // timestamp column
				Values: []Value{
					Int64Value(50),  // Inside page 1 (0-100)
					Int64Value(300), // Inside page 2 (200-500)
					Int64Value(150), // Outside all pages
					Int64Value(600), // Outside all pages
				},
			},
			want: rowRanges{
				{Start: 0, End: 249},   // Page 1: contains 50
				{Start: 250, End: 749}, // Page 2: contains 300
			},
		},
		{
			name: "InPredicate with values all outside page ranges",
			predicate: InPredicate{
				Column: cols[1], // timestamp column
				Values: []Value{
					Int64Value(150), // Outside all pages
					Int64Value(600), // Outside all pages
				},
			},
			want: nil, // No pages should be included
		},
	}

	ctx := context.Background()
	for _, tc := range tt {
		t.Run(tc.name, func(t *testing.T) {
			r := NewReader(ReaderOptions{
				Dataset:   ds,
				Columns:   cols,
				Predicate: tc.predicate,
			})
			defer r.Close()

			// Initialize the downloader.
			require.NoError(t, r.initDownloader(ctx))

			got, err := r.buildPredicateRanges(ctx, tc.predicate)
			require.NoError(t, err)
			require.Equal(t, tc.want, got, "row ranges should match expected ranges")
		})
	}
}

// buildMemDatasetWithStats creates a test dataset with only column and page
// stats.
func buildMemDatasetWithStats(t *testing.T) (Dataset, []Column) {
	t.Helper()

	dset := FromMemory([]*MemColumn{
		{
			Info: ColumnInfo{
				Name:      "stream",
				Type:      datasetmd.VALUE_TYPE_INT64,
				RowsCount: 1000, // rows 0 - 999
			},
			Pages: []*MemPage{
				{
					Info: PageInfo{
						RowCount: 300, // rows 0 - 299
						Stats: &datasetmd.Statistics{
							MinValue: encodeInt64Value(t, 1),
							MaxValue: encodeInt64Value(t, 2),
						},
					},
				},
				{
					Info: PageInfo{
						RowCount: 700, // rows 300 - 999
						Stats: &datasetmd.Statistics{
							MinValue: encodeInt64Value(t, 2),
							MaxValue: encodeInt64Value(t, 2),
						},
					},
				},
			},
		},
		{
			Info: ColumnInfo{
				Name:      "timestamp",
				Type:      datasetmd.VALUE_TYPE_INT64,
				RowsCount: 1000, // rows 0 - 999
			},
			Pages: []*MemPage{
				{
					Info: PageInfo{
						RowCount: 250, // rows 0 - 249
						Stats: &datasetmd.Statistics{
							MinValue: encodeInt64Value(t, 0),
							MaxValue: encodeInt64Value(t, 100),
						},
					},
				},
				{
					Info: PageInfo{
						RowCount: 500, // rows 250 - 749
						Stats: &datasetmd.Statistics{
							MinValue: encodeInt64Value(t, 200),
							MaxValue: encodeInt64Value(t, 500),
						},
					},
				},
				{
					Info: PageInfo{
						RowCount: 250, // rows 750 - 999
						Stats: &datasetmd.Statistics{
							MinValue: encodeInt64Value(t, 800),
							MaxValue: encodeInt64Value(t, 1000),
						},
					},
				},
			},
		},
	})

	cols, err := result.Collect(dset.ListColumns(context.Background()))
	require.NoError(t, err)
	return dset, cols
}

// encodeInt64Value encodes an integer value for use in statistics.
func encodeInt64Value(t *testing.T, v int64) []byte {
	t.Helper()

	data, err := Int64Value(v).MarshalBinary()
	require.NoError(t, err)
	return data
}