mirror of https://github.com/grafana/loki
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
287 lines
9.8 KiB
287 lines
9.8 KiB
package index
|
|
|
|
import (
|
|
"context"
|
|
"math"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/prometheus/prometheus/model/labels"
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/grafana/loki/v3/pkg/dataobj/sections/logs"
|
|
"github.com/grafana/loki/v3/pkg/dataobj/sections/postings"
|
|
)
|
|
|
|
// makeColumnValuesStats creates a logs.Stats with a set of metadata columns
|
|
// as if Prepare would receive them from a real logs section.
|
|
func makeColumnValuesStats(metadataColumns []string) logs.Stats {
|
|
var stats logs.Stats
|
|
// Non-metadata columns come first (streamid, timestamp, message).
|
|
stats.Columns = append(stats.Columns, logs.ColumnStats{
|
|
Name: "stream_id",
|
|
Type: "stream_id",
|
|
ColumnIndex: 0,
|
|
Cardinality: 10,
|
|
})
|
|
stats.Columns = append(stats.Columns, logs.ColumnStats{
|
|
Name: "timestamp",
|
|
Type: "timestamp",
|
|
ColumnIndex: 1,
|
|
Cardinality: 100,
|
|
})
|
|
for i, name := range metadataColumns {
|
|
stats.Columns = append(stats.Columns, logs.ColumnStats{
|
|
Name: name,
|
|
Type: "metadata",
|
|
ColumnIndex: int64(2 + i),
|
|
Cardinality: 5,
|
|
})
|
|
}
|
|
stats.Columns = append(stats.Columns, logs.ColumnStats{
|
|
Name: "message",
|
|
Type: "message",
|
|
ColumnIndex: int64(2 + len(metadataColumns)),
|
|
Cardinality: 50,
|
|
})
|
|
return stats
|
|
}
|
|
|
|
func TestColumnValuesCalculation_BloomPostingAppended(t *testing.T) {
|
|
builder := newTestIndexBuilder(t)
|
|
calcCtx := &logsCalculationContext{
|
|
tenantID: "tenant-1",
|
|
objectPath: "test/path/obj1",
|
|
sectionIdx: 0,
|
|
streamLabels: makeTestStreamLabels(),
|
|
builder: builder,
|
|
}
|
|
|
|
calc := &columnValuesCalculation{}
|
|
stat := makeColumnValuesStats([]string{"trace_id", "span_id"})
|
|
require.NoError(t, calc.Prepare(context.Background(), calcCtx, nil, stat))
|
|
|
|
ts1 := time.Unix(10, 0).UTC()
|
|
ts2 := time.Unix(20, 0).UTC()
|
|
ts3 := time.Unix(15, 0).UTC()
|
|
|
|
line1 := []byte("hello from stream 1")
|
|
line2 := []byte("hello again")
|
|
|
|
// Stream 1 has both trace_id and span_id metadata.
|
|
// Stream 2 has only trace_id metadata.
|
|
batch := []logs.Record{
|
|
{
|
|
StreamID: 1,
|
|
Timestamp: ts1,
|
|
Line: line1,
|
|
Metadata: labels.FromStrings("trace_id", "abc", "span_id", "111"),
|
|
},
|
|
{
|
|
StreamID: 2,
|
|
Timestamp: ts2,
|
|
Line: line2,
|
|
Metadata: labels.FromStrings("trace_id", "def"),
|
|
},
|
|
{
|
|
StreamID: 1,
|
|
Timestamp: ts3,
|
|
Line: line1,
|
|
Metadata: labels.FromStrings("trace_id", "ghi"),
|
|
},
|
|
}
|
|
|
|
require.NoError(t, calc.ProcessBatch(context.Background(), calcCtx, batch))
|
|
require.NoError(t, calc.Flush(context.Background(), calcCtx))
|
|
|
|
tbl := flushAndReadAllPostingsTable(t, builder)
|
|
|
|
// We expect 2 bloom postings: one for trace_id, one for span_id.
|
|
i := findRow(tbl.rows, map[string]any{
|
|
"kind.int64": int64(postings.KindBloom),
|
|
"column_name.utf8": "trace_id",
|
|
})
|
|
require.NotEqual(t, -1, i, "expected bloom posting for trace_id")
|
|
|
|
j := findRow(tbl.rows, map[string]any{
|
|
"kind.int64": int64(postings.KindBloom),
|
|
"column_name.utf8": "span_id",
|
|
})
|
|
require.NotEqual(t, -1, j, "expected bloom posting for span_id")
|
|
|
|
// Bloom filter bytes should be non-empty.
|
|
require.NotEmpty(t, tbl.opaque["bloom_filter.binary"][i], "expected non-empty bloom filter for trace_id")
|
|
require.NotEmpty(t, tbl.opaque["bloom_filter.binary"][j], "expected non-empty bloom filter for span_id")
|
|
|
|
// Stream ID bitmap should be non-empty.
|
|
require.NotEmpty(t, tbl.opaque["stream_id_bitmap.binary"][i], "expected non-empty stream bitmap for trace_id")
|
|
require.NotEmpty(t, tbl.opaque["stream_id_bitmap.binary"][j], "expected non-empty stream bitmap for span_id")
|
|
}
|
|
|
|
func TestColumnValuesCalculation_TimestampsAndSizes(t *testing.T) {
|
|
builder := newTestIndexBuilder(t)
|
|
calcCtx := &logsCalculationContext{
|
|
tenantID: "tenant-1",
|
|
objectPath: "test/path/obj1",
|
|
sectionIdx: 0,
|
|
streamLabels: makeTestStreamLabels(),
|
|
builder: builder,
|
|
}
|
|
|
|
calc := &columnValuesCalculation{}
|
|
stat := makeColumnValuesStats([]string{"trace_id"})
|
|
require.NoError(t, calc.Prepare(context.Background(), calcCtx, nil, stat))
|
|
|
|
ts1 := time.Unix(10, 0).UTC()
|
|
ts2 := time.Unix(20, 0).UTC()
|
|
ts3 := time.Unix(30, 0).UTC() // latest timestamp — proves third record is processed
|
|
|
|
line1 := []byte("first")
|
|
line2 := []byte("second line")
|
|
line3 := []byte("third")
|
|
|
|
batch := []logs.Record{
|
|
{StreamID: 1, Timestamp: ts1, Line: line1, Metadata: labels.FromStrings("trace_id", "aaa")},
|
|
{StreamID: 2, Timestamp: ts2, Line: line2, Metadata: labels.FromStrings("trace_id", "bbb")},
|
|
{StreamID: 1, Timestamp: ts3, Line: line3, Metadata: labels.FromStrings("trace_id", "ccc")},
|
|
}
|
|
|
|
require.NoError(t, calc.ProcessBatch(context.Background(), calcCtx, batch))
|
|
require.NoError(t, calc.Flush(context.Background(), calcCtx))
|
|
|
|
tbl := flushAndReadAllPostingsTable(t, builder)
|
|
|
|
i := findRow(tbl.rows, map[string]any{
|
|
"kind.int64": int64(postings.KindBloom),
|
|
"column_name.utf8": "trace_id",
|
|
})
|
|
require.NotEqual(t, -1, i, "expected bloom posting for trace_id")
|
|
|
|
row := tbl.rows[i]
|
|
// Timestamps: min=ts1 (10s), max=ts3 (30s) — ts3 is the latest, proving the
|
|
// third record's timestamp was tracked.
|
|
require.Equal(t, ts1.UTC(), row["min_timestamp.timestamp"])
|
|
require.Equal(t, ts3.UTC(), row["max_timestamp.timestamp"])
|
|
|
|
// Size: sum of line lengths for records that have trace_id metadata.
|
|
expectedSize := int64(len(line1) + len(line2) + len(line3))
|
|
require.Equal(t, expectedSize, row["uncompressed_size.int64"])
|
|
}
|
|
|
|
func TestColumnValuesCalculation_StreamIDBitmapBitsSet(t *testing.T) {
|
|
builder := newTestIndexBuilder(t)
|
|
calcCtx := &logsCalculationContext{
|
|
tenantID: "tenant-1",
|
|
objectPath: "test/path/obj1",
|
|
sectionIdx: 0,
|
|
streamLabels: makeTestStreamLabels(),
|
|
builder: builder,
|
|
}
|
|
|
|
calc := &columnValuesCalculation{}
|
|
stat := makeColumnValuesStats([]string{"trace_id"})
|
|
require.NoError(t, calc.Prepare(context.Background(), calcCtx, nil, stat))
|
|
|
|
// Only stream 1 has trace_id metadata; stream 2 does not.
|
|
batch := []logs.Record{
|
|
{StreamID: 1, Timestamp: time.Unix(10, 0).UTC(), Line: []byte("a"), Metadata: labels.FromStrings("trace_id", "x")},
|
|
{StreamID: 2, Timestamp: time.Unix(20, 0).UTC(), Line: []byte("b")}, // no trace_id
|
|
}
|
|
|
|
require.NoError(t, calc.ProcessBatch(context.Background(), calcCtx, batch))
|
|
require.NoError(t, calc.Flush(context.Background(), calcCtx))
|
|
|
|
tbl := flushAndReadAllPostingsTable(t, builder)
|
|
|
|
i := findRow(tbl.rows, map[string]any{
|
|
"kind.int64": int64(postings.KindBloom),
|
|
"column_name.utf8": "trace_id",
|
|
})
|
|
require.NotEqual(t, -1, i, "expected bloom posting for trace_id")
|
|
|
|
// Bitmap should have bit 1 set (stream ID 1 has trace_id).
|
|
require.NotEmpty(t, tbl.opaque["stream_id_bitmap.binary"][i])
|
|
}
|
|
|
|
func TestColumnValuesCalculation_EmptyBatch(t *testing.T) {
|
|
builder := newTestIndexBuilder(t)
|
|
calcCtx := &logsCalculationContext{
|
|
tenantID: "tenant-1",
|
|
objectPath: "test/path/obj1",
|
|
sectionIdx: 0,
|
|
streamLabels: makeTestStreamLabels(),
|
|
builder: builder,
|
|
}
|
|
|
|
calc := &columnValuesCalculation{}
|
|
stat := makeColumnValuesStats([]string{"trace_id"})
|
|
require.NoError(t, calc.Prepare(context.Background(), calcCtx, nil, stat))
|
|
require.NoError(t, calc.ProcessBatch(context.Background(), calcCtx, nil))
|
|
require.NoError(t, calc.Flush(context.Background(), calcCtx))
|
|
|
|
// The column was registered during Prepare, so Flush still appends a bloom
|
|
// posting for trace_id — but with empty data since no records were processed.
|
|
tbl := flushAndReadAllPostingsTable(t, builder)
|
|
|
|
i := findRow(tbl.rows, map[string]any{
|
|
"kind.int64": int64(postings.KindBloom),
|
|
"column_name.utf8": "trace_id",
|
|
})
|
|
require.NotEqual(t, -1, i, "expected bloom posting for trace_id even with empty batch")
|
|
|
|
row := tbl.rows[i]
|
|
// With the bloom aggregator, an unobserved but prepared column uses sentinel values:
|
|
// MinTimestamp = math.MaxInt64 and MaxTimestamp = math.MinInt64.
|
|
// These are stored and read back as time.Time values.
|
|
sentinelMinTimestamp := time.Unix(0, math.MaxInt64).UTC() // unobserved min starts at max possible
|
|
sentinelMaxTimestamp := time.Unix(0, math.MinInt64).UTC() // unobserved max starts at min possible
|
|
require.Equal(t, sentinelMinTimestamp, row["min_timestamp.timestamp"], "no records means sentinel max int64 for min timestamp")
|
|
require.Equal(t, sentinelMaxTimestamp, row["max_timestamp.timestamp"], "no records means sentinel min int64 for max timestamp")
|
|
require.Equal(t, int64(0), row["uncompressed_size.int64"], "no records means zero size")
|
|
}
|
|
|
|
func TestColumnValuesCalculation_MultipleBatches(t *testing.T) {
|
|
builder := newTestIndexBuilder(t)
|
|
calcCtx := &logsCalculationContext{
|
|
tenantID: "tenant-1",
|
|
objectPath: "test/path/obj1",
|
|
sectionIdx: 0,
|
|
streamLabels: makeTestStreamLabels(),
|
|
builder: builder,
|
|
}
|
|
|
|
calc := &columnValuesCalculation{}
|
|
stat := makeColumnValuesStats([]string{"trace_id"})
|
|
require.NoError(t, calc.Prepare(context.Background(), calcCtx, nil, stat))
|
|
|
|
ts1 := time.Unix(10, 0).UTC()
|
|
ts2 := time.Unix(30, 0).UTC()
|
|
|
|
batch1 := []logs.Record{
|
|
{StreamID: 1, Timestamp: ts1, Line: []byte("first"), Metadata: labels.FromStrings("trace_id", "aaa")},
|
|
}
|
|
batch2 := []logs.Record{
|
|
{StreamID: 2, Timestamp: ts2, Line: []byte("second"), Metadata: labels.FromStrings("trace_id", "bbb")},
|
|
}
|
|
|
|
require.NoError(t, calc.ProcessBatch(context.Background(), calcCtx, batch1))
|
|
require.NoError(t, calc.ProcessBatch(context.Background(), calcCtx, batch2))
|
|
require.NoError(t, calc.Flush(context.Background(), calcCtx))
|
|
|
|
tbl := flushAndReadAllPostingsTable(t, builder)
|
|
|
|
i := findRow(tbl.rows, map[string]any{
|
|
"kind.int64": int64(postings.KindBloom),
|
|
"column_name.utf8": "trace_id",
|
|
})
|
|
require.NotEqual(t, -1, i, "expected bloom posting for trace_id")
|
|
|
|
row := tbl.rows[i]
|
|
// Timestamps should span both batches.
|
|
require.Equal(t, ts1.UTC(), row["min_timestamp.timestamp"])
|
|
require.Equal(t, ts2.UTC(), row["max_timestamp.timestamp"])
|
|
|
|
// Size: both lines.
|
|
expectedSize := int64(len("first") + len("second"))
|
|
require.Equal(t, expectedSize, row["uncompressed_size.int64"])
|
|
}
|
|
|