chore: align benchmark results between chunks and dataobjs (#17127)

Branch: pull/17130/head
Author: Robert Fratto, 1 month ago, committed by GitHub
Parent: 88beefb02a
Commit: bd4331363b
1. pkg/dataobj/internal/sections/logs/iter.go (2 changes)
2. pkg/dataobj/internal/sections/logs/iter_test.go (4 changes)
3. pkg/dataobj/internal/sections/logs/logs_test.go (12 changes)
4. pkg/dataobj/internal/sections/streams/iter.go (4 changes)
5. pkg/dataobj/internal/sections/streams/streams_test.go (16 changes)
6. pkg/dataobj/querier/iter.go (11 changes)
7. pkg/dataobj/streams_reader_test.go (4 changes)
8. pkg/logql/bench/bench_test.go (101 changes)

pkg/dataobj/internal/sections/logs/iter.go
@@ -117,7 +117,7 @@ func Decode(columns []*logsmd.ColumnDesc, row dataset.Row) (Record, error) {
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {
return Record{}, fmt.Errorf("invalid type %s for %s", ty, column.Type)
}
-record.Timestamp = time.Unix(0, columnValue.Int64()).UTC()
+record.Timestamp = time.Unix(0, columnValue.Int64())
case logsmd.COLUMN_TYPE_METADATA:
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_STRING {
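The dropped .UTC() call here (and in the hunks below) is the heart of the commit: time.Unix returns a Time in the local Location, which is presumably the form the chunk-backed store already returns, and testify-style deep equality distinguishes two Times that name the same instant but carry different Locations. A minimal sketch of that distinction (not part of this commit):

    package main

    import (
        "fmt"
        "reflect"
        "time"
    )

    func main() {
        ns := int64(1234567890000000000)

        inLocal := time.Unix(0, ns)     // Location: Local (time.Unix's default)
        inUTC := time.Unix(0, ns).UTC() // Location: UTC

        // Both values name the same instant on the timeline...
        fmt.Println(inLocal.Equal(inUTC)) // true

        // ...but deep comparison, which require.Equal/assert.Equal build on,
        // also looks at the Location field, so the two are "not equal".
        fmt.Println(reflect.DeepEqual(inLocal, inUTC)) // false
    }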

pkg/dataobj/internal/sections/logs/iter_test.go
@@ -40,7 +40,7 @@ func TestDecode(t *testing.T) {
},
expected: Record{
StreamID: 123,
-Timestamp: time.Date(2009, 2, 13, 23, 31, 30, 0, time.UTC),
+Timestamp: time.Unix(0, 1234567890000000000),
Metadata: labels.FromStrings("app", "test-app", "env", "prod"),
Line: []byte("test message"),
},
@@ -63,7 +63,7 @@ func TestDecode(t *testing.T) {
},
expected: Record{
StreamID: 123,
-Timestamp: time.Date(2009, 2, 13, 23, 31, 30, 0, time.UTC),
+Timestamp: time.Unix(0, 1234567890000000000),
Metadata: labels.FromStrings(),
Line: []byte("test message"),
},
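For the record, the new literal is the same instant the old expectation spelled out: 1234567890000000000 ns is 1234567890 s after the Unix epoch, i.e. 2009-02-13 23:31:30 UTC. A quick check (not part of this commit):

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        before := time.Date(2009, 2, 13, 23, 31, 30, 0, time.UTC)
        after := time.Unix(0, 1234567890000000000)

        fmt.Println(after.Equal(before)) // true: same instant, only the Location differs
        fmt.Println(after.UTC())         // 2009-02-13 23:31:30 +0000 UTC
    }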

pkg/dataobj/internal/sections/logs/logs_test.go
@@ -17,19 +17,19 @@ func Test(t *testing.T) {
records := []logs.Record{
{
StreamID: 1,
-Timestamp: time.Unix(10, 0).UTC(),
+Timestamp: time.Unix(10, 0),
Metadata: nil,
Line: []byte("hello world"),
},
{
StreamID: 2,
-Timestamp: time.Unix(100, 0).UTC(),
+Timestamp: time.Unix(100, 0),
Metadata: labels.FromStrings("cluster", "test", "app", "bar"),
Line: []byte("goodbye world"),
},
{
StreamID: 1,
-Timestamp: time.Unix(5, 0).UTC(),
+Timestamp: time.Unix(5, 0),
Metadata: labels.FromStrings("cluster", "test", "app", "foo"),
Line: []byte("foo bar"),
},
@@ -55,7 +55,7 @@ func Test(t *testing.T) {
expect := []logs.Record{
{
StreamID: 1,
-Timestamp: time.Unix(5, 0).UTC(),
+Timestamp: time.Unix(5, 0),
Metadata: labels.FromStrings(
"app", "foo",
"cluster", "test",
@@ -64,13 +64,13 @@ func Test(t *testing.T) {
},
{
StreamID: 1,
-Timestamp: time.Unix(10, 0).UTC(),
+Timestamp: time.Unix(10, 0),
Metadata: labels.FromStrings(),
Line: []byte("hello world"),
},
{
StreamID: 2,
-Timestamp: time.Unix(100, 0).UTC(),
+Timestamp: time.Unix(100, 0),
Metadata: labels.FromStrings("app", "bar", "cluster", "test"),
Line: []byte("goodbye world"),
},

pkg/dataobj/internal/sections/streams/iter.go
@@ -111,13 +111,13 @@ func Decode(columns []*streamsmd.ColumnDesc, row dataset.Row) (Stream, error) {
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {
return stream, fmt.Errorf("invalid type %s for %s", ty, column.Type)
}
-stream.MinTimestamp = time.Unix(0, columnValue.Int64()).UTC()
+stream.MinTimestamp = time.Unix(0, columnValue.Int64())
case streamsmd.COLUMN_TYPE_MAX_TIMESTAMP:
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {
return stream, fmt.Errorf("invalid type %s for %s", ty, column.Type)
}
-stream.MaxTimestamp = time.Unix(0, columnValue.Int64()).UTC()
+stream.MaxTimestamp = time.Unix(0, columnValue.Int64())
case streamsmd.COLUMN_TYPE_ROWS:
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {

pkg/dataobj/internal/sections/streams/streams_test.go
@@ -21,10 +21,10 @@ func Test(t *testing.T) {
}
tt := []ent{
-{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(10, 0).UTC(), 10},
-{labels.FromStrings("cluster", "test", "app", "bar", "special", "yes"), time.Unix(100, 0).UTC(), 20},
-{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(15, 0).UTC(), 15},
-{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(9, 0).UTC(), 5},
+{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(10, 0), 10},
+{labels.FromStrings("cluster", "test", "app", "bar", "special", "yes"), time.Unix(100, 0), 20},
+{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(15, 0), 15},
+{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(9, 0), 5},
}
tracker := streams.New(nil, 1024)
@@ -39,16 +39,16 @@ func Test(t *testing.T) {
{
ID: 1,
Labels: labels.FromStrings("cluster", "test", "app", "foo"),
-MinTimestamp: time.Unix(9, 0).UTC(),
-MaxTimestamp: time.Unix(15, 0).UTC(),
+MinTimestamp: time.Unix(9, 0),
+MaxTimestamp: time.Unix(15, 0),
Rows: 3,
UncompressedSize: 30,
},
{
ID: 2,
Labels: labels.FromStrings("cluster", "test", "app", "bar", "special", "yes"),
-MinTimestamp: time.Unix(100, 0).UTC(),
-MaxTimestamp: time.Unix(100, 0).UTC(),
+MinTimestamp: time.Unix(100, 0),
+MaxTimestamp: time.Unix(100, 0),
Rows: 1,
UncompressedSize: 20,
},

pkg/dataobj/querier/iter.go
@@ -105,17 +105,14 @@ func newEntryIterator(ctx context.Context,
}
statistics.AddPostFilterLines(1)
-var metadata []logproto.LabelAdapter
-if len(record.Metadata) > 0 {
-metadata = logproto.FromLabelsToLabelAdapters(record.Metadata)
-}
top.Add(entryWithLabels{
Labels: parsedLabels.String(),
StreamHash: streamHash,
Entry: logproto.Entry{
Timestamp: record.Timestamp,
Line: string(line),
-StructuredMetadata: metadata,
+StructuredMetadata: logproto.FromLabelsToLabelAdapters(parsedLabels.StructuredMetadata()),
+Parsed: logproto.FromLabelsToLabelAdapters(parsedLabels.Parsed()),
},
})
}
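Beyond the timestamp fixes, this hunk also changes where structured metadata comes from: it is now read from parsedLabels (the post-pipeline label set, split into StructuredMetadata() and Parsed()) rather than from the raw record, presumably to mirror how the chunk-backed querier populates these fields; together with the Location change, this is what lets the two stores return equal entries.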
@@ -202,11 +199,12 @@ func newTopK(k int, direction logproto.Direction) *topk {
panic("k must be greater than 0")
}
entries := entryWithLabelsPool.Get().(*[]entryWithLabels)
return &topk{
k: k,
minHeap: entryHeap{
less: lessFn(direction),
-entries: *entries,
+entries: (*entries)[:0],
},
}
}
@@ -279,6 +277,7 @@ func (s *sliceIterator) StreamHash() uint64 {
}
func (s *sliceIterator) Close() error {
+clear(s.entries)
entryWithLabelsPool.Put(&s.entries)
return nil
}
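These two small fixes are a matched pair for the slice pool: truncating with (*entries)[:0] on Get reuses the pooled backing array without resurrecting entries left over from a previous query, and clear before Put zeroes the elements so the pooled array does not keep their strings and label sets reachable. A standalone sketch of the pattern, with illustrative names rather than Loki's:

    package main

    import (
        "fmt"
        "sync"
    )

    type entry struct{ line string }

    // Illustrative pool of reusable entry slices.
    var entryPool = sync.Pool{
        New: func() any {
            s := make([]entry, 0, 64)
            return &s
        },
    }

    func use() {
        sp := entryPool.Get().(*[]entry)

        // Length 0, but the capacity (and backing array) survives reuse,
        // so appends rarely reallocate and stale elements stay hidden.
        entries := (*sp)[:0]
        entries = append(entries, entry{line: "hello"})
        fmt.Println(len(entries), cap(entries))

        // Zero the elements before returning the slice so the pool does
        // not pin their contents (clear on slices is Go 1.21+).
        clear(entries)
        entryPool.Put(&entries)
    }

    func main() { use(); use() }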

pkg/dataobj/streams_reader_test.go
@@ -92,9 +92,7 @@ func TestStreamsReader_AddLabelFilter(t *testing.T) {
require.Equal(t, expect, actual)
}
-func unixTime(sec int64) time.Time {
-return time.Unix(sec, 0).UTC()
-}
+func unixTime(sec int64) time.Time { return time.Unix(sec, 0) }
func buildStreamsObject(t *testing.T, pageSize int) *dataobj.Object {
t.Helper()

pkg/logql/bench/bench_test.go
@@ -2,8 +2,11 @@ package bench
import (
"context"
"flag"
"fmt"
"os"
"regexp"
"slices"
"strings"
"testing"
"time"
@@ -13,12 +16,15 @@
"github.com/grafana/dskit/user"
"github.com/prometheus/prometheus/promql"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/grafana/loki/v3/pkg/logproto"
"github.com/grafana/loki/v3/pkg/logql"
)
+var slowTests = flag.Bool("slow-tests", false, "run slow tests")
const testTenant = "test-tenant"
//go:generate go run ./cmd/generate/main.go -size 2147483648 -dir ./data -tenant test-tenant
@@ -68,6 +74,101 @@ func setupBenchmarkWithStore(tb testing.TB, storeType string) (*logql.QueryEngine
return engine, config
}
+// TestStorageEquality ensures that for each test case, all known storages
+// return the same query result.
+func TestStorageEquality(t *testing.T) {
+    ctx := user.InjectOrgID(t.Context(), testTenant)
+
+    if !*slowTests {
+        t.Skip("test skipped because -slow-tests flag is not set")
+    }
+
+    type store struct {
+        Name   string
+        Cases  []TestCase
+        Engine *logql.QueryEngine
+    }
+
+    generateStore := func(name string) *store {
+        engine, config := setupBenchmarkWithStore(t, name)
+        cases := config.GenerateTestCases()
+        return &store{
+            Name:   name,
+            Cases:  cases,
+            Engine: engine,
+        }
+    }
+
+    // Generate a list of stores. The first store name provided here is the one
+    // that acts as the baseline.
+    var stores []*store
+    for _, name := range []string{"chunk", "dataobj"} {
+        store := generateStore(name)
+        stores = append(stores, store)
+    }
+    if len(stores) < 2 {
+        t.Skipf("not enough stores to compare; need at least 2, got %d", len(stores))
+    }
+
+    baseStore := stores[0]
+    for _, baseCase := range baseStore.Cases {
+        t.Run(baseCase.Name(), func(t *testing.T) {
+            defer func() {
+                if t.Failed() {
+                    t.Logf("Re-run just this test with -test.run='%s'", testNameRegex(t.Name()))
+                }
+            }()
+
+            t.Logf("Query information:\n%s", baseCase.Description())
+
+            params, err := logql.NewLiteralParams(
+                baseCase.Query,
+                baseCase.Start,
+                baseCase.End,
+                baseCase.Step,
+                0,
+                baseCase.Direction,
+                1000,
+                nil,
+                nil,
+            )
+            require.NoError(t, err)
+
+            expected, err := baseStore.Engine.Query(params).Exec(ctx)
+            require.NoError(t, err)
+
+            // Find matching test case in other stores and then compare results.
+            for _, store := range stores[1:] {
+                idx := slices.IndexFunc(store.Cases, func(tc TestCase) bool {
+                    return tc == baseCase
+                })
+                if idx == -1 {
+                    t.Logf("Store %s missing test case %s", store.Name, baseCase.Name())
+                    continue
+                }
+
+                actual, err := store.Engine.Query(params).Exec(ctx)
+                if assert.NoError(t, err) {
+                    assert.Equal(t, expected.Data, actual.Data, "store %q results do not match base store %q", store.Name, baseStore.Name)
+                }
+            }
+        })
+    }
+}
+
+// testNameRegex converts the test name into an argument that can be passed to
+// -test.run.
+func testNameRegex(name string) string {
+    // -test.run accepts a sequence of regexes separated by '/'. To pass a
+    // literal test name, we need to escape the regex characters in the name.
+    var newParts []string
+    for part := range strings.SplitSeq(name, "/") {
+        newParts = append(newParts, regexp.QuoteMeta(part))
+    }
+    return strings.Join(newParts, "/")
+}
func TestLogQLQueries(t *testing.T) {
// We keep this test for debugging even though it's too slow for now.
t.Skip("Too slow for now.")
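As an aside, testNameRegex exists because -test.run treats each slash-separated segment of the name as a regular expression, and the generated case names contain metacharacters such as (, { and [. A sketch of what it produces, with a made-up subtest name (strings.SplitSeq needs Go 1.24+):

    package main

    import (
        "fmt"
        "regexp"
        "strings"
    )

    // Same shape as testNameRegex in the diff above.
    func testNameRegex(name string) string {
        var newParts []string
        for part := range strings.SplitSeq(name, "/") {
            newParts = append(newParts, regexp.QuoteMeta(part))
        }
        return strings.Join(newParts, "/")
    }

    func main() {
        fmt.Println(testNameRegex(`TestStorageEquality/rate({app="foo"}[5m])`))
        // Output: TestStorageEquality/rate\(\{app="foo"\}\[5m\]\)
    }

Since TestStorageEquality is gated behind the new flag, running it would look something like go test ./pkg/logql/bench -run TestStorageEquality -slow-tests, after go generate has produced the benchmark dataset.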
