chore: align benchmark results between chunks and dataobjs (#17127)

Branch: pull/17130/head
Author: Robert Fratto, 1 month ago, committed by GitHub
Parent: 88beefb02a
Commit: bd4331363b
1. pkg/dataobj/internal/sections/logs/iter.go (2 changes)
2. pkg/dataobj/internal/sections/logs/iter_test.go (4 changes)
3. pkg/dataobj/internal/sections/logs/logs_test.go (12 changes)
4. pkg/dataobj/internal/sections/streams/iter.go (4 changes)
5. pkg/dataobj/internal/sections/streams/streams_test.go (16 changes)
6. pkg/dataobj/querier/iter.go (11 changes)
7. pkg/dataobj/streams_reader_test.go (4 changes)
8. pkg/logql/bench/bench_test.go (101 changes)

pkg/dataobj/internal/sections/logs/iter.go
@@ -117,7 +117,7 @@ func Decode(columns []*logsmd.ColumnDesc, row dataset.Row) (Record, error) {
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {
return Record{}, fmt.Errorf("invalid type %s for %s", ty, column.Type)
}
-record.Timestamp = time.Unix(0, columnValue.Int64()).UTC()
+record.Timestamp = time.Unix(0, columnValue.Int64())
case logsmd.COLUMN_TYPE_METADATA:
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_STRING {
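The dropped .UTC() call here (and in the hunks below) is the heart of the commit: time.Unix returns a Time in the local Location, which is presumably the form the chunk-backed store already returns, and testify-style deep equality distinguishes two Times that name the same instant but carry different Locations. A minimal sketch of that distinction (not part of this commit):

    package main

    import (
        "fmt"
        "reflect"
        "time"
    )

    func main() {
        ns := int64(1234567890000000000)

        inLocal := time.Unix(0, ns)     // Location: Local (time.Unix's default)
        inUTC := time.Unix(0, ns).UTC() // Location: UTC

        // Both values name the same instant on the timeline...
        fmt.Println(inLocal.Equal(inUTC)) // true

        // ...but deep comparison, which require.Equal/assert.Equal build on,
        // also looks at the Location field, so the two are "not equal".
        fmt.Println(reflect.DeepEqual(inLocal, inUTC)) // false
    }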

pkg/dataobj/internal/sections/logs/iter_test.go
@@ -40,7 +40,7 @@ func TestDecode(t *testing.T) {
},
expected: Record{
StreamID: 123,
-Timestamp: time.Date(2009, 2, 13, 23, 31, 30, 0, time.UTC),
+Timestamp: time.Unix(0, 1234567890000000000),
Metadata: labels.FromStrings("app", "test-app", "env", "prod"),
Line: []byte("test message"),
},
@@ -63,7 +63,7 @@ func TestDecode(t *testing.T) {
},
expected: Record{
StreamID: 123,
-Timestamp: time.Date(2009, 2, 13, 23, 31, 30, 0, time.UTC),
+Timestamp: time.Unix(0, 1234567890000000000),
Metadata: labels.FromStrings(),
Line: []byte("test message"),
},
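For the record, the new literal is the same instant the old expectation spelled out: 1234567890000000000 ns is 1234567890 s after the Unix epoch, i.e. 2009-02-13 23:31:30 UTC. A quick check (not part of this commit):

    package main

    import (
        "fmt"
        "time"
    )

    func main() {
        before := time.Date(2009, 2, 13, 23, 31, 30, 0, time.UTC)
        after := time.Unix(0, 1234567890000000000)

        fmt.Println(after.Equal(before)) // true: same instant, only the Location differs
        fmt.Println(after.UTC())         // 2009-02-13 23:31:30 +0000 UTC
    }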

pkg/dataobj/internal/sections/logs/logs_test.go
@@ -17,19 +17,19 @@ func Test(t *testing.T) {
records := []logs.Record{
{
StreamID: 1,
-Timestamp: time.Unix(10, 0).UTC(),
+Timestamp: time.Unix(10, 0),
Metadata: nil,
Line: []byte("hello world"),
},
{
StreamID: 2,
-Timestamp: time.Unix(100, 0).UTC(),
+Timestamp: time.Unix(100, 0),
Metadata: labels.FromStrings("cluster", "test", "app", "bar"),
Line: []byte("goodbye world"),
},
{
StreamID: 1,
-Timestamp: time.Unix(5, 0).UTC(),
+Timestamp: time.Unix(5, 0),
Metadata: labels.FromStrings("cluster", "test", "app", "foo"),
Line: []byte("foo bar"),
},
@@ -55,7 +55,7 @@ func Test(t *testing.T) {
expect := []logs.Record{
{
StreamID: 1,
-Timestamp: time.Unix(5, 0).UTC(),
+Timestamp: time.Unix(5, 0),
Metadata: labels.FromStrings(
"app", "foo",
"cluster", "test",
@@ -64,13 +64,13 @@ func Test(t *testing.T) {
},
{
StreamID: 1,
-Timestamp: time.Unix(10, 0).UTC(),
+Timestamp: time.Unix(10, 0),
Metadata: labels.FromStrings(),
Line: []byte("hello world"),
},
{
StreamID: 2,
-Timestamp: time.Unix(100, 0).UTC(),
+Timestamp: time.Unix(100, 0),
Metadata: labels.FromStrings("app", "bar", "cluster", "test"),
Line: []byte("goodbye world"),
},

pkg/dataobj/internal/sections/streams/iter.go
@@ -111,13 +111,13 @@ func Decode(columns []*streamsmd.ColumnDesc, row dataset.Row) (Stream, error) {
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {
return stream, fmt.Errorf("invalid type %s for %s", ty, column.Type)
}
-stream.MinTimestamp = time.Unix(0, columnValue.Int64()).UTC()
+stream.MinTimestamp = time.Unix(0, columnValue.Int64())
case streamsmd.COLUMN_TYPE_MAX_TIMESTAMP:
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {
return stream, fmt.Errorf("invalid type %s for %s", ty, column.Type)
}
-stream.MaxTimestamp = time.Unix(0, columnValue.Int64()).UTC()
+stream.MaxTimestamp = time.Unix(0, columnValue.Int64())
case streamsmd.COLUMN_TYPE_ROWS:
if ty := columnValue.Type(); ty != datasetmd.VALUE_TYPE_INT64 {

pkg/dataobj/internal/sections/streams/streams_test.go
@@ -21,10 +21,10 @@ func Test(t *testing.T) {
}
tt := []ent{
-{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(10, 0).UTC(), 10},
-{labels.FromStrings("cluster", "test", "app", "bar", "special", "yes"), time.Unix(100, 0).UTC(), 20},
-{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(15, 0).UTC(), 15},
-{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(9, 0).UTC(), 5},
+{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(10, 0), 10},
+{labels.FromStrings("cluster", "test", "app", "bar", "special", "yes"), time.Unix(100, 0), 20},
+{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(15, 0), 15},
+{labels.FromStrings("cluster", "test", "app", "foo"), time.Unix(9, 0), 5},
}
tracker := streams.New(nil, 1024)
@@ -39,16 +39,16 @@ func Test(t *testing.T) {
{
ID: 1,
Labels: labels.FromStrings("cluster", "test", "app", "foo"),
-MinTimestamp: time.Unix(9, 0).UTC(),
-MaxTimestamp: time.Unix(15, 0).UTC(),
+MinTimestamp: time.Unix(9, 0),
+MaxTimestamp: time.Unix(15, 0),
Rows: 3,
UncompressedSize: 30,
},
{
ID: 2,
Labels: labels.FromStrings("cluster", "test", "app", "bar", "special", "yes"),
-MinTimestamp: time.Unix(100, 0).UTC(),
-MaxTimestamp: time.Unix(100, 0).UTC(),
+MinTimestamp: time.Unix(100, 0),
+MaxTimestamp: time.Unix(100, 0),
Rows: 1,
UncompressedSize: 20,
},

pkg/dataobj/querier/iter.go
@@ -105,17 +105,14 @@ func newEntryIterator(ctx context.Context,
}
statistics.AddPostFilterLines(1)
-var metadata []logproto.LabelAdapter
-if len(record.Metadata) > 0 {
-metadata = logproto.FromLabelsToLabelAdapters(record.Metadata)
-}
top.Add(entryWithLabels{
Labels: parsedLabels.String(),
StreamHash: streamHash,
Entry: logproto.Entry{
Timestamp: record.Timestamp,
Line: string(line),
-StructuredMetadata: metadata,
+StructuredMetadata: logproto.FromLabelsToLabelAdapters(parsedLabels.StructuredMetadata()),
+Parsed: logproto.FromLabelsToLabelAdapters(parsedLabels.Parsed()),
},
})
}
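Beyond the timestamp fixes, this hunk also changes where structured metadata comes from: it is now read from parsedLabels (the post-pipeline label set, split into StructuredMetadata() and Parsed()) rather than from the raw record, presumably to mirror how the chunk-backed querier populates these fields; together with the Location change, this is what lets the two stores return equal entries.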
@@ -202,11 +199,12 @@ func newTopK(k int, direction logproto.Direction) *topk {
panic("k must be greater than 0")
}
entries := entryWithLabelsPool.Get().(*[]entryWithLabels)
return &topk{
k: k,
minHeap: entryHeap{
less: lessFn(direction),
-entries: *entries,
+entries: (*entries)[:0],
},
}
}
@@ -279,6 +277,7 @@ func (s *sliceIterator) StreamHash() uint64 {
}
func (s *sliceIterator) Close() error {
+clear(s.entries)
entryWithLabelsPool.Put(&s.entries)
return nil
}
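These two small fixes are a matched pair for the slice pool: truncating with (*entries)[:0] on Get reuses the pooled backing array without resurrecting entries left over from a previous query, and clear before Put zeroes the elements so the pooled array does not keep their strings and label sets reachable. A standalone sketch of the pattern, with illustrative names rather than Loki's:

    package main

    import (
        "fmt"
        "sync"
    )

    type entry struct{ line string }

    // Illustrative pool of reusable entry slices.
    var entryPool = sync.Pool{
        New: func() any {
            s := make([]entry, 0, 64)
            return &s
        },
    }

    func use() {
        sp := entryPool.Get().(*[]entry)

        // Length 0, but the capacity (and backing array) survives reuse,
        // so appends rarely reallocate and stale elements stay hidden.
        entries := (*sp)[:0]
        entries = append(entries, entry{line: "hello"})
        fmt.Println(len(entries), cap(entries))

        // Zero the elements before returning the slice so the pool does
        // not pin their contents (clear on slices is Go 1.21+).
        clear(entries)
        entryPool.Put(&entries)
    }

    func main() { use(); use() }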

pkg/dataobj/streams_reader_test.go
@@ -92,9 +92,7 @@ func TestStreamsReader_AddLabelFilter(t *testing.T) {
require.Equal(t, expect, actual)
}
-func unixTime(sec int64) time.Time {
-return time.Unix(sec, 0).UTC()
-}
+func unixTime(sec int64) time.Time { return time.Unix(sec, 0) }
func buildStreamsObject(t *testing.T, pageSize int) *dataobj.Object {
t.Helper()

pkg/logql/bench/bench_test.go
@@ -2,8 +2,11 @@ package bench
import (
"context"
"flag"
"fmt"
"os"
"regexp"
"slices"
"strings"
"testing"
"time"
@@ -13,12 +16,15 @@
"github.com/grafana/dskit/user"
"github.com/prometheus/prometheus/promql"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/grafana/loki/v3/pkg/logproto"
"github.com/grafana/loki/v3/pkg/logql"
)
+var slowTests = flag.Bool("slow-tests", false, "run slow tests")
const testTenant = "test-tenant"
//go:generate go run ./cmd/generate/main.go -size 2147483648 -dir ./data -tenant test-tenant
@@ -68,6 +74,101 @@ func setupBenchmarkWithStore(tb testing.TB, storeType string) (*logql.QueryEngine
return engine, config
}
+// TestStorageEquality ensures that for each test case, all known storages
+// return the same query result.
+func TestStorageEquality(t *testing.T) {
+    ctx := user.InjectOrgID(t.Context(), testTenant)
+
+    if !*slowTests {
+        t.Skip("test skipped because -slow-tests flag is not set")
+    }
+
+    type store struct {
+        Name   string
+        Cases  []TestCase
+        Engine *logql.QueryEngine
+    }
+
+    generateStore := func(name string) *store {
+        engine, config := setupBenchmarkWithStore(t, name)
+        cases := config.GenerateTestCases()
+        return &store{
+            Name:   name,
+            Cases:  cases,
+            Engine: engine,
+        }
+    }
+
+    // Generate a list of stores. The first store name provided here is the one
+    // that acts as the baseline.
+    var stores []*store
+    for _, name := range []string{"chunk", "dataobj"} {
+        store := generateStore(name)
+        stores = append(stores, store)
+    }
+    if len(stores) < 2 {
+        t.Skipf("not enough stores to compare; need at least 2, got %d", len(stores))
+    }
+
+    baseStore := stores[0]
+    for _, baseCase := range baseStore.Cases {
+        t.Run(baseCase.Name(), func(t *testing.T) {
+            defer func() {
+                if t.Failed() {
+                    t.Logf("Re-run just this test with -test.run='%s'", testNameRegex(t.Name()))
+                }
+            }()
+
+            t.Logf("Query information:\n%s", baseCase.Description())
+
+            params, err := logql.NewLiteralParams(
+                baseCase.Query,
+                baseCase.Start,
+                baseCase.End,
+                baseCase.Step,
+                0,
+                baseCase.Direction,
+                1000,
+                nil,
+                nil,
+            )
+            require.NoError(t, err)
+
+            expected, err := baseStore.Engine.Query(params).Exec(ctx)
+            require.NoError(t, err)
+
+            // Find matching test case in other stores and then compare results.
+            for _, store := range stores[1:] {
+                idx := slices.IndexFunc(store.Cases, func(tc TestCase) bool {
+                    return tc == baseCase
+                })
+                if idx == -1 {
+                    t.Logf("Store %s missing test case %s", store.Name, baseCase.Name())
+                    continue
+                }
+
+                actual, err := store.Engine.Query(params).Exec(ctx)
+                if assert.NoError(t, err) {
+                    assert.Equal(t, expected.Data, actual.Data, "store %q results do not match base store %q", store.Name, baseStore.Name)
+                }
+            }
+        })
+    }
+}
+
+// testNameRegex converts the test name into an argument that can be passed to
+// -test.run.
+func testNameRegex(name string) string {
+    // -test.run accepts a sequence of regexes separated by '/'. To pass a
+    // literal test name, we need to escape the regex characters in the name.
+    var newParts []string
+    for part := range strings.SplitSeq(name, "/") {
+        newParts = append(newParts, regexp.QuoteMeta(part))
+    }
+    return strings.Join(newParts, "/")
+}
func TestLogQLQueries(t *testing.T) {
// We keep this test for debugging even though it's too slow for now.
t.Skip("Too slow for now.")
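As an aside, testNameRegex exists because -test.run treats each slash-separated segment of the name as a regular expression, and the generated case names contain metacharacters such as (, { and [. A sketch of what it produces, with a made-up subtest name (strings.SplitSeq needs Go 1.24+):

    package main

    import (
        "fmt"
        "regexp"
        "strings"
    )

    // Same shape as testNameRegex in the diff above.
    func testNameRegex(name string) string {
        var newParts []string
        for part := range strings.SplitSeq(name, "/") {
            newParts = append(newParts, regexp.QuoteMeta(part))
        }
        return strings.Join(newParts, "/")
    }

    func main() {
        fmt.Println(testNameRegex(`TestStorageEquality/rate({app="foo"}[5m])`))
        // Output: TestStorageEquality/rate\(\{app="foo"\}\[5m\]\)
    }

Since TestStorageEquality is gated behind the new flag, running it would look something like go test ./pkg/logql/bench -run TestStorageEquality -slow-tests, after go generate has produced the benchmark dataset.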
