TSDB Index reuses slices, adds pools (#5630)

* adds a pool for ChunkMetas

* index takes slice pointers for allocation reasons and adds pools for all index types

* reuse slices in tsdb benchmarks

* adds count to bench script

* properly returns series to pool

* more ergonomic index signatures while still supporting slice reuse

* tsdb index documentation

* aligns tsdb-map tooling with new index signatures
pull/5664/head
Owen Diehl 3 years ago committed by GitHub
parent d084dc7f4b
commit 3f28a33d93
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 15
      pkg/storage/tsdb/index.go
  2. 58
      pkg/storage/tsdb/multi_file_index.go
  3. 4
      pkg/storage/tsdb/multi_file_index_test.go
  4. 64
      pkg/storage/tsdb/pool.go
  5. 23
      pkg/storage/tsdb/single_file_index.go
  6. 8
      pkg/storage/tsdb/single_file_index_test.go
  7. 5
      tools/tsdb/tsdb-map/diff.sh
  8. 8
      tools/tsdb/tsdb-map/main_test.go

@ -32,8 +32,19 @@ func (r ChunkRef) Less(x ChunkRef) bool {
type Index interface {
Bounded
GetChunkRefs(ctx context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error)
Series(ctx context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error)
// GetChunkRefs accepts an optional []ChunkRef argument.
// If not nil, it will use that slice to build the result,
// allowing us to avoid unnecessary allocations at the caller's discretion.
// If nil, the underlying index implementation is required
// to build the resulting slice nonetheless (it should not panic),
// ideally by requesting a slice from the pool.
// Shard is also optional. If not nil, TSDB will limit the result to
// the requested shard. If it is nil, TSDB will return all results,
// regardless of shard.
// Note: the shard factor (the `of` value) must be a power of two, meaning `0_of_2` and `3_of_4` are fine, but `0_of_3` is not.
GetChunkRefs(ctx context.Context, userID string, from, through model.Time, res []ChunkRef, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error)
// Series follows the same semantics regarding the passed slice and shard as GetChunkRefs.
Series(ctx context.Context, userID string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error)
LabelNames(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) ([]string, error)
LabelValues(ctx context.Context, userID string, from, through model.Time, name string, matchers ...*labels.Matcher) ([]string, error)
}

@ -92,80 +92,72 @@ func (i *MultiIndex) forIndices(ctx context.Context, from, through model.Time, f
return results, nil
}
func (i *MultiIndex) GetChunkRefs(ctx context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
func (i *MultiIndex) GetChunkRefs(ctx context.Context, userID string, from, through model.Time, res []ChunkRef, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
if res == nil {
res = ChunkRefsPool.Get()
}
res = res[:0]
groups, err := i.forIndices(ctx, from, through, func(ctx context.Context, idx Index) (interface{}, error) {
return idx.GetChunkRefs(ctx, userID, from, through, shard, matchers...)
return idx.GetChunkRefs(ctx, userID, from, through, nil, shard, matchers...)
})
if err != nil {
return nil, err
}
var maxLn int // maximum number of chunk refs, assuming no duplicates
refGroups := make([][]ChunkRef, 0, len(i.indices))
for _, group := range groups {
rg := group.([]ChunkRef)
maxLn += len(rg)
refGroups = append(refGroups, rg)
}
// optimistically allocate the maximum length slice
// to avoid growing incrementally
results := make([]ChunkRef, 0, maxLn)
// keep track of duplicates
seen := make(map[ChunkRef]struct{})
// TODO(owen-d): Do this more efficiently,
// not all indices overlap each other
for _, group := range refGroups {
for _, ref := range group {
for _, group := range groups {
g := group.([]ChunkRef)
for _, ref := range g {
_, ok := seen[ref]
if ok {
continue
}
seen[ref] = struct{}{}
results = append(results, ref)
res = append(res, ref)
}
ChunkRefsPool.Put(g)
}
return results, nil
return res, nil
}
func (i *MultiIndex) Series(ctx context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
func (i *MultiIndex) Series(ctx context.Context, userID string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
if res == nil {
res = SeriesPool.Get()
}
res = res[:0]
groups, err := i.forIndices(ctx, from, through, func(ctx context.Context, idx Index) (interface{}, error) {
return idx.Series(ctx, userID, from, through, shard, matchers...)
return idx.Series(ctx, userID, from, through, nil, shard, matchers...)
})
if err != nil {
return nil, err
}
var maxLn int // maximum number of chunk refs, assuming no duplicates
xs := make([][]Series, 0, len(i.indices))
for _, group := range groups {
x := group.([]Series)
maxLn += len(x)
xs = append(xs, x)
}
// optimistically allocate the maximum length slice
// to avoid growing incrementally
results := make([]Series, 0, maxLn)
seen := make(map[model.Fingerprint]struct{})
for _, seriesSet := range xs {
for _, x := range groups {
seriesSet := x.([]Series)
for _, s := range seriesSet {
_, ok := seen[s.Fingerprint]
if ok {
continue
}
seen[s.Fingerprint] = struct{}{}
results = append(results, s)
res = append(res, s)
}
SeriesPool.Put(seriesSet)
}
return results, nil
return res, nil
}
func (i *MultiIndex) LabelNames(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) ([]string, error) {

@ -67,7 +67,7 @@ func TestMultiIndex(t *testing.T) {
require.Nil(t, err)
t.Run("GetChunkRefs", func(t *testing.T) {
refs, err := idx.GetChunkRefs(context.Background(), "fake", 2, 5, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
refs, err := idx.GetChunkRefs(context.Background(), "fake", 2, 5, nil, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []ChunkRef{
@ -104,7 +104,7 @@ func TestMultiIndex(t *testing.T) {
})
t.Run("Series", func(t *testing.T) {
xs, err := idx.Series(context.Background(), "fake", 2, 5, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
xs, err := idx.Series(context.Background(), "fake", 2, 5, nil, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []Series{
{

@ -0,0 +1,64 @@
package tsdb
import (
"sync"
"github.com/grafana/loki/pkg/storage/tsdb/index"
)
// Package-level pools for reusing index result slices across queries,
// reducing per-query allocations. Callers may Get a slice, pass it to the
// Index methods as the res argument, and Put it back when done.
var (
	chunkMetasPool poolChunkMetas // private, internal pkg use only
	SeriesPool     PoolSeries     // reusable []Series, e.g. for Index.Series results
	ChunkRefsPool  PoolChunkRefs  // reusable []ChunkRef, e.g. for Index.GetChunkRefs results
)
// poolChunkMetas recycles scratch []index.ChunkMeta slices used internally
// by this package. It is unexported on purpose: only this package's code
// controls the lifetime of the slices it hands out.
type poolChunkMetas struct {
	pool sync.Pool
}

// Get returns a zero-length []index.ChunkMeta, either recycled from the
// pool or freshly allocated with a default capacity of 1<<10.
func (p *poolChunkMetas) Get() []index.ChunkMeta {
	v := p.pool.Get()
	if v == nil {
		return make([]index.ChunkMeta, 0, 1<<10)
	}
	return v.([]index.ChunkMeta)
}

// Put truncates xs to zero length (keeping its capacity) and returns it to
// the pool for later reuse.
func (p *poolChunkMetas) Put(xs []index.ChunkMeta) {
	//nolint:staticcheck
	p.pool.Put(xs[:0])
}
// PoolSeries recycles []Series slices so callers of Index.Series can avoid
// re-allocating result buffers on every query.
type PoolSeries struct {
	pool sync.Pool
}

// Get returns a zero-length []Series, either recycled from the pool or
// freshly allocated with a default capacity of 1<<10.
func (p *PoolSeries) Get() []Series {
	v := p.pool.Get()
	if v == nil {
		return make([]Series, 0, 1<<10)
	}
	return v.([]Series)
}

// Put truncates xs to zero length (keeping its capacity) and returns it to
// the pool for later reuse.
func (p *PoolSeries) Put(xs []Series) {
	//nolint:staticcheck
	p.pool.Put(xs[:0])
}
// PoolChunkRefs recycles []ChunkRef slices so callers of Index.GetChunkRefs
// can avoid re-allocating result buffers on every query.
type PoolChunkRefs struct {
	pool sync.Pool
}

// Get returns a zero-length []ChunkRef, either recycled from the pool or
// freshly allocated with a default capacity of 1<<10.
func (p *PoolChunkRefs) Get() []ChunkRef {
	v := p.pool.Get()
	if v == nil {
		return make([]ChunkRef, 0, 1<<10)
	}
	return v.([]ChunkRef)
}

// Put truncates xs to zero length (keeping its capacity) and returns it to
// the pool for later reuse.
func (p *PoolChunkRefs) Put(xs []ChunkRef) {
	//nolint:staticcheck
	p.pool.Put(xs[:0])
}

@ -25,6 +25,8 @@ func (i *TSDBIndex) Bounds() (model.Time, model.Time) {
return model.Time(from), model.Time(through)
}
// fn must NOT capture its arguments. They're reused across series iterations and returned to
// a pool after completion.
func (i *TSDBIndex) forSeries(
shard *index.ShardAnnotation,
fn func(labels.Labels, model.Fingerprint, []index.ChunkMeta),
@ -35,10 +37,9 @@ func (i *TSDBIndex) forSeries(
return err
}
var (
ls labels.Labels
chks []index.ChunkMeta
)
var ls labels.Labels
chks := chunkMetasPool.Get()
defer chunkMetasPool.Put(chks)
for p.Next() {
hash, err := i.reader.Series(p.At(), &ls, &chks)
@ -56,9 +57,12 @@ func (i *TSDBIndex) forSeries(
return p.Err()
}
func (i *TSDBIndex) GetChunkRefs(_ context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
func (i *TSDBIndex) GetChunkRefs(_ context.Context, userID string, from, through model.Time, res []ChunkRef, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
queryBounds := newBounds(from, through)
var res []ChunkRef // TODO(owen-d): pool, reduce allocs
if res == nil {
res = ChunkRefsPool.Get()
}
res = res[:0]
if err := i.forSeries(shard,
func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
@ -86,9 +90,12 @@ func (i *TSDBIndex) GetChunkRefs(_ context.Context, userID string, from, through
return res, nil
}
func (i *TSDBIndex) Series(_ context.Context, _ string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
func (i *TSDBIndex) Series(_ context.Context, _ string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
queryBounds := newBounds(from, through)
var res []Series // TODO(owen-d): pool, reduce allocs
if res == nil {
res = SeriesPool.Get()
}
res = res[:0]
if err := i.forSeries(shard,
func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {

@ -58,7 +58,7 @@ func TestSingleIdx(t *testing.T) {
idx := BuildIndex(t, cases)
t.Run("GetChunkRefs", func(t *testing.T) {
refs, err := idx.GetChunkRefs(context.Background(), "fake", 1, 5, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
refs, err := idx.GetChunkRefs(context.Background(), "fake", 1, 5, nil, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []ChunkRef{
@ -99,7 +99,7 @@ func TestSingleIdx(t *testing.T) {
Shard: 1,
Of: 2,
}
shardedRefs, err := idx.GetChunkRefs(context.Background(), "fake", 1, 5, &shard, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
shardedRefs, err := idx.GetChunkRefs(context.Background(), "fake", 1, 5, nil, &shard, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
@ -114,7 +114,7 @@ func TestSingleIdx(t *testing.T) {
})
t.Run("Series", func(t *testing.T) {
xs, err := idx.Series(context.Background(), "fake", 8, 9, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
xs, err := idx.Series(context.Background(), "fake", 8, 9, nil, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []Series{
@ -132,7 +132,7 @@ func TestSingleIdx(t *testing.T) {
Of: 2,
}
xs, err := idx.Series(context.Background(), "fake", 0, 10, &shard, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
xs, err := idx.Series(context.Background(), "fake", 0, 10, nil, &shard, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []Series{

@ -1,14 +1,13 @@
#!/usr/bin/env bash
# This can be run like:
# ./tools/tsdb/tsdb-map/diff.sh /tmp/loki-scratch/loki-ops-daily.r main $(git rev-parse --abbrev-ref HEAD)
# ./tools/tsdb/tsdb-map/diff.sh /tmp/loki-scratch/loki-ops-daily.r main $(git rev-parse --abbrev-ref HEAD) <rounds>
boltdb_base=$1
branch_a=$2
branch_b=$3
COUNT="${4:-8}"
echo using count: "${COUNT}"
echo running "${COUNT}" rounds
echo building from "${branch_a}"
git checkout "${branch_a}"

@ -76,8 +76,10 @@ func BenchmarkQuery_GetChunkRefs(b *testing.B) {
}
idx := tsdb.NewTSDBIndex(reader)
b.Run(bm.name, func(b *testing.B) {
refs := tsdb.ChunkRefsPool.Get()
for i := 0; i < b.N; i++ {
_, err := idx.GetChunkRefs(context.Background(), "fake", 0, math.MaxInt64, nil, bm.matchers...)
var err error
refs, err = idx.GetChunkRefs(context.Background(), "fake", 0, math.MaxInt64, refs, nil, bm.matchers...)
if err != nil {
panic(err)
}
@ -101,14 +103,16 @@ func BenchmarkQuery_GetChunkRefsSharded(b *testing.B) {
shardFactor := 16
b.Run(bm.name, func(b *testing.B) {
refs := tsdb.ChunkRefsPool.Get()
for i := 0; i < b.N; i++ {
for j := 0; j < shardFactor; j++ {
shard := index.ShardAnnotation{
Shard: uint32(j),
Of: uint32(shardFactor),
}
var err error
_, err := idx.GetChunkRefs(context.Background(), "fake", 0, math.MaxInt64, &shard, bm.matchers...)
refs, err = idx.GetChunkRefs(context.Background(), "fake", 0, math.MaxInt64, refs, &shard, bm.matchers...)
if err != nil {
panic(err)
}

Loading…
Cancel
Save