TSDB Index reuses slices, adds pools (#5630)

* adds a pool for ChunkMetas

* index takes slice pointers for allocation reasons and adds pools for all index types

* reuse slices in tsdb benchmarks

* adds count to bench script

* properly returns series to pool

* more ergonomic index signatures while still supporting slice reuse

* tsdb index documentation

* aligns tsdb-map tooling with new index signatures
pull/5664/head
Owen Diehl 3 years ago committed by GitHub
parent d084dc7f4b
commit 3f28a33d93
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 15
      pkg/storage/tsdb/index.go
  2. 58
      pkg/storage/tsdb/multi_file_index.go
  3. 4
      pkg/storage/tsdb/multi_file_index_test.go
  4. 64
      pkg/storage/tsdb/pool.go
  5. 23
      pkg/storage/tsdb/single_file_index.go
  6. 8
      pkg/storage/tsdb/single_file_index_test.go
  7. 5
      tools/tsdb/tsdb-map/diff.sh
  8. 8
      tools/tsdb/tsdb-map/main_test.go

@ -32,8 +32,19 @@ func (r ChunkRef) Less(x ChunkRef) bool {
type Index interface {
Bounded
GetChunkRefs(ctx context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error)
Series(ctx context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error)
// GetChunkRefs accepts an optional []ChunkRef argument.
// If not nil, it will use that slice to build the result,
// allowing us to avoid unnecessary allocations at the caller's discretion.
// If nil, the underlying index implementation is required
// to build the resulting slice nonetheless (it should not panic),
// ideally by requesting a slice from the pool.
// Shard is also optional. If not nil, TSDB will limit the result to
// the requested shard. If it is nil, TSDB will return all results,
// regardless of shard.
// Note: the shard factor (the `of` value) must be a power of two, meaning `0_of_2` and `3_of_4` are fine, but `0_of_3` is not.
GetChunkRefs(ctx context.Context, userID string, from, through model.Time, res []ChunkRef, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error)
// Series follows the same semantics regarding the passed slice and shard as GetChunkRefs.
Series(ctx context.Context, userID string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error)
LabelNames(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) ([]string, error)
LabelValues(ctx context.Context, userID string, from, through model.Time, name string, matchers ...*labels.Matcher) ([]string, error)
}

@ -92,80 +92,72 @@ func (i *MultiIndex) forIndices(ctx context.Context, from, through model.Time, f
return results, nil
}
func (i *MultiIndex) GetChunkRefs(ctx context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
func (i *MultiIndex) GetChunkRefs(ctx context.Context, userID string, from, through model.Time, res []ChunkRef, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
if res == nil {
res = ChunkRefsPool.Get()
}
res = res[:0]
groups, err := i.forIndices(ctx, from, through, func(ctx context.Context, idx Index) (interface{}, error) {
return idx.GetChunkRefs(ctx, userID, from, through, shard, matchers...)
return idx.GetChunkRefs(ctx, userID, from, through, nil, shard, matchers...)
})
if err != nil {
return nil, err
}
var maxLn int // maximum number of chunk refs, assuming no duplicates
refGroups := make([][]ChunkRef, 0, len(i.indices))
for _, group := range groups {
rg := group.([]ChunkRef)
maxLn += len(rg)
refGroups = append(refGroups, rg)
}
// optimistically allocate the maximum length slice
// to avoid growing incrementally
results := make([]ChunkRef, 0, maxLn)
// keep track of duplicates
seen := make(map[ChunkRef]struct{})
// TODO(owen-d): Do this more efficiently,
// not all indices overlap each other
for _, group := range refGroups {
for _, ref := range group {
for _, group := range groups {
g := group.([]ChunkRef)
for _, ref := range g {
_, ok := seen[ref]
if ok {
continue
}
seen[ref] = struct{}{}
results = append(results, ref)
res = append(res, ref)
}
ChunkRefsPool.Put(g)
}
return results, nil
return res, nil
}
func (i *MultiIndex) Series(ctx context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
func (i *MultiIndex) Series(ctx context.Context, userID string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
if res == nil {
res = SeriesPool.Get()
}
res = res[:0]
groups, err := i.forIndices(ctx, from, through, func(ctx context.Context, idx Index) (interface{}, error) {
return idx.Series(ctx, userID, from, through, shard, matchers...)
return idx.Series(ctx, userID, from, through, nil, shard, matchers...)
})
if err != nil {
return nil, err
}
var maxLn int // maximum number of chunk refs, assuming no duplicates
xs := make([][]Series, 0, len(i.indices))
for _, group := range groups {
x := group.([]Series)
maxLn += len(x)
xs = append(xs, x)
}
// optimistically allocate the maximum length slice
// to avoid growing incrementally
results := make([]Series, 0, maxLn)
seen := make(map[model.Fingerprint]struct{})
for _, seriesSet := range xs {
for _, x := range groups {
seriesSet := x.([]Series)
for _, s := range seriesSet {
_, ok := seen[s.Fingerprint]
if ok {
continue
}
seen[s.Fingerprint] = struct{}{}
results = append(results, s)
res = append(res, s)
}
SeriesPool.Put(seriesSet)
}
return results, nil
return res, nil
}
func (i *MultiIndex) LabelNames(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) ([]string, error) {

@ -67,7 +67,7 @@ func TestMultiIndex(t *testing.T) {
require.Nil(t, err)
t.Run("GetChunkRefs", func(t *testing.T) {
refs, err := idx.GetChunkRefs(context.Background(), "fake", 2, 5, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
refs, err := idx.GetChunkRefs(context.Background(), "fake", 2, 5, nil, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []ChunkRef{
@ -104,7 +104,7 @@ func TestMultiIndex(t *testing.T) {
})
t.Run("Series", func(t *testing.T) {
xs, err := idx.Series(context.Background(), "fake", 2, 5, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
xs, err := idx.Series(context.Background(), "fake", 2, 5, nil, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []Series{
{

@ -0,0 +1,64 @@
package tsdb
import (
"sync"
"github.com/grafana/loki/pkg/storage/tsdb/index"
)
// Package-level pools for reusing index result slices across queries,
// reducing per-query allocations. Callers may Get a slice, pass it to the
// Index methods as the res argument, and Put it back when done.
var (
	chunkMetasPool poolChunkMetas // private, internal pkg use only
	SeriesPool     PoolSeries     // reusable []Series, e.g. for Index.Series results
	ChunkRefsPool  PoolChunkRefs  // reusable []ChunkRef, e.g. for Index.GetChunkRefs results
)
// poolChunkMetas recycles scratch []index.ChunkMeta slices used internally
// by this package. It is unexported on purpose: only this package's code
// controls the lifetime of the slices it hands out.
type poolChunkMetas struct {
	pool sync.Pool
}

// Get returns a zero-length []index.ChunkMeta, either recycled from the
// pool or freshly allocated with a default capacity of 1<<10.
func (p *poolChunkMetas) Get() []index.ChunkMeta {
	v := p.pool.Get()
	if v == nil {
		return make([]index.ChunkMeta, 0, 1<<10)
	}
	return v.([]index.ChunkMeta)
}

// Put truncates xs to zero length (keeping its capacity) and returns it to
// the pool for later reuse.
func (p *poolChunkMetas) Put(xs []index.ChunkMeta) {
	//nolint:staticcheck
	p.pool.Put(xs[:0])
}
// PoolSeries recycles []Series slices so callers of Index.Series can avoid
// re-allocating result buffers on every query.
type PoolSeries struct {
	pool sync.Pool
}

// Get returns a zero-length []Series, either recycled from the pool or
// freshly allocated with a default capacity of 1<<10.
func (p *PoolSeries) Get() []Series {
	v := p.pool.Get()
	if v == nil {
		return make([]Series, 0, 1<<10)
	}
	return v.([]Series)
}

// Put truncates xs to zero length (keeping its capacity) and returns it to
// the pool for later reuse.
func (p *PoolSeries) Put(xs []Series) {
	//nolint:staticcheck
	p.pool.Put(xs[:0])
}
// PoolChunkRefs recycles []ChunkRef slices so callers of Index.GetChunkRefs
// can avoid re-allocating result buffers on every query.
type PoolChunkRefs struct {
	pool sync.Pool
}

// Get returns a zero-length []ChunkRef, either recycled from the pool or
// freshly allocated with a default capacity of 1<<10.
func (p *PoolChunkRefs) Get() []ChunkRef {
	v := p.pool.Get()
	if v == nil {
		return make([]ChunkRef, 0, 1<<10)
	}
	return v.([]ChunkRef)
}

// Put truncates xs to zero length (keeping its capacity) and returns it to
// the pool for later reuse.
func (p *PoolChunkRefs) Put(xs []ChunkRef) {
	//nolint:staticcheck
	p.pool.Put(xs[:0])
}

@ -25,6 +25,8 @@ func (i *TSDBIndex) Bounds() (model.Time, model.Time) {
return model.Time(from), model.Time(through)
}
// fn must NOT capture its arguments. They're reused across series iterations and returned to
// a pool after completion.
func (i *TSDBIndex) forSeries(
shard *index.ShardAnnotation,
fn func(labels.Labels, model.Fingerprint, []index.ChunkMeta),
@ -35,10 +37,9 @@ func (i *TSDBIndex) forSeries(
return err
}
var (
ls labels.Labels
chks []index.ChunkMeta
)
var ls labels.Labels
chks := chunkMetasPool.Get()
defer chunkMetasPool.Put(chks)
for p.Next() {
hash, err := i.reader.Series(p.At(), &ls, &chks)
@ -56,9 +57,12 @@ func (i *TSDBIndex) forSeries(
return p.Err()
}
func (i *TSDBIndex) GetChunkRefs(_ context.Context, userID string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
func (i *TSDBIndex) GetChunkRefs(_ context.Context, userID string, from, through model.Time, res []ChunkRef, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]ChunkRef, error) {
queryBounds := newBounds(from, through)
var res []ChunkRef // TODO(owen-d): pool, reduce allocs
if res == nil {
res = ChunkRefsPool.Get()
}
res = res[:0]
if err := i.forSeries(shard,
func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
@ -86,9 +90,12 @@ func (i *TSDBIndex) GetChunkRefs(_ context.Context, userID string, from, through
return res, nil
}
func (i *TSDBIndex) Series(_ context.Context, _ string, from, through model.Time, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
func (i *TSDBIndex) Series(_ context.Context, _ string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error) {
queryBounds := newBounds(from, through)
var res []Series // TODO(owen-d): pool, reduce allocs
if res == nil {
res = SeriesPool.Get()
}
res = res[:0]
if err := i.forSeries(shard,
func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {

@ -58,7 +58,7 @@ func TestSingleIdx(t *testing.T) {
idx := BuildIndex(t, cases)
t.Run("GetChunkRefs", func(t *testing.T) {
refs, err := idx.GetChunkRefs(context.Background(), "fake", 1, 5, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
refs, err := idx.GetChunkRefs(context.Background(), "fake", 1, 5, nil, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []ChunkRef{
@ -99,7 +99,7 @@ func TestSingleIdx(t *testing.T) {
Shard: 1,
Of: 2,
}
shardedRefs, err := idx.GetChunkRefs(context.Background(), "fake", 1, 5, &shard, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
shardedRefs, err := idx.GetChunkRefs(context.Background(), "fake", 1, 5, nil, &shard, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
@ -114,7 +114,7 @@ func TestSingleIdx(t *testing.T) {
})
t.Run("Series", func(t *testing.T) {
xs, err := idx.Series(context.Background(), "fake", 8, 9, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
xs, err := idx.Series(context.Background(), "fake", 8, 9, nil, nil, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []Series{
@ -132,7 +132,7 @@ func TestSingleIdx(t *testing.T) {
Of: 2,
}
xs, err := idx.Series(context.Background(), "fake", 0, 10, &shard, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
xs, err := idx.Series(context.Background(), "fake", 0, 10, nil, &shard, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.Nil(t, err)
expected := []Series{

@ -1,14 +1,13 @@
#!/usr/bin/env bash
# This can be run like:
# ./tools/tsdb/tsdb-map/diff.sh /tmp/loki-scratch/loki-ops-daily.r main $(git rev-parse --abbrev-ref HEAD)
# ./tools/tsdb/tsdb-map/diff.sh /tmp/loki-scratch/loki-ops-daily.r main $(git rev-parse --abbrev-ref HEAD) <rounds>
boltdb_base=$1
branch_a=$2
branch_b=$3
COUNT="${4:-8}"
echo using count: "${COUNT}"
echo running "${COUNT}" rounds
echo building from "${branch_a}"
git checkout "${branch_a}"

@ -76,8 +76,10 @@ func BenchmarkQuery_GetChunkRefs(b *testing.B) {
}
idx := tsdb.NewTSDBIndex(reader)
b.Run(bm.name, func(b *testing.B) {
refs := tsdb.ChunkRefsPool.Get()
for i := 0; i < b.N; i++ {
_, err := idx.GetChunkRefs(context.Background(), "fake", 0, math.MaxInt64, nil, bm.matchers...)
var err error
refs, err = idx.GetChunkRefs(context.Background(), "fake", 0, math.MaxInt64, refs, nil, bm.matchers...)
if err != nil {
panic(err)
}
@ -101,14 +103,16 @@ func BenchmarkQuery_GetChunkRefsSharded(b *testing.B) {
shardFactor := 16
b.Run(bm.name, func(b *testing.B) {
refs := tsdb.ChunkRefsPool.Get()
for i := 0; i < b.N; i++ {
for j := 0; j < shardFactor; j++ {
shard := index.ShardAnnotation{
Shard: uint32(j),
Of: uint32(shardFactor),
}
var err error
_, err := idx.GetChunkRefs(context.Background(), "fake", 0, math.MaxInt64, &shard, bm.matchers...)
refs, err = idx.GetChunkRefs(context.Background(), "fake", 0, math.MaxInt64, refs, &shard, bm.matchers...)
if err != nil {
panic(err)
}

Loading…
Cancel
Save