avoid using bloomfilters for chunks in stats calls by avoiding duplicates (#7209)

**What this PR does / why we need it**:
Avoid using bloom filters for chunk deduplication in TSDB `Stats` calls
by not fetching duplicate entries in the first place.
The idea is to split and align queries by
[ObjectStorageIndexRequiredPeriod](61794710a7/pkg/storage/config/schema_config.go (L47))
and have each split process only chunks whose start time is >= the start
time of its table interval.
In other words, the table interval that contains a chunk's start time
owns that chunk.
For example, if the table interval is 10s and we have chunks 5-7, 8-12,
and 11-13, a query with range 6-15 would be split into 6-10 and 10-15.
query1 would process chunks 5-7 and 8-12, and query2 would process
chunk 11-13.
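
A standalone sketch of that split logic (mirroring the `ForInterval` helper this PR adds to `pkg/util`, with the `endTimeInclusive` handling omitted for brevity; the 10s interval and Unix-second timestamps are illustrative only):

```go
package main

import (
	"fmt"
	"time"
)

// forInterval mirrors util.ForInterval from this PR: split boundaries are aligned
// to the interval, but the first split keeps the original start and the last split
// keeps the original end.
func forInterval(interval time.Duration, start, end time.Time, callback func(start, end time.Time)) {
	ogStart := start
	startNs := start.UnixNano()
	start = time.Unix(0, startNs-startNs%interval.Nanoseconds())
	first := true
	for cur := start; cur.Before(end); cur = cur.Add(interval) {
		newEnd := cur.Add(interval)
		if !newEnd.Before(end) {
			newEnd = end
		}
		if first {
			callback(ogStart, newEnd)
			first = false
			continue
		}
		callback(cur, newEnd)
	}
}

func main() {
	// A 6-15 query with a 10s table interval splits into 6-10 and 10-15.
	forInterval(10*time.Second, time.Unix(6, 0), time.Unix(15, 0), func(s, e time.Time) {
		fmt.Printf("split: %d-%d\n", s.Unix(), e.Unix())
	})
}
```

Running this prints `split: 6-10` and `split: 10-15`, matching the example above.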

This check is not applied to the first split so that we do not
eliminate chunks that overlap the original query interval but start in
the previous table.
For example, if the table interval is 10s and we have chunks 5-7, 8-13,
and 14-13, a query with range 11-12 should still process chunk 8-13 even
though its start time is earlier than the start time of the table whose
index we query.
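
A toy model of the resulting ownership rule (hypothetical `ownsChunk` helper; the real code threads a `shouldIncludeChunk` callback into `Stats` and checks overlap against the original query bounds, relying on per-table index lookups to scope each split):

```go
package main

import "fmt"

// chunkMeta is a stand-in for index.ChunkMeta; only the time bounds matter here.
type chunkMeta struct{ From, Through int64 }

type split struct{ Start, End int64 }

// ownsChunk models the rule: each split processes chunks overlapping its range,
// and every split except the first additionally requires the chunk to start at or
// after the split's start, so each chunk is counted by exactly one split.
func ownsChunk(splitIdx int, s split, chk chunkMeta) bool {
	overlaps := chk.Through > s.Start && chk.From < s.End
	return overlaps && (splitIdx == 0 || chk.From >= s.Start)
}

func main() {
	// Table interval 10, query 6-15 → splits 6-10 and 10-15; chunks 5-7, 8-12, 11-13.
	chunks := []chunkMeta{{5, 7}, {8, 12}, {11, 13}}
	splits := []split{{6, 10}, {10, 15}}
	for i, s := range splits {
		for _, chk := range chunks {
			if ownsChunk(i, s, chk) {
				fmt.Printf("split %d-%d owns chunk %d-%d\n", s.Start, s.End, chk.From, chk.Through)
			}
		}
	}
}
```

It prints that split 6-10 owns chunks 5-7 and 8-12 while split 10-15 owns only chunk 11-13, i.e. no chunk is counted twice.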

The caveat is that we will overestimate the amount of data to be
processed if the index is not yet compacted, since it can contain
duplicate chunks when the replication factor is > 1. This should be
acceptable since the Stats call is only an estimate and does not need to
be exact.

Removing all the extra processing saves us quite a bit of CPU and
memory, as seen from the benchmark comparison between the two
implementations:
```
name                  old time/op    new time/op    delta
IndexClient_Stats-10     187µs ± 0%      34µs ± 1%  -82.00%  (p=0.008 n=5+5)

name                  old alloc/op   new alloc/op   delta
IndexClient_Stats-10    61.5kB ± 4%    12.5kB ± 2%  -79.69%  (p=0.008 n=5+5)

name                  old allocs/op  new allocs/op  delta
IndexClient_Stats-10     1.46k ± 0%     0.48k ± 0%  -67.28%  (p=0.008 n=5+5)
```

**Checklist**
- [x] Tests updated
Sandeep Sukhani committed via GitHub: commit 7f298ff72f, parent bac52165d3
**Files changed** (lines changed per file):
- pkg/querier/queryrange/split_by_interval.go (42)
- pkg/storage/stores/index/stats/stats.go (19)
- pkg/storage/stores/index/stats/stats_test.go (23)
- pkg/storage/stores/tsdb/compact_test.go (2)
- pkg/storage/stores/tsdb/compactor_test.go (3)
- pkg/storage/stores/tsdb/head_manager.go (4)
- pkg/storage/stores/tsdb/index.go (6)
- pkg/storage/stores/tsdb/index_client.go (42)
- pkg/storage/stores/tsdb/index_client_test.go (202)
- pkg/storage/stores/tsdb/index_shipper_querier.go (13)
- pkg/storage/stores/tsdb/lazy_index.go (4)
- pkg/storage/stores/tsdb/multi_file_index.go (4)
- pkg/storage/stores/tsdb/multi_file_index_test.go (2)
- pkg/storage/stores/tsdb/multitenant.go (4)
- pkg/storage/stores/tsdb/single_file_index.go (6)
- pkg/storage/stores/tsdb/single_file_index_test.go (2)
- pkg/storage/stores/tsdb/util_test.go (2)
- pkg/util/time.go (26)
- pkg/util/time_test.go (96)

@ -230,7 +230,7 @@ func splitByTime(req queryrangebase.Request, interval time.Duration) ([]queryran
switch r := req.(type) {
case *LokiRequest:
forInterval(interval, r.StartTs, r.EndTs, false, func(start, end time.Time) {
util.ForInterval(interval, r.StartTs, r.EndTs, false, func(start, end time.Time) {
reqs = append(reqs, &LokiRequest{
Query: r.Query,
Limit: r.Limit,
@ -243,7 +243,10 @@ func splitByTime(req queryrangebase.Request, interval time.Duration) ([]queryran
})
})
case *LokiSeriesRequest:
forInterval(interval, r.StartTs, r.EndTs, true, func(start, end time.Time) {
// metadata queries have end time inclusive.
// Set endTimeInclusive to true so that ForInterval keeps a gap of 1ms between splits to
// avoid querying duplicate data in adjacent queries.
util.ForInterval(interval, r.StartTs, r.EndTs, true, func(start, end time.Time) {
reqs = append(reqs, &LokiSeriesRequest{
Match: r.Match,
Path: r.Path,
@ -253,7 +256,10 @@ func splitByTime(req queryrangebase.Request, interval time.Duration) ([]queryran
})
})
case *LokiLabelNamesRequest:
forInterval(interval, r.StartTs, r.EndTs, true, func(start, end time.Time) {
// metadata queries have end time inclusive.
// Set endTimeInclusive to true so that ForInterval keeps a gap of 1ms between splits to
// avoid querying duplicate data in adjacent queries.
util.ForInterval(interval, r.StartTs, r.EndTs, true, func(start, end time.Time) {
reqs = append(reqs, &LokiLabelNamesRequest{
Path: r.Path,
StartTs: start,
@ -266,34 +272,6 @@ func splitByTime(req queryrangebase.Request, interval time.Duration) ([]queryran
return reqs, nil
}
// forInterval splits the given start and end time into given interval.
// When endTimeInclusive is true, it would keep a gap of 1ms between the splits.
// The only queries that have both start and end time inclusive are metadata queries,
// and without keeping a gap, we would end up querying duplicate data in adjacent queries.
func forInterval(interval time.Duration, start, end time.Time, endTimeInclusive bool, callback func(start, end time.Time)) {
// align the start time by split interval for better query performance of metadata queries and
// better cache-ability of query types that are cached.
ogStart := start
startNs := start.UnixNano()
start = time.Unix(0, startNs-startNs%interval.Nanoseconds())
firstInterval := true
for start := start; start.Before(end); start = start.Add(interval) {
newEnd := start.Add(interval)
if !newEnd.Before(end) {
newEnd = end
} else if endTimeInclusive {
newEnd = newEnd.Add(-time.Millisecond)
}
if firstInterval {
callback(ogStart, newEnd)
firstInterval = false
continue
}
callback(start, newEnd)
}
}
// maxRangeVectorDuration returns the maximum range vector duration within a LogQL query.
func maxRangeVectorDuration(q string) (time.Duration, error) {
expr, err := syntax.ParseSampleExpr(q)
@ -347,7 +325,7 @@ func splitMetricByTime(r queryrangebase.Request, interval time.Duration) ([]quer
// step is >= configured split interval, let us just split the query interval by step
if lokiReq.Step >= interval.Milliseconds() {
forInterval(time.Duration(lokiReq.Step*1e6), lokiReq.StartTs, lokiReq.EndTs, false, func(start, end time.Time) {
util.ForInterval(time.Duration(lokiReq.Step*1e6), lokiReq.StartTs, lokiReq.EndTs, false, func(start, end time.Time) {
reqs = append(reqs, &LokiRequest{
Query: lokiReq.Query,
Limit: lokiReq.Limit,

@ -44,7 +44,6 @@ func (p *PoolBloom) Get() *Blooms {
func (p *PoolBloom) Put(x *Blooms) {
x.Streams.ClearAll()
x.Chunks.ClearAll()
x.stats = &Stats{}
p.pool.Put(x)
}
@ -70,11 +69,8 @@ func (p *PoolBloom) Put(x *Blooms) {
func newBlooms() *Blooms {
// 1 million streams @ 1% error =~ 1.14MB
streams := bloom.NewWithEstimates(1e6, 0.01)
// 10 million chunks @ 1% error =~ 11.43MB
chunks := bloom.NewWithEstimates(10e6, 0.01)
return &Blooms{
Streams: streams,
Chunks: chunks,
stats: &Stats{},
}
}
@ -86,8 +82,8 @@ func newBlooms() *Blooms {
// statistics prior to running queries.
type Blooms struct {
sync.RWMutex
Streams, Chunks *bloom.BloomFilter
stats *Stats
Streams *bloom.BloomFilter
stats *Stats
}
func (b *Blooms) Stats() Stats { return b.stats.Stats() }
@ -101,16 +97,7 @@ func (b *Blooms) AddStream(fp model.Fingerprint) {
}
func (b *Blooms) AddChunk(fp model.Fingerprint, chk index.ChunkMeta) {
// fingerprint + mintime + maxtime + checksum
ln := 8 + 8 + 8 + 4
key := make([]byte, ln)
binary.BigEndian.PutUint64(key, uint64(fp))
binary.BigEndian.PutUint64(key[8:], uint64(chk.MinTime))
binary.BigEndian.PutUint64(key[16:], uint64(chk.MaxTime))
binary.BigEndian.PutUint32(key[24:], chk.Checksum)
b.add(b.Chunks, key, func() {
b.stats.AddChunk(fp, chk)
})
b.stats.AddChunk(fp, chk)
}
func (b *Blooms) add(filter *bloom.BloomFilter, key []byte, update func()) {

@ -6,8 +6,6 @@ import (
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
)
func TestStatsBloom_Stream(t *testing.T) {
@ -24,24 +22,3 @@ func TestStatsBloom_Stream(t *testing.T) {
require.Equal(t, uint64(2), sb.stats.Streams)
}
func TestStatsBloom_Chunks(t *testing.T) {
sb := BloomPool.Get()
var wg sync.WaitGroup
for i := 0; i < 10; i++ {
wg.Add(1)
go func(x int) {
sb.AddChunk(model.Fingerprint(x%2), index.ChunkMeta{
Checksum: uint32(x) % 4,
KB: 1,
Entries: 1,
})
wg.Done()
}(i)
}
wg.Wait()
require.Equal(t, 4, int(sb.stats.Chunks))
require.Equal(t, 4<<10, int(sb.stats.Bytes))
require.Equal(t, 4, int(sb.stats.Entries))
}

@ -354,7 +354,7 @@ func TestCompactor(t *testing.T) {
var indices []*TSDBIndex
for _, cases := range tc.input {
idx := BuildIndex(t, dir, "fake", cases)
idx := BuildIndex(t, dir, cases)
defer idx.Close()
casted, ok := idx.Index.(*TSDBIndex)
require.Equal(t, true, ok)

@ -199,6 +199,7 @@ func buildChunkMetas(from, to int64) index.ChunkMetas {
MinTime: i,
MaxTime: i,
Checksum: uint32(i),
Entries: 1,
})
}
@ -877,5 +878,5 @@ type dummyChunkData struct {
}
func (d dummyChunkData) Entries() int {
return 0
return 1
}

@ -710,12 +710,12 @@ func (t *tenantHeads) LabelValues(ctx context.Context, userID string, from, thro
}
func (t *tenantHeads) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, matchers ...*labels.Matcher) error {
func (t *tenantHeads) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, shouldIncludeChunk shouldIncludeChunk, matchers ...*labels.Matcher) error {
idx, ok := t.tenantIndex(userID, from, through)
if !ok {
return nil
}
return idx.Stats(ctx, userID, from, through, acc, shard, matchers...)
return idx.Stats(ctx, userID, from, through, acc, shard, shouldIncludeChunk, matchers...)
}
// helper only used in building TSDBs

@ -31,6 +31,8 @@ func (r ChunkRef) Less(x ChunkRef) bool {
return r.End <= x.End
}
type shouldIncludeChunk func(index.ChunkMeta) bool
type Index interface {
Bounded
SetChunkFilterer(chunkFilter chunk.RequestChunkFilterer)
@ -50,7 +52,7 @@ type Index interface {
Series(ctx context.Context, userID string, from, through model.Time, res []Series, shard *index.ShardAnnotation, matchers ...*labels.Matcher) ([]Series, error)
LabelNames(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) ([]string, error)
LabelValues(ctx context.Context, userID string, from, through model.Time, name string, matchers ...*labels.Matcher) ([]string, error)
Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, matchers ...*labels.Matcher) error
Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, shouldIncludeChunk shouldIncludeChunk, matchers ...*labels.Matcher) error
}
type NoopIndex struct{}
@ -72,7 +74,7 @@ func (NoopIndex) LabelValues(ctx context.Context, userID string, from, through m
return nil, nil
}
func (NoopIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, matchers ...*labels.Matcher) error {
func (NoopIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, shouldIncludeChunk shouldIncludeChunk, matchers ...*labels.Matcher) error {
return nil
}

@ -2,6 +2,7 @@ package tsdb
import (
"context"
"time"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
@ -10,8 +11,10 @@ import (
"github.com/grafana/loki/pkg/logql/syntax"
"github.com/grafana/loki/pkg/querier/astmapper"
"github.com/grafana/loki/pkg/storage/chunk"
"github.com/grafana/loki/pkg/storage/config"
"github.com/grafana/loki/pkg/storage/stores/index/stats"
"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
"github.com/grafana/loki/pkg/util"
"github.com/grafana/loki/pkg/util/spanlogger"
)
@ -171,6 +174,27 @@ func (c *IndexClient) Stats(ctx context.Context, userID string, from, through mo
return nil, err
}
// split the query range to align with table intervals i.e. ObjectStorageIndexRequiredPeriod
// This is to avoid explicitly deduping chunks by leveraging the table intervals.
// The idea is to make each split process chunks that have start time >= start time of the table interval.
// In other terms, table interval that contains start time of the chunk, owns it.
// For e.g. if the table interval is 10s, and we have chunks 5-7, 8-12, 11-13.
// Query with range 6-15 would be split into 6-10, 10-15.
// query1 would process chunks 5-7, 8-12 and query2 would process chunks 11-13.
// This check is not applied for first query of the split so that
// we do not eliminate any chunks that overlaps the original query intervals but starts at the previous table.
// For e.g. if the table interval is 10s, and we have chunks 5-7, 8-13, 14-13.
// Query with range 11-12 should process chunk 8-13 even though its start time <= start time of table we will query for index.
// The caveat here is that we will overestimate the data we will be processing if the index is not compacted yet
// since it could have duplicate chunks when RF > 1
var intervals []model.Interval
util.ForInterval(config.ObjectStorageIndexRequiredPeriod, from.Time(), through.Time(), true, func(start, end time.Time) {
intervals = append(intervals, model.Interval{
Start: model.TimeFromUnixNano(start.UnixNano()),
End: model.TimeFromUnixNano(end.UnixNano()),
})
})
var acc IndexStatsAccumulator
if c.opts.UseBloomFilters {
blooms := stats.BloomPool.Get()
@ -179,7 +203,23 @@ func (c *IndexClient) Stats(ctx context.Context, userID string, from, through mo
} else {
acc = &stats.Stats{}
}
err = c.idx.Stats(ctx, userID, from, through, acc, shard, matchers...)
queryBounds := newBounds(from, through)
for idx, interval := range intervals {
if err := c.idx.Stats(ctx, userID, interval.Start, interval.End, acc, shard, func(chk index.ChunkMeta) bool {
// for the first split, purely do overlap check to also include chunks having
// start time earlier than start time of the table interval we are querying.
// for all other splits, consider only chunks that have from >= interval.Start
// so that we start after the start time of the index table we are querying.
if Overlap(queryBounds, chk) && (idx == 0 || chk.From() >= interval.Start) {
return true
}
return false
}, matchers...); err != nil {
return nil, err
}
}
if err != nil {
return nil, err

@ -0,0 +1,202 @@
package tsdb
import (
"context"
"math"
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
"github.com/stretchr/testify/require"
"github.com/grafana/loki/pkg/storage/config"
index_shipper "github.com/grafana/loki/pkg/storage/stores/indexshipper/index"
)
type mockIndexShipperIndexIterator struct {
tables map[string][]*TSDBFile
}
func (m mockIndexShipperIndexIterator) ForEach(ctx context.Context, tableName, userID string, callback index_shipper.ForEachIndexCallback) error {
indexes := m.tables[tableName]
for _, idx := range indexes {
if err := callback(false, idx); err != nil {
return err
}
}
return nil
}
func BenchmarkIndexClient_Stats(b *testing.B) {
tempDir := b.TempDir()
tableRanges := config.TableRanges{
{
Start: 0,
End: math.MaxInt64,
PeriodConfig: &config.PeriodConfig{
IndexTables: config.PeriodicTableConfig{
Period: config.ObjectStorageIndexRequiredPeriod,
},
},
},
}
indexStartToday := model.TimeFromUnixNano(time.Now().Truncate(config.ObjectStorageIndexRequiredPeriod).UnixNano())
indexStartYesterday := indexStartToday.Add(-config.ObjectStorageIndexRequiredPeriod)
tables := map[string][]*TSDBFile{
tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartToday): {
BuildIndex(b, tempDir, []LoadableSeries{
{
Labels: mustParseLabels(`{foo="bar"}`),
Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
},
}),
},
tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartYesterday): {
BuildIndex(b, tempDir, []LoadableSeries{
{
Labels: mustParseLabels(`{foo="bar"}`),
Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
},
}),
},
}
idx := newIndexShipperQuerier(mockIndexShipperIndexIterator{tables: tables}, config.TableRanges{
{
Start: 0,
End: math.MaxInt64,
PeriodConfig: &config.PeriodConfig{},
},
})
indexClient := NewIndexClient(idx, IndexClientOptions{UseBloomFilters: true})
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
stats, err := indexClient.Stats(context.Background(), "", indexStartYesterday-1000, model.Now()+1000, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.NoError(b, err)
require.Equal(b, uint64(200), stats.Chunks)
require.Equal(b, uint64(200), stats.Entries)
}
}
func TestIndexClient_Stats(t *testing.T) {
tempDir := t.TempDir()
tableRanges := config.TableRanges{
{
Start: 0,
End: math.MaxInt64,
PeriodConfig: &config.PeriodConfig{
IndexTables: config.PeriodicTableConfig{
Period: config.ObjectStorageIndexRequiredPeriod,
},
},
},
}
indexStartToday := model.TimeFromUnixNano(time.Now().Truncate(config.ObjectStorageIndexRequiredPeriod).UnixNano())
indexStartYesterday := indexStartToday.Add(-config.ObjectStorageIndexRequiredPeriod)
tables := map[string][]*TSDBFile{
tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartToday): {
BuildIndex(t, tempDir, []LoadableSeries{
{
Labels: mustParseLabels(`{foo="bar"}`),
Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
},
{
Labels: mustParseLabels(`{fizz="buzz"}`),
Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
},
}),
},
tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartYesterday): {
BuildIndex(t, tempDir, []LoadableSeries{
{
Labels: mustParseLabels(`{foo="bar"}`),
Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
},
{
Labels: mustParseLabels(`{foo="bar", fizz="buzz"}`),
Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
},
{
Labels: mustParseLabels(`{ping="pong"}`),
Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
},
}),
},
}
idx := newIndexShipperQuerier(mockIndexShipperIndexIterator{tables: tables}, config.TableRanges{
{
Start: 0,
End: math.MaxInt64,
PeriodConfig: &config.PeriodConfig{},
},
})
indexClient := NewIndexClient(idx, IndexClientOptions{UseBloomFilters: true})
for _, tc := range []struct {
name string
queryInterval model.Interval
expectedNumChunks uint64
expectedNumEntries uint64
expectedNumStreams uint64
}{
{
name: "request spanning 2 tables",
queryInterval: model.Interval{
Start: indexStartYesterday,
End: indexStartToday + 1000,
},
expectedNumChunks: 298, // 2 chunks not included at indexStartYesterday since start time is not inclusive
expectedNumEntries: 298,
expectedNumStreams: 2,
},
{
name: "request spanning just today",
queryInterval: model.Interval{
Start: indexStartToday,
End: indexStartToday + 1000,
},
expectedNumChunks: 99, // 1 chunk not included at indexStartToday since start time is not inclusive
expectedNumEntries: 99,
expectedNumStreams: 1,
},
{
name: "request selecting just few of the chunks from today",
queryInterval: model.Interval{
Start: indexStartToday + 50,
End: indexStartToday + 60,
},
expectedNumChunks: 9, // start and end are not inclusive
expectedNumEntries: 9,
expectedNumStreams: 1,
},
{
name: "request not touching any chunks",
queryInterval: model.Interval{
Start: indexStartToday + 2000,
End: indexStartToday + 3000,
},
},
} {
t.Run(tc.name, func(t *testing.T) {
stats, err := indexClient.Stats(context.Background(), "", tc.queryInterval.Start, tc.queryInterval.End, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
require.NoError(t, err)
require.Equal(t, tc.expectedNumChunks, stats.Chunks)
require.Equal(t, tc.expectedNumEntries, stats.Entries)
require.Equal(t, tc.expectedNumStreams, stats.Streams)
})
}
}

@ -10,19 +10,22 @@ import (
"github.com/grafana/loki/pkg/storage/chunk"
"github.com/grafana/loki/pkg/storage/config"
"github.com/grafana/loki/pkg/storage/stores/indexshipper"
shipper_index "github.com/grafana/loki/pkg/storage/stores/indexshipper/index"
"github.com/grafana/loki/pkg/storage/stores/tsdb/index"
)
type indexShipperIterator interface {
ForEach(ctx context.Context, tableName, userID string, callback shipper_index.ForEachIndexCallback) error
}
// indexShipperQuerier is used for querying index from the shipper.
type indexShipperQuerier struct {
shipper indexshipper.IndexShipper
shipper indexShipperIterator
chunkFilter chunk.RequestChunkFilterer
tableRanges config.TableRanges
}
func newIndexShipperQuerier(shipper indexshipper.IndexShipper, tableRanges config.TableRanges) Index {
func newIndexShipperQuerier(shipper indexShipperIterator, tableRanges config.TableRanges) Index {
return &indexShipperQuerier{shipper: shipper, tableRanges: tableRanges}
}
@ -112,11 +115,11 @@ func (i *indexShipperQuerier) LabelValues(ctx context.Context, userID string, fr
return idx.LabelValues(ctx, userID, from, through, name, matchers...)
}
func (i *indexShipperQuerier) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, matchers ...*labels.Matcher) error {
func (i *indexShipperQuerier) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, shouldIncludeChunk shouldIncludeChunk, matchers ...*labels.Matcher) error {
idx, err := i.indices(ctx, from, through, userID)
if err != nil {
return err
}
return idx.Stats(ctx, userID, from, through, acc, shard, matchers...)
return idx.Stats(ctx, userID, from, through, acc, shard, shouldIncludeChunk, matchers...)
}

@ -65,10 +65,10 @@ func (f LazyIndex) LabelValues(ctx context.Context, userID string, from, through
return i.LabelValues(ctx, userID, from, through, name, matchers...)
}
func (f LazyIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, matchers ...*labels.Matcher) error {
func (f LazyIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, shouldIncludeChunk shouldIncludeChunk, matchers ...*labels.Matcher) error {
i, err := f()
if err != nil {
return err
}
return i.Stats(ctx, userID, from, through, acc, shard, matchers...)
return i.Stats(ctx, userID, from, through, acc, shard, shouldIncludeChunk, matchers...)
}

@ -236,9 +236,9 @@ func (i *MultiIndex) LabelValues(ctx context.Context, userID string, from, throu
return results, nil
}
func (i *MultiIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, matchers ...*labels.Matcher) error {
func (i *MultiIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, shouldIncludeChunk shouldIncludeChunk, matchers ...*labels.Matcher) error {
_, err := i.forIndices(ctx, from, through, func(ctx context.Context, idx Index) (interface{}, error) {
return nil, idx.Stats(ctx, userID, from, through, acc, shard, matchers...)
return nil, idx.Stats(ctx, userID, from, through, acc, shard, shouldIncludeChunk, matchers...)
})
return err
}

@ -61,7 +61,7 @@ func TestMultiIndex(t *testing.T) {
var indices []Index
dir := t.TempDir()
for i := 0; i < n; i++ {
indices = append(indices, BuildIndex(t, dir, "fake", cases))
indices = append(indices, BuildIndex(t, dir, cases))
}
idx, err := NewMultiIndex(indices...)

@ -89,6 +89,6 @@ func (m *MultiTenantIndex) LabelValues(ctx context.Context, userID string, from,
return m.idx.LabelValues(ctx, userID, from, through, name, withTenantLabelMatcher(userID, matchers)...)
}
func (m *MultiTenantIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, matchers ...*labels.Matcher) error {
return m.idx.Stats(ctx, userID, from, through, acc, shard, withTenantLabelMatcher(userID, matchers)...)
func (m *MultiTenantIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, shouldIncludeChunk shouldIncludeChunk, matchers ...*labels.Matcher) error {
return m.idx.Stats(ctx, userID, from, through, acc, shard, shouldIncludeChunk, withTenantLabelMatcher(userID, matchers)...)
}

@ -263,15 +263,13 @@ func (i *TSDBIndex) Identifier(string) SingleTenantTSDBIdentifier {
}
}
func (i *TSDBIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, matchers ...*labels.Matcher) error {
queryBounds := newBounds(from, through)
func (i *TSDBIndex) Stats(ctx context.Context, userID string, from, through model.Time, acc IndexStatsAccumulator, shard *index.ShardAnnotation, shouldIncludeChunk shouldIncludeChunk, matchers ...*labels.Matcher) error {
if err := i.forSeries(ctx, shard,
func(ls labels.Labels, fp model.Fingerprint, chks []index.ChunkMeta) {
// TODO(owen-d): use logarithmic approach
var addedStream bool
for _, chk := range chks {
if Overlap(queryBounds, chk) {
if shouldIncludeChunk(chk) {
if !addedStream {
acc.AddStream(fp)
addedStream = true

@ -64,7 +64,7 @@ func TestSingleIdx(t *testing.T) {
{
desc: "file",
fn: func() Index {
return BuildIndex(t, t.TempDir(), "fake", cases)
return BuildIndex(t, t.TempDir(), cases)
},
},
{

@ -17,7 +17,7 @@ type LoadableSeries struct {
Chunks index.ChunkMetas
}
func BuildIndex(t *testing.T, dir, tenant string, cases []LoadableSeries) *TSDBFile {
func BuildIndex(t testing.TB, dir string, cases []LoadableSeries) *TSDBFile {
b := NewBuilder()
for _, s := range cases {

@ -84,3 +84,29 @@ func NewDisableableTicker(interval time.Duration) (func(), <-chan time.Time) {
tick := time.NewTicker(interval)
return func() { tick.Stop() }, tick.C
}
// ForInterval splits the given start and end time into given interval.
// The start and end time in splits would be aligned to the interval
// except for the start time of first split and end time of last split which would be kept same as original start/end
// When endTimeInclusive is true, it would keep a gap of 1ms between the splits.
func ForInterval(interval time.Duration, start, end time.Time, endTimeInclusive bool, callback func(start, end time.Time)) {
ogStart := start
startNs := start.UnixNano()
start = time.Unix(0, startNs-startNs%interval.Nanoseconds())
firstInterval := true
for start := start; start.Before(end); start = start.Add(interval) {
newEnd := start.Add(interval)
if !newEnd.Before(end) {
newEnd = end
} else if endTimeInclusive {
newEnd = newEnd.Add(-time.Millisecond)
}
if firstInterval {
callback(ogStart, newEnd)
firstInterval = false
continue
}
callback(start, newEnd)
}
}

@ -131,3 +131,99 @@ func TestNewDisableableTicker_Disabled(t *testing.T) {
break
}
}
type timeInterval struct {
from, through time.Time
}
func TestForInterval(t *testing.T) {
splitInterval := 10 * time.Second
for _, tc := range []struct {
name string
inp timeInterval
expectedIntervals []timeInterval
endTimeInclusive bool
}{
{
name: "range smaller than split interval",
inp: timeInterval{
from: time.Unix(5, 0),
through: time.Unix(8, 0),
},
expectedIntervals: []timeInterval{
{
from: time.Unix(5, 0),
through: time.Unix(8, 0),
},
},
},
{
name: "range exactly equal and aligned to split interval",
inp: timeInterval{
from: time.Unix(10, 0),
through: time.Unix(20, 0),
},
expectedIntervals: []timeInterval{
{
from: time.Unix(10, 0),
through: time.Unix(20, 0),
},
},
},
{
name: "multiple splits with end time not inclusive",
inp: timeInterval{
from: time.Unix(5, 0),
through: time.Unix(28, 0),
},
expectedIntervals: []timeInterval{
{
from: time.Unix(5, 0),
through: time.Unix(10, 0),
},
{
from: time.Unix(10, 0),
through: time.Unix(20, 0),
},
{
from: time.Unix(20, 0),
through: time.Unix(28, 0),
},
},
},
{
name: "multiple splits with end time inclusive",
inp: timeInterval{
from: time.Unix(5, 0),
through: time.Unix(28, 0),
},
endTimeInclusive: true,
expectedIntervals: []timeInterval{
{
from: time.Unix(5, 0),
through: time.Unix(10, 0).Add(-time.Millisecond),
},
{
from: time.Unix(10, 0),
through: time.Unix(20, 0).Add(-time.Millisecond),
},
{
from: time.Unix(20, 0),
through: time.Unix(28, 0),
},
},
},
} {
t.Run(tc.name, func(t *testing.T) {
var actualIntervals []timeInterval
ForInterval(splitInterval, tc.inp.from, tc.inp.through, tc.endTimeInclusive, func(start, end time.Time) {
actualIntervals = append(actualIntervals, timeInterval{
from: start,
through: end,
})
})
require.Equal(t, tc.expectedIntervals, actualIntervals)
})
}
}
