avoid using bloomfilters for chunks in stats calls by avoiding duplicates (#7209)
**What this PR does / why we need it**:
Avoid using bloomfilters for chunk deduplication in tsdb `Stats` calls
by avoiding fetching duplicate entries in the first place.
The idea is to split and align queries by
[ObjectStorageIndexRequiredPeriod](61794710a7/pkg/storage/config/schema_config.go#L47)
and have each split process only chunks whose start time is >= the start
time of that split's table interval. In other words, the table interval
that contains a chunk's start time owns it.
For example, if the table interval is 10s and we have chunks 5-7, 8-12,
and 11-13, a query with range 6-15 would be split into 6-10 and 10-15.
query1 would process chunks 5-7 and 8-12, and query2 would process
chunk 11-13.
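A minimal sketch of that split-and-align step on a simplified integer timeline (the `interval` type and `splitByTableBoundaries` helper are illustrative names, not Loki's actual API):

```go
package main

import "fmt"

// interval is a query range on a simplified integer timeline.
type interval struct{ start, end int64 }

// splitByTableBoundaries splits a query at table boundaries so each
// sub-query falls inside exactly one index table. With period=10,
// the range 6-15 becomes 6-10 and 10-15, as in the example above.
func splitByTableBoundaries(q interval, period int64) []interval {
	var splits []interval
	for start := q.start; start < q.end; {
		tableEnd := (start/period + 1) * period // end of the table containing start
		end := tableEnd
		if q.end < end {
			end = q.end
		}
		splits = append(splits, interval{start, end})
		start = end
	}
	return splits
}

func main() {
	fmt.Println(splitByTableBoundaries(interval{start: 6, end: 15}, 10)) // [{6 10} {10 15}]
}
```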
This check is not applied to the first split so that we do not
eliminate chunks that overlap the original query interval but start in
the previous table. For example, if the table interval is 10s and we
have chunks 5-7 and 8-13, a query with range 11-12 should still process
chunk 8-13 even though its start time is before the start time of the
table we will query for the index.
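Put together, the per-split ownership rule plus the first-split exception could look roughly like this (a hedged sketch, not the actual implementation; `chunkStart`, `splitStart`, and `firstSplit` are illustrative names):

```go
// keepChunk reports whether a split should count an overlapping chunk.
// Non-first splits only keep chunks that start at or after their table's
// start, so a chunk spanning two tables is counted exactly once, by the
// table that contains its start time. The first split keeps every
// overlapping chunk so that chunks starting in the previous table, like
// 8-13 for the 11-12 query above, are not dropped.
func keepChunk(chunkStart, splitStart int64, firstSplit bool) bool {
	if firstSplit {
		return true
	}
	return chunkStart >= splitStart
}
```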
The caveat here is that we will overestimate the data to be processed
if the index is not yet compacted, since it can contain duplicate chunks
when RF > 1; for example, with RF=3 an uncompacted index may list a
chunk up to three times. I think that is okay since the Stats call is
just an estimation and need not be exact.
Removing all the extra processing saves us quite a bit of CPU and
memory, as seen from the benchmark comparison between the two
implementations:
```
name                  old time/op    new time/op    delta
IndexClient_Stats-10     187µs ± 0%      34µs ± 1%  -82.00%  (p=0.008 n=5+5)

name                  old alloc/op   new alloc/op   delta
IndexClient_Stats-10    61.5kB ± 4%    12.5kB ± 2%  -79.69%  (p=0.008 n=5+5)

name                  old allocs/op  new allocs/op  delta
IndexClient_Stats-10     1.46k ± 0%     0.48k ± 0%  -67.28%  (p=0.008 n=5+5)
```
**Checklist**
- [x] Tests updated
The added test file (+202 lines):

```go
package tsdb

import (
	"context"
	"math"
	"testing"
	"time"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/pkg/storage/config"
	index_shipper "github.com/grafana/loki/pkg/storage/stores/indexshipper/index"
)

// mockIndexShipperIndexIterator serves pre-built TSDB files keyed by table name.
type mockIndexShipperIndexIterator struct {
	tables map[string][]*TSDBFile
}

func (m mockIndexShipperIndexIterator) ForEach(ctx context.Context, tableName, userID string, callback index_shipper.ForEachIndexCallback) error {
	indexes := m.tables[tableName]
	for _, idx := range indexes {
		if err := callback(false, idx); err != nil {
			return err
		}
	}

	return nil
}

func BenchmarkIndexClient_Stats(b *testing.B) {
	tempDir := b.TempDir()
	tableRanges := config.TableRanges{
		{
			Start: 0,
			End:   math.MaxInt64,
			PeriodConfig: &config.PeriodConfig{
				IndexTables: config.PeriodicTableConfig{
					Period: config.ObjectStorageIndexRequiredPeriod,
				},
			},
		},
	}

	indexStartToday := model.TimeFromUnixNano(time.Now().Truncate(config.ObjectStorageIndexRequiredPeriod).UnixNano())
	indexStartYesterday := indexStartToday.Add(-config.ObjectStorageIndexRequiredPeriod)

	tables := map[string][]*TSDBFile{
		tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartToday): {
			BuildIndex(b, tempDir, []LoadableSeries{
				{
					Labels: mustParseLabels(`{foo="bar"}`),
					Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
				},
			}),
		},

		tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartYesterday): {
			BuildIndex(b, tempDir, []LoadableSeries{
				{
					Labels: mustParseLabels(`{foo="bar"}`),
					Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
				},
			}),
		},
	}

	idx := newIndexShipperQuerier(mockIndexShipperIndexIterator{tables: tables}, config.TableRanges{
		{
			Start:        0,
			End:          math.MaxInt64,
			PeriodConfig: &config.PeriodConfig{},
		},
	})

	indexClient := NewIndexClient(idx, IndexClientOptions{UseBloomFilters: true})

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		stats, err := indexClient.Stats(context.Background(), "", indexStartYesterday-1000, model.Now()+1000, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
		require.NoError(b, err)
		require.Equal(b, uint64(200), stats.Chunks)
		require.Equal(b, uint64(200), stats.Entries)
	}
}

func TestIndexClient_Stats(t *testing.T) {
	tempDir := t.TempDir()
	tableRanges := config.TableRanges{
		{
			Start: 0,
			End:   math.MaxInt64,
			PeriodConfig: &config.PeriodConfig{
				IndexTables: config.PeriodicTableConfig{
					Period: config.ObjectStorageIndexRequiredPeriod,
				},
			},
		},
	}

	indexStartToday := model.TimeFromUnixNano(time.Now().Truncate(config.ObjectStorageIndexRequiredPeriod).UnixNano())
	indexStartYesterday := indexStartToday.Add(-config.ObjectStorageIndexRequiredPeriod)

	tables := map[string][]*TSDBFile{
		tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartToday): {
			BuildIndex(t, tempDir, []LoadableSeries{
				{
					Labels: mustParseLabels(`{foo="bar"}`),
					Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
				},
				{
					Labels: mustParseLabels(`{fizz="buzz"}`),
					Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
				},
			}),
		},

		tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartYesterday): {
			BuildIndex(t, tempDir, []LoadableSeries{
				{
					Labels: mustParseLabels(`{foo="bar"}`),
					Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
				},
				{
					Labels: mustParseLabels(`{foo="bar", fizz="buzz"}`),
					Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
				},
				{
					Labels: mustParseLabels(`{ping="pong"}`),
					Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
				},
			}),
		},
	}

	idx := newIndexShipperQuerier(mockIndexShipperIndexIterator{tables: tables}, config.TableRanges{
		{
			Start:        0,
			End:          math.MaxInt64,
			PeriodConfig: &config.PeriodConfig{},
		},
	})

	indexClient := NewIndexClient(idx, IndexClientOptions{UseBloomFilters: true})

	for _, tc := range []struct {
		name               string
		queryInterval      model.Interval
		expectedNumChunks  uint64
		expectedNumEntries uint64
		expectedNumStreams uint64
	}{
		{
			name: "request spanning 2 tables",
			queryInterval: model.Interval{
				Start: indexStartYesterday,
				End:   indexStartToday + 1000,
			},
			expectedNumChunks:  298, // 2 chunks not included at indexStartYesterday since start time is not inclusive
			expectedNumEntries: 298,
			expectedNumStreams: 2,
		},
		{
			name: "request spanning just today",
			queryInterval: model.Interval{
				Start: indexStartToday,
				End:   indexStartToday + 1000,
			},
			expectedNumChunks:  99, // 1 chunk not included at indexStartToday since start time is not inclusive
			expectedNumEntries: 99,
			expectedNumStreams: 1,
		},
		{
			name: "request selecting just few of the chunks from today",
			queryInterval: model.Interval{
				Start: indexStartToday + 50,
				End:   indexStartToday + 60,
			},
			expectedNumChunks:  9, // start and end are not inclusive
			expectedNumEntries: 9,
			expectedNumStreams: 1,
		},
		{
			name: "request not touching any chunks",
			queryInterval: model.Interval{
				Start: indexStartToday + 2000,
				End:   indexStartToday + 3000,
			},
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			stats, err := indexClient.Stats(context.Background(), "", tc.queryInterval.Start, tc.queryInterval.End, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
			require.NoError(t, err)
			require.Equal(t, tc.expectedNumChunks, stats.Chunks)
			require.Equal(t, tc.expectedNumEntries, stats.Entries)
			require.Equal(t, tc.expectedNumStreams, stats.Streams)
		})
	}
}
```