avoid using bloomfilters for chunks in stats calls by avoiding duplicates (#7209)
**What this PR does / why we need it**:
Avoid using bloomfilters for chunk deduplication in tsdb `Stats` calls
by avoiding fetching duplicate entries in the first place.
The idea is to split and align queries by
[ObjectStorageIndexRequiredPeriod](61794710a7/pkg/storage/config/schema_config.go#L47)
and have each split process only chunks whose start time is >= the start
time of that split's table interval. In other words, the table interval
that contains a chunk's start time owns it.
For example, if the table interval is 10s and we have chunks 5-7, 8-12,
and 11-13, a query with range 6-15 would be split into 6-10 and 10-15.
query1 would process chunks 5-7 and 8-12, and query2 would process
chunk 11-13.
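A minimal sketch of that split-and-align step on a simplified integer timeline (the `interval` type and `splitByTableBoundaries` helper are illustrative names, not Loki's actual API):

```go
package main

import "fmt"

// interval is a query range on a simplified integer timeline.
type interval struct{ start, end int64 }

// splitByTableBoundaries splits a query at table boundaries so each
// sub-query falls inside exactly one index table. With period=10,
// the range 6-15 becomes 6-10 and 10-15, as in the example above.
func splitByTableBoundaries(q interval, period int64) []interval {
	var splits []interval
	for start := q.start; start < q.end; {
		tableEnd := (start/period + 1) * period // end of the table containing start
		end := tableEnd
		if q.end < end {
			end = q.end
		}
		splits = append(splits, interval{start, end})
		start = end
	}
	return splits
}

func main() {
	fmt.Println(splitByTableBoundaries(interval{start: 6, end: 15}, 10)) // [{6 10} {10 15}]
}
```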
This check is not applied to the first split so that we do not
eliminate chunks that overlap the original query interval but start in
the previous table. For example, if the table interval is 10s and we
have chunks 5-7 and 8-13, a query with range 11-12 should still process
chunk 8-13 even though its start time is before the start time of the
table we will query for the index.
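Put together, the per-split ownership rule plus the first-split exception could look roughly like this (a hedged sketch, not the actual implementation; `chunkStart`, `splitStart`, and `firstSplit` are illustrative names):

```go
// keepChunk reports whether a split should count an overlapping chunk.
// Non-first splits only keep chunks that start at or after their table's
// start, so a chunk spanning two tables is counted exactly once, by the
// table that contains its start time. The first split keeps every
// overlapping chunk so that chunks starting in the previous table, like
// 8-13 for the 11-12 query above, are not dropped.
func keepChunk(chunkStart, splitStart int64, firstSplit bool) bool {
	if firstSplit {
		return true
	}
	return chunkStart >= splitStart
}
```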
The caveat here is that we will overestimate the data to be processed
if the index is not yet compacted, since it can contain duplicate chunks
when RF > 1; for example, with RF=3 an uncompacted index may list a
chunk up to three times. I think that is okay since the Stats call is
just an estimation and need not be exact.
Removing all the extra processing saves us quite a bit of CPU and
memory, as seen from the benchmark comparison between the two
implementations:
```
name                  old time/op    new time/op    delta
IndexClient_Stats-10     187µs ± 0%      34µs ± 1%  -82.00%  (p=0.008 n=5+5)

name                  old alloc/op   new alloc/op   delta
IndexClient_Stats-10    61.5kB ± 4%    12.5kB ± 2%  -79.69%  (p=0.008 n=5+5)

name                  old allocs/op  new allocs/op  delta
IndexClient_Stats-10     1.46k ± 0%     0.48k ± 0%  -67.28%  (p=0.008 n=5+5)
```
**Checklist**
- [x] Tests updated
The added test file (+202 lines):

```go
package tsdb

import (
	"context"
	"math"
	"testing"
	"time"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/stretchr/testify/require"

	"github.com/grafana/loki/pkg/storage/config"
	index_shipper "github.com/grafana/loki/pkg/storage/stores/indexshipper/index"
)

// mockIndexShipperIndexIterator serves pre-built TSDB files keyed by table name.
type mockIndexShipperIndexIterator struct {
	tables map[string][]*TSDBFile
}

func (m mockIndexShipperIndexIterator) ForEach(ctx context.Context, tableName, userID string, callback index_shipper.ForEachIndexCallback) error {
	indexes := m.tables[tableName]
	for _, idx := range indexes {
		if err := callback(false, idx); err != nil {
			return err
		}
	}

	return nil
}

func BenchmarkIndexClient_Stats(b *testing.B) {
	tempDir := b.TempDir()
	tableRanges := config.TableRanges{
		{
			Start: 0,
			End:   math.MaxInt64,
			PeriodConfig: &config.PeriodConfig{
				IndexTables: config.PeriodicTableConfig{
					Period: config.ObjectStorageIndexRequiredPeriod,
				},
			},
		},
	}

	indexStartToday := model.TimeFromUnixNano(time.Now().Truncate(config.ObjectStorageIndexRequiredPeriod).UnixNano())
	indexStartYesterday := indexStartToday.Add(-config.ObjectStorageIndexRequiredPeriod)

	tables := map[string][]*TSDBFile{
		tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartToday): {
			BuildIndex(b, tempDir, []LoadableSeries{
				{
					Labels: mustParseLabels(`{foo="bar"}`),
					Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
				},
			}),
		},

		tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartYesterday): {
			BuildIndex(b, tempDir, []LoadableSeries{
				{
					Labels: mustParseLabels(`{foo="bar"}`),
					Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
				},
			}),
		},
	}

	idx := newIndexShipperQuerier(mockIndexShipperIndexIterator{tables: tables}, config.TableRanges{
		{
			Start:        0,
			End:          math.MaxInt64,
			PeriodConfig: &config.PeriodConfig{},
		},
	})

	indexClient := NewIndexClient(idx, IndexClientOptions{UseBloomFilters: true})

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		stats, err := indexClient.Stats(context.Background(), "", indexStartYesterday-1000, model.Now()+1000, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
		require.NoError(b, err)
		require.Equal(b, uint64(200), stats.Chunks)
		require.Equal(b, uint64(200), stats.Entries)
	}
}

func TestIndexClient_Stats(t *testing.T) {
	tempDir := t.TempDir()
	tableRanges := config.TableRanges{
		{
			Start: 0,
			End:   math.MaxInt64,
			PeriodConfig: &config.PeriodConfig{
				IndexTables: config.PeriodicTableConfig{
					Period: config.ObjectStorageIndexRequiredPeriod,
				},
			},
		},
	}

	indexStartToday := model.TimeFromUnixNano(time.Now().Truncate(config.ObjectStorageIndexRequiredPeriod).UnixNano())
	indexStartYesterday := indexStartToday.Add(-config.ObjectStorageIndexRequiredPeriod)

	tables := map[string][]*TSDBFile{
		tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartToday): {
			BuildIndex(t, tempDir, []LoadableSeries{
				{
					Labels: mustParseLabels(`{foo="bar"}`),
					Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
				},
				{
					Labels: mustParseLabels(`{fizz="buzz"}`),
					Chunks: buildChunkMetas(int64(indexStartToday), int64(indexStartToday+99)),
				},
			}),
		},

		tableRanges[0].PeriodConfig.IndexTables.TableFor(indexStartYesterday): {
			BuildIndex(t, tempDir, []LoadableSeries{
				{
					Labels: mustParseLabels(`{foo="bar"}`),
					Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
				},
				{
					Labels: mustParseLabels(`{foo="bar", fizz="buzz"}`),
					Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
				},
				{
					Labels: mustParseLabels(`{ping="pong"}`),
					Chunks: buildChunkMetas(int64(indexStartYesterday), int64(indexStartYesterday+99)),
				},
			}),
		},
	}

	idx := newIndexShipperQuerier(mockIndexShipperIndexIterator{tables: tables}, config.TableRanges{
		{
			Start:        0,
			End:          math.MaxInt64,
			PeriodConfig: &config.PeriodConfig{},
		},
	})

	indexClient := NewIndexClient(idx, IndexClientOptions{UseBloomFilters: true})

	for _, tc := range []struct {
		name               string
		queryInterval      model.Interval
		expectedNumChunks  uint64
		expectedNumEntries uint64
		expectedNumStreams uint64
	}{
		{
			name: "request spanning 2 tables",
			queryInterval: model.Interval{
				Start: indexStartYesterday,
				End:   indexStartToday + 1000,
			},
			expectedNumChunks:  298, // 2 chunks not included at indexStartYesterday since start time is not inclusive
			expectedNumEntries: 298,
			expectedNumStreams: 2,
		},
		{
			name: "request spanning just today",
			queryInterval: model.Interval{
				Start: indexStartToday,
				End:   indexStartToday + 1000,
			},
			expectedNumChunks:  99, // 1 chunk not included at indexStartToday since start time is not inclusive
			expectedNumEntries: 99,
			expectedNumStreams: 1,
		},
		{
			name: "request selecting just few of the chunks from today",
			queryInterval: model.Interval{
				Start: indexStartToday + 50,
				End:   indexStartToday + 60,
			},
			expectedNumChunks:  9, // start and end are not inclusive
			expectedNumEntries: 9,
			expectedNumStreams: 1,
		},
		{
			name: "request not touching any chunks",
			queryInterval: model.Interval{
				Start: indexStartToday + 2000,
				End:   indexStartToday + 3000,
			},
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			stats, err := indexClient.Stats(context.Background(), "", tc.queryInterval.Start, tc.queryInterval.End, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
			require.NoError(t, err)
			require.Equal(t, tc.expectedNumChunks, stats.Chunks)
			require.Equal(t, tc.expectedNumEntries, stats.Entries)
			require.Equal(t, tc.expectedNumStreams, stats.Streams)
		})
	}
}
```