Querier/Ruler: add histogram to track fetched chunk size distribution (#8682)

**What this PR does / why we need it**:
We are looking at making some changes to the chunk caching strategy, and
we need this data to know what size chunks we're typically requesting.
For example, if the overwhelming majority of chunks are small, we may
decide to cache only small chunks to decrease the number of requests to
the object store; we could fit more chunks in the cache this way, which may
have a positive performance impact.
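
As a rough illustration (not part of this PR) of how the new histogram could inform that decision: once `loki_chunk_fetcher_fetched_size_bytes` is scraped by Prometheus, the share of fetches at or below a given bucket boundary can be computed directly from the `_bucket` series. The sketch below uses the Prometheus Go client's HTTP API to ask what fraction of chunks fetched from the object store over the last hour were at most 64KiB; the Prometheus address and the exact `le` label formatting are assumptions.

```go
package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/prometheus/client_golang/api"
	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
)

func main() {
	// Assumption: a Prometheus server scraping Loki is reachable here.
	client, err := api.NewClient(api.Config{Address: "http://localhost:9090"})
	if err != nil {
		log.Fatalf("creating Prometheus client: %v", err)
	}
	promAPI := v1.NewAPI(client)

	// Fraction of chunks fetched from object storage over the last hour whose
	// compressed size was <= 64KiB (the 64*1024 bucket added in this PR).
	// The le="65536" value assumes the client's default float formatting.
	query := `sum(increase(loki_chunk_fetcher_fetched_size_bytes_bucket{source="store",le="65536"}[1h]))` +
		` / sum(increase(loki_chunk_fetcher_fetched_size_bytes_count{source="store"}[1h]))`

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	result, warnings, err := promAPI.Query(ctx, query, time.Now())
	if err != nil {
		log.Fatalf("querying Prometheus: %v", err)
	}
	if len(warnings) > 0 {
		log.Printf("warnings: %v", warnings)
	}
	fmt.Printf("fraction of store-fetched chunks <= 64KiB: %v\n", result)
}
```

The `source` label distinguishes chunks served from the chunk cache from those fetched from object storage, so the same query with `source="cache"` would show the distribution of cache hits instead.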
Files changed:
* CHANGELOG.md (+1)
* pkg/storage/chunk/fetcher/fetcher.go (+16)

CHANGELOG.md
@@ -6,6 +6,7 @@
 ##### Enhancements
+* [8682](https://github.com/grafana/loki/pull/8682) **dannykopping**: Add fetched chunk size distribution metric `loki_chunk_fetcher_fetched_size_bytes`.
 * [8532](https://github.com/grafana/loki/pull/8532) **justcompile**: Adds Storage Class option to S3 objects
 * [7951](https://github.com/grafana/loki/pull/7951) **MichelHollands**: Add a count template function to line_format and label_format.
 * [7380](https://github.com/grafana/loki/pull/7380) **liguozhong**: metrics query: range vector support streaming agg when no overlap.

pkg/storage/chunk/fetcher/fetcher.go
@@ -38,6 +38,15 @@ var (
         Name:      "cache_corrupt_chunks_total",
         Help:      "Total count of corrupt chunks found in cache.",
     })
+    chunkFetchedSize = promauto.NewHistogramVec(prometheus.HistogramOpts{
+        Namespace: "loki",
+        Subsystem: "chunk_fetcher",
+        Name:      "fetched_size_bytes",
+        Help:      "Compressed chunk size distribution fetched from storage.",
+        // TODO: expand these buckets if we ever make larger chunks
+        // TODO: consider adding `chunk_target_size` to this list in case users set very large chunk sizes
+        Buckets: []float64{128, 1024, 16 * 1024, 64 * 1024, 128 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 1.5 * 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024},
+    }, []string{"source"})
 )

 const chunkDecodeParallelism = 16
@@ -173,6 +182,11 @@ func (c *Fetcher) FetchChunks(ctx context.Context, chunks []chunk.Chunk, keys []
     if err != nil {
         level.Warn(log).Log("msg", "error fetching from cache", "err", err)
     }
+
+    for _, buf := range cacheBufs {
+        chunkFetchedSize.WithLabelValues("cache").Observe(float64(len(buf)))
+    }
+
     fromCache, missing, err := c.processCacheResponse(ctx, chunks, cacheHits, cacheBufs)
     if err != nil {
         level.Warn(log).Log("msg", "error process response from cache", "err", err)
@@ -188,6 +202,8 @@ func (c *Fetcher) FetchChunks(ctx context.Context, chunks []chunk.Chunk, keys []
     var bytes int
     for _, c := range fromStorage {
         bytes += c.Size()
+        chunkFetchedSize.WithLabelValues("store").Observe(float64(c.Size()))
     }
     st := stats.FromContext(ctx)

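For completeness, here is a minimal, self-contained sketch (not Loki code) of the same `promauto.NewHistogramVec` pattern the diff introduces: it registers an equivalent histogram against a throwaway registry, feeds it a few synthetic chunk sizes for the `cache` and `store` sources, and prints the resulting cumulative bucket counts so you can see the `_bucket` series the real metric will expose. The sample sizes are illustrative.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

func main() {
	// Throwaway registry so this sketch doesn't touch the default one.
	reg := prometheus.NewRegistry()

	// Same shape and buckets as the metric added in fetcher.go.
	chunkFetchedSize := promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
		Namespace: "loki",
		Subsystem: "chunk_fetcher",
		Name:      "fetched_size_bytes",
		Help:      "Compressed chunk size distribution fetched from storage.",
		Buckets:   []float64{128, 1024, 16 * 1024, 64 * 1024, 128 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 1.5 * 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024},
	}, []string{"source"})

	// Synthetic observations standing in for real chunk fetches.
	for _, size := range []float64{900, 48 * 1024, 200 * 1024} {
		chunkFetchedSize.WithLabelValues("cache").Observe(size)
	}
	chunkFetchedSize.WithLabelValues("store").Observe(1.2 * 1024 * 1024)

	// Gather and print the cumulative bucket counts per label set.
	families, err := reg.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range families {
		for _, m := range mf.GetMetric() {
			fmt.Println(mf.GetName(), m.GetLabel())
			for _, b := range m.GetHistogram().GetBucket() {
				fmt.Printf("  le=%g count=%d\n", b.GetUpperBound(), b.GetCumulativeCount())
			}
		}
	}
}
```

Because the buckets are cumulative, a single pass over the printed counts is enough to read off how many observed chunks fall at or below each boundary, which is exactly the data the caching decision described above needs.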