From f89a51fde872aece651ff493fcfeb3ce05173fb8 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Wed, 23 Aug 2023 09:14:37 +0100 Subject: [PATCH] Tracing: elide small traces for Stats call (#10308) **What this PR does / why we need it**: At work I see a lot of traces getting data dropped due to being tens of megabytes in size. Looking at what was captured, I see many many repeats of these four spans: image Events on the parent trace are cheaper and, for small operations like this, just as useful. Where there was an event line I added a `function` field matching the previous span name. For `storeEntry.Stats()` I didn't add an event line as it doesn't seem to do very much. But please correct me; I'm not hugely familiar with the Loki codebase. **Special notes for your reviewer**: **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - NA Documentation added - NA Tests updated - [x] `CHANGELOG.md` updated - NA If the change is worth mentioning in the release notes, add `add-to-release-notes` label - NA Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - NA For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) --- CHANGELOG.md | 1 + pkg/ingester/instance.go | 24 ++++++++--------- pkg/storage/stores/composite_store_entry.go | 3 --- pkg/storage/stores/tsdb/index_client.go | 29 ++++++++++----------- 4 files changed, 27 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b4af32099..4e921688b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ * [10281](https://github.com/grafana/loki/pull/10281) **dannykopping**: Track effectiveness of hedged requests. * [10140](https://github.com/grafana/loki/pull/10140) **dannykopping**: Dynamic client-side throttling to avoid object storage rate-limits (GCS only) * [10301](https://github.com/grafana/loki/pull/10301) **wildum**: Promtail: users can now define `additional_fields` in cloudflare configuration. +* [10308](https://github.com/grafana/loki/pull/10308) **bboreham** Tracing: elide small traces for Stats call. ##### Fixes diff --git a/pkg/ingester/instance.go b/pkg/ingester/instance.go index 3af664666a..2d82f95fac 100644 --- a/pkg/ingester/instance.go +++ b/pkg/ingester/instance.go @@ -572,9 +572,6 @@ func (i *instance) Series(ctx context.Context, req *logproto.SeriesRequest) (*lo } func (i *instance) GetStats(ctx context.Context, req *logproto.IndexStatsRequest) (*logproto.IndexStatsResponse, error) { - sp, ctx := opentracing.StartSpanFromContext(ctx, "instance.GetStats") - defer sp.Finish() - matchers, err := syntax.ParseMatchers(req.Matchers, true) if err != nil { return nil, err @@ -614,15 +611,18 @@ func (i *instance) GetStats(ctx context.Context, req *logproto.IndexStatsRequest return nil, err } - sp.LogKV( - "from", from, - "through", through, - "matchers", syntax.MatchersString(matchers), - "streams", res.Streams, - "chunks", res.Chunks, - "bytes", res.Bytes, - "entries", res.Entries, - ) + if sp := opentracing.SpanFromContext(ctx); sp != nil { + sp.LogKV( + "function", "instance.GetStats", + "from", from, + "through", through, + "matchers", syntax.MatchersString(matchers), + "streams", res.Streams, + "chunks", res.Chunks, + "bytes", res.Bytes, + "entries", res.Entries, + ) + } return res, nil } diff --git a/pkg/storage/stores/composite_store_entry.go b/pkg/storage/stores/composite_store_entry.go index c90714310f..60a9eef38d 100644 --- a/pkg/storage/stores/composite_store_entry.go +++ b/pkg/storage/stores/composite_store_entry.go @@ -119,9 +119,6 @@ func (c *storeEntry) LabelValuesForMetricName(ctx context.Context, userID string } func (c *storeEntry) Stats(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) (*stats.Stats, error) { - sp, ctx := opentracing.StartSpanFromContext(ctx, "SeriesStore.Stats") - defer sp.Finish() - shortcut, err := c.validateQueryTimeRange(ctx, userID, &from, &through) if err != nil { return nil, err diff --git a/pkg/storage/stores/tsdb/index_client.go b/pkg/storage/stores/tsdb/index_client.go index 6e28b3e16e..7b882e46f7 100644 --- a/pkg/storage/stores/tsdb/index_client.go +++ b/pkg/storage/stores/tsdb/index_client.go @@ -182,9 +182,6 @@ func (c *IndexClient) LabelNamesForMetricName(ctx context.Context, userID string } func (c *IndexClient) Stats(ctx context.Context, userID string, from, through model.Time, matchers ...*labels.Matcher) (*stats.Stats, error) { - sp, ctx := opentracing.StartSpanFromContext(ctx, "IndexClient.Stats") - defer sp.Finish() - matchers, shard, err := cleanMatchers(matchers...) if err != nil { return nil, err @@ -231,18 +228,20 @@ func (c *IndexClient) Stats(ctx context.Context, userID string, from, through mo } res := acc.Stats() - sp.LogKV( - "from", from.Time(), - "through", through.Time(), - "matchers", syntax.MatchersString(matchers), - "shard", shard, - "intervals", len(intervals), - "streams", res.Streams, - "chunks", res.Chunks, - "bytes", res.Bytes, - "entries", res.Entries, - ) - + if sp := opentracing.SpanFromContext(ctx); sp != nil { + sp.LogKV( + "function", "IndexClient.Stats", + "from", from.Time(), + "through", through.Time(), + "matchers", syntax.MatchersString(matchers), + "shard", shard, + "intervals", len(intervals), + "streams", res.Streams, + "chunks", res.Chunks, + "bytes", res.Bytes, + "entries", res.Entries, + ) + } return &res, nil }