Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
loki/pkg/querier/queryrange/views_test.go

365 lines
8.8 KiB

Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
package queryrange
import (
"bytes"
"context"
"io"
"net/http"
"net/url"
"strings"
"testing"
"github.com/prometheus/prometheus/model/labels"
"github.com/stretchr/testify/require"
"github.com/grafana/loki/v3/pkg/logproto"
"github.com/grafana/loki/v3/pkg/logqlmodel/stats"
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
"github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase"
"github.com/grafana/loki/v3/pkg/util/marshal"
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
)
func TestGetLokiSeriesResponse(t *testing.T) {
p := QueryResponse{
Response: &QueryResponse_Series{
Series: &LokiSeriesResponse{
Status: "success",
Data: []logproto.SeriesIdentifier{
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "foo", Value: "bar"},
{Key: "baz", Value: "woof"},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
},
},
}},
}
buf, err := p.Marshal()
require.NoError(t, err)
view, err := GetLokiSeriesResponseView(buf)
require.NoError(t, err)
actual := make([]string, 0)
err = view.ForEachSeries(func(identifier *SeriesIdentifierView) error {
return identifier.ForEachLabel(func(name, value string) error {
actual = append(actual, name+value)
return nil
})
})
require.NoError(t, err)
require.ElementsMatch(t, actual, []string{"foobar", "bazwoof"})
}
func TestSeriesIdentifierViewHash(t *testing.T) {
identifier := &logproto.SeriesIdentifier{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "foo", Value: "bar"},
{Key: "baz", Value: "woof"},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
}
data, err := identifier.Marshal()
require.NoError(t, err)
view := &SeriesIdentifierView{buffer: data}
b := make([]byte, 0, 1024)
keyLabelPairs := make([]string, 0)
var actual uint64
actual, keyLabelPairs, err = view.Hash(b, keyLabelPairs)
require.NoError(t, err)
require.ElementsMatch(t, keyLabelPairs, []string{"baz\xffwoof\xff", "foo\xffbar\xff"})
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
expected := identifier.Hash(b)
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
require.Equal(t, expected, actual)
}
func TestSeriesIdentifierViewForEachLabel(t *testing.T) {
identifier := &logproto.SeriesIdentifier{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "foo", Value: "bar"},
{Key: "baz", Value: "woof"},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
}
data, err := identifier.Marshal()
require.NoError(t, err)
view := &SeriesIdentifierView{buffer: data}
s := make([]string, 0)
err = view.ForEachLabel(func(name, value string) error {
s = append(s, name, value)
return nil
})
require.NoError(t, err)
require.ElementsMatch(t, s, []string{"baz", "woof", "foo", "bar"})
}
func TestSeriesResponseViewForEach(t *testing.T) {
response := &LokiSeriesResponse{
Data: []logproto.SeriesIdentifier{
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "1"},
{Key: "baz", Value: "woof"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "2"},
{Key: "foo", Value: "bar"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
},
}
data, err := response.Marshal()
require.NoError(t, err)
view := &LokiSeriesResponseView{buffer: data}
actualHashes := make([]uint64, 0)
err = view.ForEachSeries(func(s *SeriesIdentifierView) error {
b := make([]byte, 0, 1024)
keyLabelPairs := make([]string, 0)
hash, _, err := s.Hash(b, keyLabelPairs)
if err != nil {
return err
}
actualHashes = append(actualHashes, hash)
return nil
})
require.NoError(t, err)
expectedHashes := make([]uint64, 0)
for _, id := range response.Data {
b := make([]byte, 0, 1024)
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
hash := id.Hash(b)
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
expectedHashes = append(expectedHashes, hash)
}
require.ElementsMatch(t, expectedHashes, actualHashes)
}
func TestMergedViewDeduplication(t *testing.T) {
responses := []*LokiSeriesResponse{
{
Data: []logproto.SeriesIdentifier{
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "1"},
{Key: "baz", Value: "woof"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "2"},
{Key: "foo", Value: "bar"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
},
},
{
Data: []logproto.SeriesIdentifier{
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "3"},
{Key: "baz", Value: "woof"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "2"},
{Key: "foo", Value: "bar"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
},
},
}
view := &MergedSeriesResponseView{}
for _, r := range responses {
data, err := r.Marshal()
require.NoError(t, err)
view.responses = append(view.responses, &LokiSeriesResponseView{buffer: data, headers: nil})
}
count := 0
err := view.ForEachUniqueSeries(func(_ *SeriesIdentifierView) error {
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
count++
return nil
})
require.NoError(t, err)
require.Equal(t, 3, count)
}
func TestMergedViewMaterialize(t *testing.T) {
responses := []*LokiSeriesResponse{
{
Data: []logproto.SeriesIdentifier{
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "1"},
{Key: "baz", Value: "woof"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "2"},
{Key: "foo", Value: "bar"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
},
},
{
Data: []logproto.SeriesIdentifier{
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "3"},
{Key: "baz", Value: "woof"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "2"},
{Key: "foo", Value: "bar"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
},
},
}
view := &MergedSeriesResponseView{}
for _, r := range responses {
data, err := r.Marshal()
require.NoError(t, err)
view.responses = append(view.responses, &LokiSeriesResponseView{buffer: data, headers: nil})
}
mat, err := view.Materialize()
require.NoError(t, err)
require.Len(t, mat.Data, 3)
series := make([]string, 0)
for _, d := range mat.Data {
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
l := make([]labels.Label, 0, len(d.Labels))
for _, p := range d.Labels {
l = append(l, labels.Label{Name: p.Key, Value: p.Value})
}
series = append(series, labels.Labels(l).String())
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
}
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
expected := []string{`{i="1", baz="woof"}`, `{i="3", baz="woof"}`, `{i="2", foo="bar"}`}
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
require.ElementsMatch(t, series, expected)
}
func TestMergedViewJSON(t *testing.T) {
var b strings.Builder
response := &LokiSeriesResponse{
Data: []logproto.SeriesIdentifier{
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "1"},
{Key: "baz", Value: "woof"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "2"},
{Key: "foo", Value: "bar"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "i", Value: "3"},
{Key: "baz", Value: "woof"},
},
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
},
},
}
view := &MergedSeriesResponseView{}
data, err := response.Marshal()
require.NoError(t, err)
view.responses = append(view.responses, &LokiSeriesResponseView{buffer: data, headers: nil})
err = WriteSeriesResponseViewJSON(view, &b)
require.NoError(t, err)
actual := b.String()
b.Reset()
err = marshal.WriteSeriesResponseJSON(response.Data, &b)
Lazily decode series protobuf. (#10071) **What this PR does / why we need it**: The protobuf decoding for series responses runs into a memory issue for many series. The response is only merged and passed through the front end. It is more efficient to decode the protobuf encoded series lazily. And `decode`, `merge`, `encode` benchmark shows the benefit of transcoding the protobuf series message into a JSON response directly. This change will not impact production code since it only applies to protobuf encoding messaging between the querier and query frontend that must be explicitly enabled. ``` › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › go test -v -run=^$ -bench "Benchmark_DecodeMergeEncodeCycle" -memprofile memory_base.prof -count=10 ./pkg/querier/queryrange > before.txt › benchstat before.txt after.txt before.txt:5: missing iteration count after.txt:5: missing iteration count goos: linux goarch: amd64 pkg: github.com/grafana/loki/pkg/querier/queryrange cpu: AMD Ryzen 7 3700X 8-Core Processor │ before.txt │ after.txt │ │ sec/op │ sec/op vs base │ _DecodeMergeEncodeCycle-16 2537.7m ± 2% 934.2m ± 1% -63.19% (p=0.000 n=10) │ before.txt │ after.txt │ │ B/op │ B/op vs base │ _DecodeMergeEncodeCycle-16 1723.4Mi ± 0% 641.1Mi ± 0% -62.80% (p=0.000 n=10) │ before.txt │ after.txt │ │ allocs/op │ allocs/op vs base │ _DecodeMergeEncodeCycle-16 20240.6k ± 0% 203.0k ± 0% -99.00% (p=0.000 n=10) ``` **Checklist** - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213)
2 years ago
require.NoError(t, err)
expected := b.String()
require.JSONEq(t, expected, actual)
}
func Benchmark_DecodeMergeEncodeCycle(b *testing.B) {
// Setup HTTP responses from querier with protobuf encoding.
u := &url.URL{Path: "/loki/api/v1/series"}
httpReq := &http.Request{
Method: "GET",
RequestURI: u.String(),
URL: u,
Header: http.Header{
"Accept": []string{ProtobufType},
},
}
qreq, err := DefaultCodec.DecodeRequest(context.Background(), httpReq, []string{})
require.NoError(b, err)
responses := make([]*LokiSeriesResponse, 100)
for i := range responses {
responses[i] = &LokiSeriesResponse{
Status: "200",
Version: 1,
Statistics: stats.Result{},
Data: generateSeries(),
}
}
httpResponses := make([]*http.Response, 0)
readers := make([]*bytes.Reader, 0)
for _, r := range responses {
resp, err := DefaultCodec.EncodeResponse(context.Background(), httpReq, r)
require.NoError(b, err)
buf, err := io.ReadAll(resp.Body)
require.Nil(b, err)
reader := bytes.NewReader(buf)
resp.Body = io.NopCloser(reader)
readers = append(readers, reader)
httpResponses = append(httpResponses, resp)
}
// Originally the responses were encoded using protobuf. Demand JSON
// now.
httpReq.Header.Del("Accept")
httpReq.Header.Add("Accept", JSONType)
qresps := make([]queryrangebase.Response, 0, 100)
b.ReportAllocs()
b.ResetTimer()
for n := 0; n < b.N; n++ {
// Decode
qresps = qresps[:0]
for i, httpResp := range httpResponses {
_, _ = readers[i].Seek(0, io.SeekStart)
qresp, err := DefaultCodec.DecodeResponse(context.Background(), httpResp, qreq)
require.NoError(b, err)
qresps = append(qresps, qresp)
}
// Merge
result, _ := DefaultCodec.MergeResponse(qresps...)
// Encode
httpRes, err := DefaultCodec.EncodeResponse(context.Background(), httpReq, result)
require.NoError(b, err)
require.Equal(b, "application/json; charset=UTF-8", httpRes.Header.Get("Content-Type"))
}
}