Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
loki/pkg/querier/http_test.go

365 lines
11 KiB

package querier
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"testing"
"time"
"github.com/pkg/errors"
"github.com/stretchr/testify/mock"
"github.com/grafana/loki/v3/pkg/loghttp"
"github.com/grafana/loki/v3/pkg/logproto"
"github.com/grafana/loki/v3/pkg/logqlmodel"
"github.com/grafana/loki/v3/pkg/util/constants"
"github.com/grafana/loki/v3/pkg/validation"
"github.com/go-kit/log"
"github.com/grafana/dskit/user"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
func TestInstantQueryHandler(t *testing.T) {
defaultLimits := defaultLimitsTestConfig()
limits, err := validation.NewOverrides(defaultLimits, nil)
require.NoError(t, err)
t.Run("log selector expression not allowed for instant queries", func(t *testing.T) {
api := NewQuerierAPI(mockQuerierConfig(), nil, limits, nil, nil, log.NewNopLogger())
ctx := user.InjectOrgID(context.Background(), "user")
req, err := http.NewRequestWithContext(ctx, "GET", `/api/v1/query`, nil)
require.NoError(t, err)
q := req.URL.Query()
q.Add("query", `{app="loki"}`)
req.URL.RawQuery = q.Encode()
err = req.ParseForm()
require.NoError(t, err)
rr := httptest.NewRecorder()
handler := NewQuerierHandler(api)
httpHandler := NewQuerierHTTPHandler(handler)
httpHandler.ServeHTTP(rr, req)
require.Equal(t, http.StatusBadRequest, rr.Code)
require.Equal(t, logqlmodel.ErrUnsupportedSyntaxForInstantQuery.Error(), rr.Body.String())
})
}
type slowConnectionSimulator struct {
sleepFor time.Duration
deadline time.Duration
didTimeout bool
}
func (s *slowConnectionSimulator) ServeHTTP(_ http.ResponseWriter, r *http.Request) {
ctx := r.Context()
if err := ctx.Err(); err != nil {
panic(fmt.Sprintf("context already errored: %s", err))
}
time.Sleep(s.sleepFor)
select {
case <-ctx.Done():
switch ctx.Err() {
case context.DeadlineExceeded:
s.didTimeout = true
case context.Canceled:
panic("context already canceled")
}
case <-time.After(s.deadline):
}
}
func TestQueryWrapperMiddleware(t *testing.T) {
shortestTimeout := time.Millisecond * 5
t.Run("request timeout is the shortest one", func(t *testing.T) {
defaultLimits := defaultLimitsTestConfig()
defaultLimits.QueryTimeout = model.Duration(time.Millisecond * 10)
limits, err := validation.NewOverrides(defaultLimits, nil)
require.NoError(t, err)
// request timeout is 5ms but it sleeps for 100ms, so timeout injected in the request is expected.
connSimulator := &slowConnectionSimulator{
sleepFor: time.Millisecond * 100,
deadline: shortestTimeout,
}
midl := WrapQuerySpanAndTimeout("mycall", limits).Wrap(connSimulator)
req, err := http.NewRequest("GET", "/loki/api/v1/label", nil)
ctx, cancelFunc := context.WithTimeout(user.InjectOrgID(req.Context(), "fake"), shortestTimeout)
defer cancelFunc()
req = req.WithContext(ctx)
require.NoError(t, err)
rr := httptest.NewRecorder()
srv := http.HandlerFunc(midl.ServeHTTP)
srv.ServeHTTP(rr, req)
require.Equal(t, http.StatusOK, rr.Code)
select {
case <-ctx.Done():
break
case <-time.After(shortestTimeout):
require.FailNow(t, "should have timed out before %s", shortestTimeout)
default:
require.FailNow(t, "timeout expected")
}
require.True(t, connSimulator.didTimeout)
})
t.Run("apply limits query timeout", func(t *testing.T) {
defaultLimits := defaultLimitsTestConfig()
defaultLimits.QueryTimeout = model.Duration(shortestTimeout)
limits, err := validation.NewOverrides(defaultLimits, nil)
require.NoError(t, err)
connSimulator := &slowConnectionSimulator{
sleepFor: time.Millisecond * 100,
deadline: shortestTimeout,
}
midl := WrapQuerySpanAndTimeout("mycall", limits).Wrap(connSimulator)
req, err := http.NewRequest("GET", "/loki/api/v1/label", nil)
ctx, cancelFunc := context.WithTimeout(user.InjectOrgID(req.Context(), "fake"), time.Millisecond*100)
defer cancelFunc()
req = req.WithContext(ctx)
require.NoError(t, err)
rr := httptest.NewRecorder()
srv := http.HandlerFunc(midl.ServeHTTP)
srv.ServeHTTP(rr, req)
require.Equal(t, http.StatusOK, rr.Code)
select {
case <-ctx.Done():
break
case <-time.After(shortestTimeout):
require.FailNow(t, "should have timed out before %s", shortestTimeout)
}
require.True(t, connSimulator.didTimeout)
})
}
func injectOrgID(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, ctx, _ := user.ExtractOrgIDFromHTTPRequest(r)
next.ServeHTTP(w, r.WithContext(ctx))
})
}
func buildHandler(api *QuerierAPI) http.Handler {
return injectOrgID(NewQuerierHTTPHandler(NewQuerierHandler(api)))
}
func TestSeriesHandler(t *testing.T) {
t.Run("instant queries set a step of 0", func(t *testing.T) {
ret := func() *logproto.SeriesResponse {
return &logproto.SeriesResponse{
Series: []logproto.SeriesIdentifier{
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "a", Value: "1"},
{Key: "b", Value: "2"},
},
},
{
Optimize series response format by using repeated fileds. (#11498) **What this PR does / why we need it**: The Protobuf map type is encodied as a repeated field of map entries. Decoding them to a slice is much faster than decoding them into a map. Since Loki is not using the fast key check for a map we can use the slice decoding. This change also allows us to decode the JSON directly into the right protobuf struct. This doulbes the JSON decoding speed and reduces the memory pressure by ~40%. ``` › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > before.log › go test -bench=. -run=^$ -count=10 ./pkg/querier/queryrange > after.log › benchstat before.log after.log goos: darwin goarch: arm64 pkg: github.com/grafana/loki/pkg/querier/queryrange │ before.log │ after.log │ │ sec/op │ sec/op vs base │ ResponseMerge/mergeStreams_unlimited-10 32.36m ± 0% 32.63m ± 2% ~ (p=0.393 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_unlimited-10 1.050m ± 1% 1.080m ± 3% +2.84% (p=0.005 n=10) ResponseMerge/mergeStreams_limited-10 33.02m ± 0% 32.60m ± 1% -1.29% (p=0.004 n=10) ResponseMerge/mergeOrderedNonOverlappingStreams_limited-10 15.11m ± 0% 15.07m ± 0% ~ (p=0.075 n=10) _CodecDecodeLogs-10 4.395m ± 1% 4.364m ± 0% -0.72% (p=0.005 n=10) _CodecDecodeSamples-10 16.97m ± 0% 16.84m ± 2% -0.77% (p=0.023 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 745.8µ ± 8% 736.8µ ± 12% ~ (p=0.739 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 15.37m ± 1% 10.60m ± 0% -31.03% (p=0.000 n=10) _MergeResponses-10 1186.9m ± 2% 149.8m ± 1% -87.38% (p=0.000 n=10) _UnwrapSeries-10 9.399m ± 1% 4.049m ± 0% -56.92% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 666.0m ± 3% 194.6m ± 0% -70.79% (p=0.000 n=10) geomean 18.87m 12.51m -33.70% │ before.log │ after.log │ │ B/op │ B/op vs base │ _CodecDecodeLogs-10 3.649Mi ± 0% 3.649Mi ± 0% ~ (p=0.364 n=10) _CodecDecodeSamples-10 18.12Mi ± 0% 18.12Mi ± 0% ~ (p=0.926 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 7.647Mi ± 0% 7.647Mi ± 0% ~ (p=0.587 n=10) _CodecDecodeSeries/application/json;_charset=utf-8-10 27.94Mi ± 0% 16.99Mi ± 0% -39.18% (p=0.000 n=10) _MergeResponses-10 2.362Mi ± 0% 2.408Mi ± 0% +1.98% (p=0.000 n=10) _UnwrapSeries-10 19.495Mi ± 0% 8.595Mi ± 0% -55.91% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 772.3Mi ± 0% 772.3Mi ± 0% ~ (p=0.912 n=10) geomean 17.50Mi 14.54Mi -16.91% │ before.log │ after.log │ │ allocs/op │ allocs/op vs base │ _CodecDecodeLogs-10 41.10k ± 0% 41.10k ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSamples-10 411.9k ± 0% 411.9k ± 0% ~ (p=1.000 n=10) _CodecDecodeSeries/application/vnd.google.protobuf-10 32.00 ± 0% 32.00 ± 0% ~ (p=1.000 n=10) ¹ _CodecDecodeSeries/application/json;_charset=utf-8-10 304.2k ± 0% 298.1k ± 0% -2.01% (p=0.000 n=10) _MergeResponses-10 100.1k ± 0% 100.1k ± 0% -0.00% (p=0.002 n=10) _UnwrapSeries-10 201.1k ± 0% 198.0k ± 0% -1.54% (p=0.000 n=10) _DecodeMergeEncodeCycle-10 203.1k ± 0% 203.1k ± 0% ~ (p=0.621 n=10) geomean 48.95k 48.70k -0.51% ¹ all samples are equal ``` **Checklist** - [x] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [x] Tests updated - [ ] `CHANGELOG.md` updated - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. [Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15)
2 years ago
Labels: []logproto.SeriesIdentifier_LabelsEntry{
{Key: "c", Value: "3"},
{Key: "d", Value: "4"},
},
},
},
}
}
expected := `{"status":"success","data":[{"a":"1","b":"2"},{"c":"3","d":"4"}]}`
q := newQuerierMock()
q.On("Series", mock.Anything, mock.Anything).Return(ret, nil)
api := setupAPI(t, q, false)
handler := buildHandler(api)
req := httptest.NewRequest(http.MethodGet, "/loki/api/v1/series"+
"?start=0"+
"&end=1"+
"&step=42"+
"&query=%7Bfoo%3D%22bar%22%7D", nil)
req.Header.Set("X-Scope-OrgID", "test-org")
res := makeRequest(t, handler, req)
require.Equalf(t, 200, res.Code, "response was not HTTP OK: %s", res.Body.String())
require.JSONEq(t, expected, res.Body.String())
})
t.Run("ignores __aggregated_metric__ series, when possible, unless explicitly requested", func(t *testing.T) {
ret := func() *logproto.SeriesResponse {
return &logproto.SeriesResponse{
Series: []logproto.SeriesIdentifier{},
}
}
q := newQuerierMock()
q.On("Series", mock.Anything, mock.Anything).Return(ret, nil)
api := setupAPI(t, q, true)
handler := buildHandler(api)
for _, tt := range []struct {
match string
expectedGroups []string
}{
{
// we can't add the negated __aggregated_metric__ matcher to an empty matcher set,
// as that will produce an invalid query
match: "{}",
expectedGroups: []string{},
},
{
match: `{foo="bar"}`,
expectedGroups: []string{fmt.Sprintf(`{foo="bar", %s=""}`, constants.AggregatedMetricLabel)},
},
{
match: fmt.Sprintf(`{%s="foo-service"}`, constants.AggregatedMetricLabel),
expectedGroups: []string{fmt.Sprintf(`{%s="foo-service"}`, constants.AggregatedMetricLabel)},
},
} {
req := httptest.NewRequest(http.MethodGet, "/loki/api/v1/series"+
"?start=0"+
"&end=1"+
fmt.Sprintf("&match=%s", url.QueryEscape(tt.match)), nil)
req.Header.Set("X-Scope-OrgID", "test-org")
_ = makeRequest(t, handler, req)
q.AssertCalled(t, "Series", mock.Anything, &logproto.SeriesRequest{
Start: time.Unix(0, 0).UTC(),
End: time.Unix(1, 0).UTC(),
Groups: tt.expectedGroups,
})
}
})
}
func TestVolumeHandler(t *testing.T) {
ret := &logproto.VolumeResponse{
Volumes: []logproto.Volume{
{Name: `{foo="bar"}`, Volume: 38},
},
}
t.Run("shared beavhior between range and instant queries", func(t *testing.T) {
for _, tc := range []struct {
mode string
req *logproto.VolumeRequest
}{
{mode: "instant", req: loghttp.NewVolumeInstantQueryWithDefaults(`{foo="bar"}`)},
{mode: "range", req: loghttp.NewVolumeRangeQueryWithDefaults(`{foo="bar"}`)},
} {
t.Run(fmt.Sprintf("%s queries return label volumes from the querier", tc.mode), func(t *testing.T) {
querier := newQuerierMock()
querier.On("Volume", mock.Anything, mock.Anything).Return(ret, nil)
api := setupAPI(t, querier, false)
res, err := api.VolumeHandler(context.Background(), tc.req)
require.NoError(t, err)
calls := querier.GetMockedCallsByMethod("Volume")
require.Len(t, calls, 1)
request := calls[0].Arguments[1].(*logproto.VolumeRequest)
require.Equal(t, `{foo="bar"}`, request.Matchers)
require.Equal(t, "series", request.AggregateBy)
require.Equal(t, ret, res)
})
t.Run(fmt.Sprintf("%s queries return nothing when a store doesn't support label volumes", tc.mode), func(t *testing.T) {
querier := newQuerierMock()
querier.On("Volume", mock.Anything, mock.Anything).Return(nil, nil)
api := setupAPI(t, querier, false)
res, err := api.VolumeHandler(context.Background(), tc.req)
require.NoError(t, err)
calls := querier.GetMockedCallsByMethod("Volume")
require.Len(t, calls, 1)
require.Empty(t, res.Volumes)
})
t.Run(fmt.Sprintf("%s queries return error when there's an error in the querier", tc.mode), func(t *testing.T) {
err := errors.New("something bad")
querier := newQuerierMock()
querier.On("Volume", mock.Anything, mock.Anything).Return(nil, err)
api := setupAPI(t, querier, false)
_, err = api.VolumeHandler(context.Background(), tc.req)
require.ErrorContains(t, err, "something bad")
calls := querier.GetMockedCallsByMethod("Volume")
require.Len(t, calls, 1)
})
}
})
}
func TestLabelsHandler(t *testing.T) {
t.Run("remove __aggregated_metric__ label from response when present", func(t *testing.T) {
ret := &logproto.LabelResponse{
Values: []string{
constants.AggregatedMetricLabel,
"foo",
"bar",
},
}
expected := `{"status":"success","data":["foo","bar"]}`
q := newQuerierMock()
q.On("Label", mock.Anything, mock.Anything).Return(ret, nil)
api := setupAPI(t, q, true)
handler := buildHandler(api)
req := httptest.NewRequest(http.MethodGet, "/loki/api/v1/labels"+
"?start=0"+
"&end=1", nil)
req.Header.Set("X-Scope-OrgID", "test-org")
res := makeRequest(t, handler, req)
require.Equalf(t, 200, res.Code, "response was not HTTP OK: %s", res.Body.String())
require.JSONEq(t, expected, res.Body.String())
})
}
func makeRequest(t *testing.T, handler http.Handler, req *http.Request) *httptest.ResponseRecorder {
err := req.ParseForm()
require.NoError(t, err)
w := httptest.NewRecorder()
handler.ServeHTTP(w, req)
return w
}
func setupAPI(t *testing.T, querier *querierMock, enableMetricAggregation bool) *QuerierAPI {
defaultLimits := defaultLimitsTestConfig()
defaultLimits.MetricAggregationEnabled = enableMetricAggregation
limits, err := validation.NewOverrides(defaultLimits, nil)
require.NoError(t, err)
api := NewQuerierAPI(Config{}, querier, limits, nil, nil, log.NewNopLogger())
return api
}