Feature/per tenant splitby (#1565)

* split-by overrides by tenant

* tenant-aware metric query splitting

* consistent split_queries_by_interval naming

* updates README

* fixes rebase conflict

* refactor to support limit-based split-by intervals in all locations

* simplifies limits CacheSplitter

* fixes comment
Owen Diehl, committed by GitHub
parent 891eae9af8
commit 3e075497cd
  1. CHANGELOG.md (1 line changed)
  2. pkg/querier/queryrange/limits.go (56 lines changed)
  3. pkg/querier/queryrange/limits_test.go (34 lines changed)
  4. pkg/querier/queryrange/roundtrip.go (88 lines changed)
  5. pkg/querier/queryrange/roundtrip_test.go (8 lines changed)
  6. pkg/querier/queryrange/split_by_interval.go (41 lines changed)
  7. pkg/querier/queryrange/split_by_interval_test.go (149 lines changed)
  8. pkg/util/validation/limits.go (8 lines changed)

CHANGELOG.md
@@ -4,6 +4,7 @@
* [1572](https://github.com/grafana/loki/pull/1572) **owen-d**: Introduces the `querier.query-ingesters-within` flag and associated yaml config. When enabled, queries for a time range that do not overlap this lookback interval will not be sent to the ingesters.
* [1558](https://github.com/grafana/loki/pull/1558) **owen-d**: Introduces `ingester.max-chunk-age` which specifies the maximum chunk age before it's cut.
* [1565](https://github.com/grafana/loki/pull/1565) **owen-d**: The query frontend's `split_queries_by_interval` can now be specified as an override.
## 1.3.0 (2020-01-16)

pkg/querier/queryrange/limits.go
@@ -0,0 +1,56 @@
package queryrange

import (
	"fmt"
	"time"

	"github.com/cortexproject/cortex/pkg/querier/queryrange"
)

// Limits extends the cortex limits interface with support for per tenant splitby parameters
type Limits interface {
	queryrange.Limits
	QuerySplitDuration(string) time.Duration
}

type limits struct {
	Limits
	splitDuration time.Duration
}

func (l limits) QuerySplitDuration(user string) time.Duration {
	dur := l.Limits.QuerySplitDuration(user)
	if dur == 0 {
		return l.splitDuration
	}
	return dur
}

// WithDefaultLimits will construct a Limits with a default value for QuerySplitDuration when no overrides are present.
func WithDefaultLimits(l Limits, conf queryrange.Config) Limits {
	res := limits{
		Limits: l,
	}

	if conf.SplitQueriesByDay {
		res.splitDuration = 24 * time.Hour
	}

	if conf.SplitQueriesByInterval != 0 {
		res.splitDuration = conf.SplitQueriesByInterval
	}

	return res
}

// cacheKeyLimits intersects Limits and CacheSplitter
type cacheKeyLimits struct {
	Limits
}

// GenerateCacheKey will panic if it encounters a 0 split duration. We ensure against this by requiring
// a nonzero split interval when caching is enabled.
func (l cacheKeyLimits) GenerateCacheKey(userID string, r queryrange.Request) string {
	currentInterval := r.GetStart() / int64(l.QuerySplitDuration(userID)/time.Millisecond)
	return fmt.Sprintf("%s:%s:%d:%d", userID, r.GetQuery(), r.GetStep(), currentInterval)
}
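
For illustration only (not part of the diff): the override logic above falls back to the frontend-wide default when a tenant has no `split_queries_by_interval` set, and the results-cache key is bucketed by that per-tenant interval. A minimal standalone sketch of the same idea, with hypothetical names (`tenantSplits`, `defaultSplit`, `cacheKey`):

```go
package main

import (
	"fmt"
	"time"
)

// tenantSplits holds per-tenant split intervals; a zero value means "use the default",
// mirroring how QuerySplitDuration falls back to the queryrange config.
var tenantSplits = map[string]time.Duration{
	"team-a": 30 * time.Minute,
}

const defaultSplit = 24 * time.Hour

func querySplitDuration(tenant string) time.Duration {
	if d := tenantSplits[tenant]; d != 0 {
		return d
	}
	return defaultSplit
}

// cacheKey buckets a query's start time (in milliseconds) by the tenant's split
// interval, so changing a tenant's interval changes which bucket a query lands in.
func cacheKey(tenant, query string, startMs int64) string {
	interval := querySplitDuration(tenant)
	bucket := startMs / int64(interval/time.Millisecond)
	return fmt.Sprintf("%s:%s:%d", tenant, query, bucket)
}

func main() {
	start := time.Date(2020, 1, 16, 10, 0, 0, 0, time.UTC).UnixNano() / int64(time.Millisecond)
	fmt.Println(cacheKey("team-a", `{app="foo"}`, start)) // bucketed by 30m
	fmt.Println(cacheKey("team-b", `{app="foo"}`, start)) // falls back to 24h buckets
}
```

Since the tenant ID is part of the key, tenants with different intervals never share cache entries for the same query, which is why the default must be nonzero whenever caching is enabled.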

pkg/querier/queryrange/limits_test.go
@@ -0,0 +1,34 @@
package queryrange

import (
	"testing"
	"time"

	"github.com/cortexproject/cortex/pkg/querier/queryrange"
	"github.com/stretchr/testify/require"
)

func TestWithDefaultLimits(t *testing.T) {
	l := fakeLimits{
		splits: map[string]time.Duration{"a": time.Minute},
	}

	require.Equal(t, l.QuerySplitDuration("a"), time.Minute)
	require.Equal(t, l.QuerySplitDuration("b"), time.Duration(0))

	wrapped := WithDefaultLimits(l, queryrange.Config{
		SplitQueriesByDay: true,
	})

	require.Equal(t, wrapped.QuerySplitDuration("a"), time.Minute)
	require.Equal(t, wrapped.QuerySplitDuration("b"), 24*time.Hour)

	wrapped = WithDefaultLimits(l, queryrange.Config{
		SplitQueriesByDay:      true, // should be overridden by SplitQueriesByInterval
		SplitQueriesByInterval: time.Hour,
	})

	require.Equal(t, wrapped.QuerySplitDuration("a"), time.Minute)
	require.Equal(t, wrapped.QuerySplitDuration("b"), time.Hour)
}

pkg/querier/queryrange/roundtrip.go
@@ -5,9 +5,11 @@ import (
	"net/http"
	"strings"

	"github.com/cortexproject/cortex/pkg/chunk/cache"
	"github.com/cortexproject/cortex/pkg/querier/frontend"
	"github.com/cortexproject/cortex/pkg/querier/queryrange"
	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"

	"github.com/grafana/loki/pkg/logql"
	"github.com/prometheus/prometheus/pkg/labels"
)
@@ -28,8 +30,13 @@ type Stopper interface {
}

// NewTripperware returns a Tripperware configured with middlewares to align, split and cache requests.
-func NewTripperware(cfg Config, log log.Logger, limits queryrange.Limits) (frontend.Tripperware, Stopper, error) {
-	metricsTripperware, cache, err := queryrange.NewTripperware(cfg.Config, log, limits, lokiCodec, queryrange.PrometheusResponseExtractor)
+func NewTripperware(cfg Config, log log.Logger, limits Limits) (frontend.Tripperware, Stopper, error) {
+	// Ensure that QuerySplitDuration uses configuration defaults.
+	// This avoids divide by zero errors when determining cache keys where user specific overrides don't exist.
+	limits = WithDefaultLimits(limits, cfg.Config)
+
+	metricsTripperware, cache, err := NewMetricTripperware(cfg, log, limits, lokiCodec, queryrange.PrometheusResponseExtractor)
	if err != nil {
		return nil, nil, err
	}
@@ -78,12 +85,12 @@ func NewTripperware(cfg Config, log log.Logger, limits queryrange.Limits) (front
func NewLogFilterTripperware(
	cfg Config,
	log log.Logger,
-	limits queryrange.Limits,
+	limits Limits,
	codec queryrange.Codec,
) (frontend.Tripperware, error) {
	queryRangeMiddleware := []queryrange.Middleware{queryrange.LimitsMiddleware(limits)}
	if cfg.SplitQueriesByInterval != 0 {
-		queryRangeMiddleware = append(queryRangeMiddleware, queryrange.InstrumentMiddleware("split_by_interval"), SplitByIntervalMiddleware(cfg.SplitQueriesByInterval, limits, codec))
+		queryRangeMiddleware = append(queryRangeMiddleware, queryrange.InstrumentMiddleware("split_by_interval"), SplitByIntervalMiddleware(limits, codec))
	}

	if cfg.MaxRetries > 0 {
		queryRangeMiddleware = append(queryRangeMiddleware, queryrange.InstrumentMiddleware("retry"), queryrange.NewRetryMiddleware(log, cfg.MaxRetries))
@@ -95,3 +102,76 @@ func NewLogFilterTripperware(
		return next
	}), nil
}

// NewMetricTripperware creates a new frontend tripperware responsible for handling metric queries
func NewMetricTripperware(
	cfg Config,
	log log.Logger,
	limits Limits,
	codec queryrange.Codec,
	extractor queryrange.Extractor,
) (frontend.Tripperware, Stopper, error) {
	queryRangeMiddleware := []queryrange.Middleware{queryrange.LimitsMiddleware(limits)}
	if cfg.AlignQueriesWithStep {
		queryRangeMiddleware = append(
			queryRangeMiddleware,
			queryrange.InstrumentMiddleware("step_align"),
			queryrange.StepAlignMiddleware,
		)
	}

	// SplitQueriesByDay is deprecated use SplitQueriesByInterval.
	if cfg.SplitQueriesByDay {
		level.Warn(log).Log("msg", "flag querier.split-queries-by-day (or config split_queries_by_day) is deprecated, use querier.split-queries-by-interval instead.")
	}

	queryRangeMiddleware = append(
		queryRangeMiddleware,
		queryrange.InstrumentMiddleware("split_by_interval"),
		SplitByIntervalMiddleware(limits, codec),
	)

	var c cache.Cache
	if cfg.CacheResults {
		queryCacheMiddleware, cache, err := queryrange.NewResultsCacheMiddleware(
			log,
			cfg.ResultsCacheConfig,
			cacheKeyLimits{limits},
			limits,
			codec,
			extractor,
		)
		if err != nil {
			return nil, nil, err
		}
		c = cache
		queryRangeMiddleware = append(
			queryRangeMiddleware,
			queryrange.InstrumentMiddleware("results_cache"),
			queryCacheMiddleware,
		)
	}

	if cfg.MaxRetries > 0 {
		queryRangeMiddleware = append(
			queryRangeMiddleware,
			queryrange.InstrumentMiddleware("retry"),
			queryrange.NewRetryMiddleware(log, cfg.MaxRetries),
		)
	}

	return frontend.Tripperware(func(next http.RoundTripper) http.RoundTripper {
		// Finally, if the user selected any query range middleware, stitch it in.
		if len(queryRangeMiddleware) > 0 {
			rt := queryrange.NewRoundTripper(next, codec, queryRangeMiddleware...)
			return frontend.RoundTripFunc(func(r *http.Request) (*http.Response, error) {
				if !strings.HasSuffix(r.URL.Path, "/query_range") {
					return next.RoundTrip(r)
				}
				return rt.RoundTrip(r)
			})
		}
		return next
	}), c, nil
}
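
For illustration only: the returned Tripperware only routes requests whose path ends in `/query_range` through the middleware chain; everything else falls through to the next round tripper untouched. A standalone sketch of that path-gating pattern (the `roundTripFunc` and `onlyQueryRange` names are hypothetical, not Loki APIs):

```go
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
)

// roundTripFunc adapts a plain function into an http.RoundTripper,
// the same trick frontend.RoundTripFunc plays in the hunk above.
type roundTripFunc func(*http.Request) (*http.Response, error)

func (f roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) { return f(r) }

// onlyQueryRange routes /query_range requests through wrapped and lets every
// other path bypass it, mirroring the suffix check in NewMetricTripperware.
func onlyQueryRange(next, wrapped http.RoundTripper) http.RoundTripper {
	return roundTripFunc(func(r *http.Request) (*http.Response, error) {
		if !strings.HasSuffix(r.URL.Path, "/query_range") {
			return next.RoundTrip(r)
		}
		return wrapped.RoundTrip(r)
	})
}

func main() {
	// The server just reports whether a request went through the wrapped tripper.
	srv := httptest.NewServer(http.HandlerFunc(func(_ http.ResponseWriter, r *http.Request) {
		fmt.Printf("%s wrapped=%q\n", r.URL.Path, r.Header.Get("X-Wrapped"))
	}))
	defer srv.Close()

	wrapped := roundTripFunc(func(r *http.Request) (*http.Response, error) {
		r2 := r.Clone(r.Context())
		r2.Header.Set("X-Wrapped", "true")
		return http.DefaultTransport.RoundTrip(r2)
	})

	client := &http.Client{Transport: onlyQueryRange(http.DefaultTransport, wrapped)}
	for _, path := range []string{"/loki/api/v1/query_range", "/loki/api/v1/label"} {
		if resp, err := client.Get(srv.URL + path); err == nil {
			resp.Body.Close()
		}
	}
}
```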

pkg/querier/queryrange/roundtrip_test.go
@@ -287,6 +287,14 @@ func TestRegexpParamsSupport(t *testing.T) {
type fakeLimits struct {
	maxQueryParallelism int
	splits              map[string]time.Duration
}

func (f fakeLimits) QuerySplitDuration(key string) time.Duration {
	if f.splits == nil {
		return 0
	}
	return f.splits[key]
}

func (fakeLimits) MaxQueryLength(string) time.Duration {

pkg/querier/queryrange/split_by_interval.go
@@ -12,13 +12,12 @@ import (
)

// SplitByIntervalMiddleware creates a new Middleware that splits log requests by a given interval.
-func SplitByIntervalMiddleware(interval time.Duration, limits queryrange.Limits, merger queryrange.Merger) queryrange.Middleware {
+func SplitByIntervalMiddleware(limits Limits, merger queryrange.Merger) queryrange.Middleware {
	return queryrange.MiddlewareFunc(func(next queryrange.Handler) queryrange.Handler {
		return &splitByInterval{
-			next:     next,
-			limits:   limits,
-			merger:   merger,
-			interval: interval,
+			next:   next,
+			limits: limits,
+			merger: merger,
		}
	})
}
@@ -34,10 +33,9 @@ type packedResp struct {
}

type splitByInterval struct {
-	next     queryrange.Handler
-	limits   queryrange.Limits
-	merger   queryrange.Merger
-	interval time.Duration
+	next   queryrange.Handler
+	limits Limits
+	merger queryrange.Merger
}
func (h *splitByInterval) Feed(ctx context.Context, input []*lokiResult) chan *lokiResult {
@@ -69,6 +67,12 @@ func (h *splitByInterval) Process(
	ch := h.Feed(ctx, input)

	// queries with 0 limits should not be exited early
	var unlimited bool
	if threshold == 0 {
		unlimited = true
	}

	// don't spawn unnecessary goroutines
	var p int = parallelism
	if len(input) < parallelism {
@@ -91,10 +95,15 @@ func (h *splitByInterval) Process(
			responses = append(responses, data.resp)

			// see if we can exit early if a limit has been reached
-			threshold -= data.resp.(*LokiResponse).Count()
-			if threshold <= 0 {
-				return responses, nil
+			if casted, ok := data.resp.(*LokiResponse); !unlimited && ok {
+				threshold -= casted.Count()
+				if threshold <= 0 {
+					return responses, nil
+				}
			}
		}
	}
@@ -129,7 +138,13 @@ func (h *splitByInterval) Do(ctx context.Context, r queryrange.Request) (queryra
		return nil, err
	}

-	intervals := splitByTime(lokiRequest, h.interval)
+	interval := h.limits.QuerySplitDuration(userid)
+	// skip split by if unset
+	if interval == 0 {
+		return h.next.Do(ctx, r)
+	}
+
+	intervals := splitByTime(lokiRequest, interval)

	if sp := opentracing.SpanFromContext(ctx); sp != nil {
		sp.LogFields(otlog.Int("n_intervals", len(intervals)))
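
For illustration only: `splitByTime` itself is not shown in this hunk. The sketch below shows how a request's time range can be cut into interval-sized sub-ranges, and why a zero interval is treated as "don't split" before that point (hypothetical `splitRange` helper, not the actual Loki function):

```go
package main

import (
	"fmt"
	"time"
)

type timeRange struct {
	start, end time.Time
}

// splitRange cuts [start, end) into interval-sized pieces; with a non-positive
// interval the caller is expected to skip splitting entirely, mirroring the
// QuerySplitDuration == 0 check above.
func splitRange(start, end time.Time, interval time.Duration) []timeRange {
	if interval <= 0 {
		return []timeRange{{start, end}}
	}
	var out []timeRange
	for cur := start; cur.Before(end); cur = cur.Add(interval) {
		stop := cur.Add(interval)
		if stop.After(end) {
			stop = end
		}
		out = append(out, timeRange{cur, stop})
	}
	return out
}

func main() {
	start := time.Date(2020, 1, 16, 0, 0, 0, 0, time.UTC)
	end := start.Add(5 * time.Hour)
	// Prints three sub-ranges: 2h, 2h and a final 1h remainder.
	for _, r := range splitRange(start, end, 2*time.Hour) {
		fmt.Println(r.start.Format(time.RFC3339), "→", r.end.Format(time.RFC3339))
	}
}
```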

pkg/querier/queryrange/split_by_interval_test.go
@@ -79,31 +79,32 @@ func Test_splitQuery(t *testing.T) {
func Test_splitByInterval_Do(t *testing.T) {
	ctx := user.InjectOrgID(context.Background(), "1")

-	split := splitByInterval{
-		next: queryrange.HandlerFunc(func(_ context.Context, r queryrange.Request) (queryrange.Response, error) {
-			return &LokiResponse{
-				Status:    loghttp.QueryStatusSuccess,
-				Direction: r.(*LokiRequest).Direction,
-				Limit:     r.(*LokiRequest).Limit,
-				Version:   uint32(loghttp.VersionV1),
-				Data: LokiData{
-					ResultType: loghttp.ResultTypeStream,
-					Result: []logproto.Stream{
-						{
-							Labels: `{foo="bar", level="debug"}`,
-							Entries: []logproto.Entry{
-								{Timestamp: time.Unix(0, r.(*LokiRequest).StartTs.UnixNano()), Line: fmt.Sprintf("%d", r.(*LokiRequest).StartTs.UnixNano())},
-							},
-						},
-					},
-				},
-			}, nil
-		}),
-		limits:   fakeLimits{},
-		merger:   lokiCodec,
-		interval: time.Hour,
-	}
+	next := queryrange.HandlerFunc(func(_ context.Context, r queryrange.Request) (queryrange.Response, error) {
+		return &LokiResponse{
+			Status:    loghttp.QueryStatusSuccess,
+			Direction: r.(*LokiRequest).Direction,
+			Limit:     r.(*LokiRequest).Limit,
+			Version:   uint32(loghttp.VersionV1),
+			Data: LokiData{
+				ResultType: loghttp.ResultTypeStream,
+				Result: []logproto.Stream{
+					{
+						Labels: `{foo="bar", level="debug"}`,
+						Entries: []logproto.Entry{
+							{Timestamp: time.Unix(0, r.(*LokiRequest).StartTs.UnixNano()), Line: fmt.Sprintf("%d", r.(*LokiRequest).StartTs.UnixNano())},
+						},
+					},
+				},
+			},
+		}, nil
+	})
+
+	l := WithDefaultLimits(fakeLimits{}, queryrange.Config{SplitQueriesByInterval: time.Hour})
+	split := SplitByIntervalMiddleware(
+		l,
+		lokiCodec,
+	).Wrap(next)

	tests := []struct {
		name string
@@ -252,40 +253,41 @@ func Test_ExitEarly(t *testing.T) {
	var callCt int
	var mtx sync.Mutex

-	split := splitByInterval{
-		next: queryrange.HandlerFunc(func(_ context.Context, r queryrange.Request) (queryrange.Response, error) {
-			time.Sleep(time.Millisecond) // artificial delay to minimize race condition exposure in test
-
-			mtx.Lock()
-			defer mtx.Unlock()
-			callCt++
-
-			return &LokiResponse{
-				Status:    loghttp.QueryStatusSuccess,
-				Direction: r.(*LokiRequest).Direction,
-				Limit:     r.(*LokiRequest).Limit,
-				Version:   uint32(loghttp.VersionV1),
-				Data: LokiData{
-					ResultType: loghttp.ResultTypeStream,
-					Result: []logproto.Stream{
-						{
-							Labels: `{foo="bar", level="debug"}`,
-							Entries: []logproto.Entry{
-								{
-									Timestamp: time.Unix(0, r.(*LokiRequest).StartTs.UnixNano()),
-									Line:      fmt.Sprintf("%d", r.(*LokiRequest).StartTs.UnixNano()),
-								},
-							},
-						},
-					},
-				},
-			}, nil
-		}),
-		limits:   fakeLimits{},
-		merger:   lokiCodec,
-		interval: time.Hour,
-	}
+	next := queryrange.HandlerFunc(func(_ context.Context, r queryrange.Request) (queryrange.Response, error) {
+		time.Sleep(time.Millisecond) // artificial delay to minimize race condition exposure in test
+
+		mtx.Lock()
+		defer mtx.Unlock()
+		callCt++
+
+		return &LokiResponse{
+			Status:    loghttp.QueryStatusSuccess,
+			Direction: r.(*LokiRequest).Direction,
+			Limit:     r.(*LokiRequest).Limit,
+			Version:   uint32(loghttp.VersionV1),
+			Data: LokiData{
+				ResultType: loghttp.ResultTypeStream,
+				Result: []logproto.Stream{
+					{
+						Labels: `{foo="bar", level="debug"}`,
+						Entries: []logproto.Entry{
+							{
+								Timestamp: time.Unix(0, r.(*LokiRequest).StartTs.UnixNano()),
+								Line:      fmt.Sprintf("%d", r.(*LokiRequest).StartTs.UnixNano()),
+							},
+						},
+					},
+				},
+			},
+		}, nil
+	})
+
+	l := WithDefaultLimits(fakeLimits{}, queryrange.Config{SplitQueriesByInterval: time.Hour})
+	split := SplitByIntervalMiddleware(
+		l,
+		lokiCodec,
+	).Wrap(next)

	req := &LokiRequest{
		StartTs: time.Unix(0, 0),
@@ -332,37 +334,38 @@ func Test_ExitEarly(t *testing.T) {
func Test_DoesntDeadlock(t *testing.T) {
	n := 10

-	split := splitByInterval{
-		next: queryrange.HandlerFunc(func(_ context.Context, r queryrange.Request) (queryrange.Response, error) {
-			return &LokiResponse{
-				Status:    loghttp.QueryStatusSuccess,
-				Direction: r.(*LokiRequest).Direction,
-				Limit:     r.(*LokiRequest).Limit,
-				Version:   uint32(loghttp.VersionV1),
-				Data: LokiData{
-					ResultType: loghttp.ResultTypeStream,
-					Result: []logproto.Stream{
-						{
-							Labels: `{foo="bar", level="debug"}`,
-							Entries: []logproto.Entry{
-								{
-									Timestamp: time.Unix(0, r.(*LokiRequest).StartTs.UnixNano()),
-									Line:      fmt.Sprintf("%d", r.(*LokiRequest).StartTs.UnixNano()),
-								},
-							},
-						},
-					},
-				},
-			}, nil
-		}),
-		limits: fakeLimits{
-			maxQueryParallelism: n,
-		},
-		merger:   lokiCodec,
-		interval: time.Hour,
-	}
+	next := queryrange.HandlerFunc(func(_ context.Context, r queryrange.Request) (queryrange.Response, error) {
+		return &LokiResponse{
+			Status:    loghttp.QueryStatusSuccess,
+			Direction: r.(*LokiRequest).Direction,
+			Limit:     r.(*LokiRequest).Limit,
+			Version:   uint32(loghttp.VersionV1),
+			Data: LokiData{
+				ResultType: loghttp.ResultTypeStream,
+				Result: []logproto.Stream{
+					{
+						Labels: `{foo="bar", level="debug"}`,
+						Entries: []logproto.Entry{
+							{
+								Timestamp: time.Unix(0, r.(*LokiRequest).StartTs.UnixNano()),
+								Line:      fmt.Sprintf("%d", r.(*LokiRequest).StartTs.UnixNano()),
+							},
+						},
+					},
+				},
+			},
+		}, nil
+	})
+
+	l := WithDefaultLimits(fakeLimits{
+		maxQueryParallelism: n,
+	}, queryrange.Config{SplitQueriesByInterval: time.Hour})
+	split := SplitByIntervalMiddleware(
+		l,
+		lokiCodec,
+	).Wrap(next)

	// split into n requests w/ n/2 limit, ensuring unused responses are cleaned up properly
	req := &LokiRequest{

pkg/util/validation/limits.go
@@ -42,6 +42,9 @@ type Limits struct {
	MaxStreamsMatchersPerQuery int `yaml:"max_streams_matchers_per_query"`
	MaxConcurrentTailRequests  int `yaml:"max_concurrent_tail_requests"`

	// Query frontend enforced limits. The default is actually parameterized by the queryrange config.
	QuerySplitDuration time.Duration `yaml:"split_queries_by_interval"`

	// Config for overrides, convenient if it goes here.
	PerTenantOverrideConfig string        `yaml:"per_tenant_override_config"`
	PerTenantOverridePeriod time.Duration `yaml:"per_tenant_override_period"`

@@ -214,6 +217,11 @@ func (o *Overrides) MaxStreamsMatchersPerQuery(userID string) int {
	return o.getOverridesForUser(userID).MaxStreamsMatchersPerQuery
}

// QuerySplitDuration returns the tenant specific splitby interval applied in the query frontend.
func (o *Overrides) QuerySplitDuration(userID string) time.Duration {
	return o.getOverridesForUser(userID).QuerySplitDuration
}

// MaxConcurrentTailRequests returns the limit to number of concurrent tail requests.
func (o *Overrides) MaxConcurrentTailRequests(userID string) int {
	return o.getOverridesForUser(userID).MaxConcurrentTailRequests
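
For illustration only: per-tenant values are resolved through the overrides machinery, returning the tenant's limits when the override file has an entry and the defaults otherwise. A cut-down standalone sketch of that lookup shape (hypothetical `overrides`/`tenantLimits` types, not the validation package's real ones):

```go
package main

import (
	"fmt"
	"time"
)

// tenantLimits is a stand-in for the validation Limits struct,
// holding just the field this PR adds.
type tenantLimits struct {
	QuerySplitDuration time.Duration
}

// overrides resolves a tenant's limits, falling back to the defaults when the
// tenant has no entry — the same shape as the getOverridesForUser pattern above.
type overrides struct {
	defaults  tenantLimits
	perTenant map[string]tenantLimits
}

func (o *overrides) limitsFor(userID string) tenantLimits {
	if l, ok := o.perTenant[userID]; ok {
		return l
	}
	return o.defaults
}

func (o *overrides) QuerySplitDuration(userID string) time.Duration {
	return o.limitsFor(userID).QuerySplitDuration
}

func main() {
	o := &overrides{
		defaults:  tenantLimits{QuerySplitDuration: 0}, // 0 → the frontend default kicks in later via WithDefaultLimits
		perTenant: map[string]tenantLimits{"team-a": {QuerySplitDuration: 15 * time.Minute}},
	}
	fmt.Println(o.QuerySplitDuration("team-a")) // 15m0s
	fmt.Println(o.QuerySplitDuration("team-b")) // 0s
}
```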
