Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/querier/queryrange/splitters.go

377 lines
13 KiB

package queryrange
import (
"fmt"
"time"
"github.com/go-kit/log/level"
"github.com/prometheus/common/model"
util_log "github.com/grafana/loki/v3/pkg/util/log"
"github.com/grafana/loki/v3/pkg/logproto"
"github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase"
"github.com/grafana/loki/v3/pkg/util"
"github.com/grafana/loki/v3/pkg/util/validation"
)
type splitter interface {
split(execTime time.Time, tenantIDs []string, request queryrangebase.Request, interval time.Duration) ([]queryrangebase.Request, error)
}
type defaultSplitter struct {
limits Limits
iqo util.IngesterQueryOptions
}
func newDefaultSplitter(limits Limits, iqo util.IngesterQueryOptions) *defaultSplitter {
return &defaultSplitter{limits, iqo}
}
func (s *defaultSplitter) split(execTime time.Time, tenantIDs []string, req queryrangebase.Request, interval time.Duration) ([]queryrangebase.Request, error) {
var (
reqs []queryrangebase.Request
factory func(start, end time.Time)
endTimeInclusive = true
)
switch r := req.(type) {
case *LokiRequest:
endTimeInclusive = false
factory = func(start, end time.Time) {
reqs = append(reqs, &LokiRequest{
Query: r.Query,
Limit: r.Limit,
Step: r.Step,
Interval: r.Interval,
Direction: r.Direction,
Path: r.Path,
StartTs: start,
EndTs: end,
Plan: r.Plan,
})
}
case *LokiSeriesRequest:
// metadata queries have end time inclusive.
// Set endTimeInclusive to true so that ForInterval keeps a gap of 1ms between splits to
// avoid querying duplicate data in adjacent queries.
factory = func(start, end time.Time) {
reqs = append(reqs, &LokiSeriesRequest{
Match: r.Match,
Path: r.Path,
StartTs: start,
EndTs: end,
Shards: r.Shards,
})
}
case *LabelRequest:
// metadata queries have end time inclusive.
// Set endTimeInclusive to true so that ForInterval keeps a gap of 1ms between splits to
// avoid querying duplicate data in adjacent queries.
factory = func(start, end time.Time) {
reqs = append(reqs, NewLabelRequest(start, end, r.Query, r.Name, r.Path()))
}
case *logproto.IndexStatsRequest:
factory = func(start, end time.Time) {
reqs = append(reqs, &logproto.IndexStatsRequest{
From: model.TimeFromUnix(start.Unix()),
Through: model.TimeFromUnix(end.Unix()),
Matchers: r.GetMatchers(),
})
}
case *logproto.ShardsRequest:
factory = func(start, end time.Time) {
reqs = append(reqs, &logproto.ShardsRequest{
From: model.TimeFromUnix(start.Unix()),
Through: model.TimeFromUnix(end.Unix()),
Query: r.Query,
TargetBytesPerShard: r.TargetBytesPerShard,
})
}
case *logproto.VolumeRequest:
factory = func(start, end time.Time) {
reqs = append(reqs, &logproto.VolumeRequest{
From: model.TimeFromUnix(start.Unix()),
Through: model.TimeFromUnix(end.Unix()),
Matchers: r.GetMatchers(),
Limit: r.Limit,
TargetLabels: r.TargetLabels,
AggregateBy: r.AggregateBy,
})
}
case *DetectedFieldsRequest:
factory = func(start, end time.Time) {
reqs = append(reqs, &DetectedFieldsRequest{
DetectedFieldsRequest: logproto.DetectedFieldsRequest{
Start: start,
End: end,
Query: r.GetQuery(),
LineLimit: r.GetLineLimit(),
Limit: r.GetLimit(),
Step: r.GetStep(),
},
path: r.path,
})
}
case *DetectedLabelsRequest:
factory = func(start, end time.Time) {
reqs = append(reqs, &DetectedLabelsRequest{
DetectedLabelsRequest: logproto.DetectedLabelsRequest{
Start: start,
End: end,
Query: r.Query,
},
path: r.path,
})
}
default:
level.Warn(util_log.Logger).Log("msg", fmt.Sprintf("splitter: unsupported request type: %T", req))
return nil, nil
}
var (
splitsBeforeRebound []queryrangebase.Request
origStart = req.GetStart().UTC()
origEnd = req.GetEnd().UTC()
start, end = origStart, origEnd
reboundOrigQuery bool
splitIntervalBeforeRebound time.Duration
)
switch req.(type) {
// not applying `split_ingester_queries_by_interval` for metadata queries since it solves a different problem of reducing the subqueries sent to the ingesters.
// we instead prefer `split_recent_metadata_queries_by_interval` for metadata queries which favours shorter subqueries to improve cache effectiveness.
// even though the number of subqueries increase, caching should deamplify it overtime.
case *LokiSeriesRequest, *LabelRequest:
var (
recentMetadataQueryWindow = validation.MaxDurationOrZeroPerTenant(tenantIDs, s.limits.RecentMetadataQueryWindow)
recentMetadataQuerySplitInterval = validation.MaxDurationOrZeroPerTenant(tenantIDs, s.limits.RecentMetadataQuerySplitDuration)
)
// if either of them are not configured, we fallback to the default split interval for the entire query length.
if recentMetadataQueryWindow == 0 || recentMetadataQuerySplitInterval == 0 {
break
}
start, end, reboundOrigQuery = recentMetadataQueryBounds(execTime, recentMetadataQueryWindow, req)
splitIntervalBeforeRebound = recentMetadataQuerySplitInterval
default:
if ingesterQueryInterval := validation.MaxDurationOrZeroPerTenant(tenantIDs, s.limits.IngesterQuerySplitDuration); ingesterQueryInterval != 0 {
start, end, reboundOrigQuery = ingesterQueryBounds(execTime, s.iqo, req)
splitIntervalBeforeRebound = ingesterQueryInterval
}
}
if reboundOrigQuery {
util.ForInterval(splitIntervalBeforeRebound, start, end, endTimeInclusive, factory)
// rebound after query portion within ingester query window or recent metadata query window has been split out
end = start
start = origStart
if endTimeInclusive {
end = end.Add(-util.SplitGap)
}
// query only overlaps ingester query window or recent metadata query window, nothing more to do
if start.After(end) || start.Equal(end) {
return reqs, nil
}
// copy the splits, reset the results
splitsBeforeRebound = reqs
reqs = nil
} else {
start = origStart
end = origEnd
}
// perform splitting over the rest of the time range
util.ForInterval(interval, start, end, endTimeInclusive, factory)
// move the ingester or recent metadata splits to the end to maintain correct order
reqs = append(reqs, splitsBeforeRebound...)
return reqs, nil
}
type metricQuerySplitter struct {
limits Limits
iqo util.IngesterQueryOptions
}
func newMetricQuerySplitter(limits Limits, iqo util.IngesterQueryOptions) *metricQuerySplitter {
return &metricQuerySplitter{limits, iqo}
}
// reduceSplitIntervalForRangeVector reduces the split interval for a range query based on the duration of the range vector.
// Large range vector durations will not be split into smaller intervals because it can cause the queries to be slow by over-processing data.
func (s *metricQuerySplitter) reduceSplitIntervalForRangeVector(r *LokiRequest, interval time.Duration) (time.Duration, error) {
maxRange, _, err := maxRangeVectorAndOffsetDuration(r.Plan.AST)
if err != nil {
return 0, err
}
if maxRange > interval {
return maxRange, nil
}
return interval, nil
}
// Round up to the step before the next interval boundary.
func (s *metricQuerySplitter) nextIntervalBoundary(t time.Time, step int64, interval time.Duration) time.Time {
stepNs := step * 1e6
nsPerInterval := interval.Nanoseconds()
startOfNextInterval := ((t.UnixNano() / nsPerInterval) + 1) * nsPerInterval
// ensure that target is a multiple of steps away from the start time
target := startOfNextInterval - ((startOfNextInterval - t.UnixNano()) % stepNs)
if target == startOfNextInterval {
target -= stepNs
}
return time.Unix(0, target)
}
func (s *metricQuerySplitter) split(execTime time.Time, tenantIDs []string, r queryrangebase.Request, interval time.Duration) ([]queryrangebase.Request, error) {
var reqs []queryrangebase.Request
lokiReq := r.(*LokiRequest)
interval, err := s.reduceSplitIntervalForRangeVector(lokiReq, interval)
if err != nil {
return nil, err
}
start, end := s.alignStartEnd(r.GetStep(), lokiReq.StartTs, lokiReq.EndTs)
lokiReq = lokiReq.WithStartEnd(start, end).(*LokiRequest)
factory := func(start, end time.Time) {
reqs = append(reqs, &LokiRequest{
Query: lokiReq.Query,
Limit: lokiReq.Limit,
Step: lokiReq.Step,
Interval: lokiReq.Interval,
Direction: lokiReq.Direction,
Path: lokiReq.Path,
StartTs: start,
EndTs: end,
Plan: lokiReq.Plan,
})
}
// step is >= configured split interval, let us just split the query interval by step
// TODO this is likely buggy when step >= query range, how should we handle this?
if lokiReq.Step >= interval.Milliseconds() {
util.ForInterval(time.Duration(lokiReq.Step*1e6), lokiReq.StartTs, lokiReq.EndTs, false, factory)
return reqs, nil
}
var (
ingesterSplits []queryrangebase.Request
needsIngesterSplits bool
)
origStart := start
origEnd := end
start, end, needsIngesterSplits = ingesterQueryBounds(execTime, s.iqo, lokiReq)
start, end = s.alignStartEnd(r.GetStep(), start, end)
if ingesterQueryInterval := validation.MaxDurationOrZeroPerTenant(tenantIDs, s.limits.IngesterQuerySplitDuration); ingesterQueryInterval != 0 && needsIngesterSplits {
// perform splitting using special interval (`split_ingester_queries_by_interval`)
s.buildMetricSplits(lokiReq.GetStep(), ingesterQueryInterval, start, end, factory)
// rebound after ingester queries have been split out
//
// the end time should now be the boundary of the `query_ingester_within` window, which is "start" currently;
// but since start is already step-aligned we need to subtract 1ns to align it down by 1 more step so that we
// get a consistent step between splits
end, _ = s.alignStartEnd(r.GetStep(), start.Add(-time.Nanosecond), end)
// we restore the previous start time (the start time of the query)
start = origStart
// query only overlaps ingester query window, nothing more to do
if start.After(end) || start.Equal(end) {
return reqs, nil
}
// copy the splits, reset the results
ingesterSplits = reqs
reqs = nil
} else {
start = origStart
end = origEnd
}
// perform splitting over the rest of the time range
s.buildMetricSplits(lokiReq.GetStep(), interval, start, end, factory)
// move the ingester splits to the end to maintain correct order
reqs = append(reqs, ingesterSplits...)
return reqs, nil
}
func (s *metricQuerySplitter) alignStartEnd(step int64, start, end time.Time) (time.Time, time.Time) {
// step align start and end time of the query. Start time is rounded down and end time is rounded up.
stepNs := step * 1e6
startNs := start.UnixNano()
endNs := end.UnixNano()
if mod := endNs % stepNs; mod != 0 {
endNs += stepNs - mod
}
return time.Unix(0, startNs-startNs%stepNs), time.Unix(0, endNs)
}
func (s *metricQuerySplitter) buildMetricSplits(step int64, interval time.Duration, start, end time.Time, factory func(start, end time.Time)) {
for splStart := start; splStart.Before(end); splStart = s.nextIntervalBoundary(splStart, step, interval).Add(time.Duration(step) * time.Millisecond) {
splEnd := s.nextIntervalBoundary(splStart, step, interval)
if splEnd.Add(time.Duration(step)*time.Millisecond).After(end) || splEnd.Add(time.Duration(step)*time.Millisecond) == end {
splEnd = end
}
factory(splStart, splEnd)
}
}
func recentMetadataQueryBounds(execTime time.Time, recentMetadataQueryWindow time.Duration, req queryrangebase.Request) (time.Time, time.Time, bool) {
start, end := req.GetStart().UTC(), req.GetEnd().UTC()
windowStart := execTime.UTC().Add(-recentMetadataQueryWindow)
// rebound only if the query end is strictly inside the window
if !windowStart.Before(end) {
return start, end, false
}
if windowStart.Before(start) {
windowStart = start
}
return windowStart, end, true
}
// ingesterQueryBounds determines if we need to split time ranges overlapping the ingester query window (`query_ingesters_within`)
// and retrieve the bounds for those specific splits
func ingesterQueryBounds(execTime time.Time, iqo util.IngesterQueryOptions, req queryrangebase.Request) (time.Time, time.Time, bool) {
start, end := req.GetStart().UTC(), req.GetEnd().UTC()
// ingesters are not queried, nothing to do
if iqo == nil || iqo.QueryStoreOnly() {
return start, end, false
}
windowSize := iqo.QueryIngestersWithin()
ingesterWindow := execTime.UTC().Add(-windowSize)
// clamp to the start time
if ingesterWindow.Before(start) {
ingesterWindow = start
}
// query range does not overlap with ingester query window, nothing to do
if end.Before(ingesterWindow) {
return start, end, false
}
return ingesterWindow, end, true
}