package queryrange

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/efficientgo/core/errors"
	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/concurrency"
	"github.com/grafana/dskit/tenant"
	"github.com/opentracing/opentracing-go"
	"github.com/prometheus/common/model"

	"github.com/grafana/loki/v3/pkg/logproto"
	"github.com/grafana/loki/v3/pkg/logql"
	"github.com/grafana/loki/v3/pkg/logql/syntax"
	logqlstats "github.com/grafana/loki/v3/pkg/logqlmodel/stats"
	"github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase"
	"github.com/grafana/loki/v3/pkg/storage/config"
	"github.com/grafana/loki/v3/pkg/storage/stores/index/stats"
	"github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/tsdb/sharding"
	"github.com/grafana/loki/v3/pkg/storage/types"
	util_log "github.com/grafana/loki/v3/pkg/util/log"
	"github.com/grafana/loki/v3/pkg/util/spanlogger"
	"github.com/grafana/loki/v3/pkg/util/validation"
)
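
// shardResolverForConf returns a logql.ShardResolver for the given period
// config, along with a bool indicating whether sharding is supported.
// TSDB periods get a dynamicShardResolver backed by index stats; other index
// types fall back to a constant shard factor derived from the period's
// RowShards (or no sharding at all when RowShards < 2).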
func shardResolverForConf(
	ctx context.Context,
	conf config.PeriodConfig,
	defaultLookback time.Duration,
	logger log.Logger,
	maxParallelism int,
	maxShards int,
	r queryrangebase.Request,
	statsHandler, next, retryNext queryrangebase.Handler,
	limits Limits,
) (logql.ShardResolver, bool) {
	if conf.IndexType == types.TSDBType {
		return &dynamicShardResolver{
			ctx:              ctx,
			logger:           logger,
			statsHandler:     statsHandler,
			retryNextHandler: retryNext,
			next:             next,
			limits:           limits,
			from:             model.Time(r.GetStart().UnixMilli()),
			through:          model.Time(r.GetEnd().UnixMilli()),
			maxParallelism:   maxParallelism,
			maxShards:        maxShards,
			defaultLookback:  defaultLookback,
		}, true
	}
	if conf.RowShards < 2 {
		return nil, false
	}
	return logql.ConstantShards(conf.RowShards), true
}
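
// dynamicShardResolver resolves shard factors and sharding ranges for
// TSDB-backed periods by querying index stats for the matchers in a query.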
type dynamicShardResolver struct {
	ctx context.Context
	// TODO(owen-d): shouldn't have to fork handlers here -- one should just transparently handle the right logic
	// depending on the underlying type?
	statsHandler     queryrangebase.Handler // index stats handler (hooked up to results cache, etc)
	retryNextHandler queryrangebase.Handler // next handler wrapped with retries
	next             queryrangebase.Handler // next handler in the chain (used for non-stats reqs)
	logger           log.Logger
	limits           Limits

	from, through   model.Time
	maxParallelism  int
	maxShards       int
	defaultLookback time.Duration
}

// getStatsForMatchers returns the index stats for all the groups in matcherGroups.
func getStatsForMatchers(
	ctx context.Context,
	logger log.Logger,
	statsHandler queryrangebase.Handler,
	start, end model.Time,
	matcherGroups []syntax.MatcherRange,
	parallelism int,
	defaultLookback time.Duration,
) ([]*stats.Stats, error) {
	startTime := time.Now()

	results := make([]*stats.Stats, len(matcherGroups))
	if err := concurrency.ForEachJob(ctx, len(matcherGroups), parallelism, func(ctx context.Context, i int) error {
		matchers := syntax.MatchersString(matcherGroups[i].Matchers)
		diff := matcherGroups[i].Interval + matcherGroups[i].Offset
		adjustedFrom := start.Add(-diff)
		if matcherGroups[i].Interval == 0 {
			// For limited instant queries, when start == end, the queries would return
			// zero results. Prometheus has a concept of "look back amount of time for instant queries"
			// since metric data is sampled at some configurable scrape_interval (commonly 15s, 30s, or 1m).
			// We copy that idea and say "find me logs from the past when start=end".
			adjustedFrom = adjustedFrom.Add(-defaultLookback)
		}

		adjustedThrough := end.Add(-matcherGroups[i].Offset)

		resp, err := statsHandler.Do(ctx, &logproto.IndexStatsRequest{
			From:     adjustedFrom,
			Through:  adjustedThrough,
			Matchers: matchers,
		})
		if err != nil {
			return err
		}

		casted, ok := resp.(*IndexStatsResponse)
		if !ok {
			return fmt.Errorf("expected *IndexStatsResponse while querying index, got %T", resp)
		}

		results[i] = casted.Response

		level.Debug(logger).Log(
			append(
				casted.Response.LoggingKeyValues(),
				"msg", "queried index",
				"type", "single",
				"matchers", matchers,
				"duration", time.Since(startTime),
				"from", adjustedFrom.Time(),
				"through", adjustedThrough.Time(),
				"length", adjustedThrough.Sub(adjustedFrom),
			)...,
		)

		return nil
	}); err != nil {
		return nil, err
	}

	return results, nil
}
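
// GetStats queries index stats for every matcher group in the expression
// concurrently and merges the results into a single stats.Stats.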
func (r *dynamicShardResolver) GetStats(e syntax.Expr) (stats.Stats, error) {
	sp, ctx := opentracing.StartSpanFromContext(r.ctx, "dynamicShardResolver.GetStats")
	defer sp.Finish()

	start := time.Now()

	// We try to shard subtrees in the AST independently if possible, although
	// nested binary expressions can make this difficult. In this case,
	// we query the index stats for all matcher groups then sum the results.
	grps, err := syntax.MatcherGroups(e)
	if err != nil {
		return stats.Stats{}, err
	}

	// If there are zero matcher groups, we'll inject one to query everything
	if len(grps) == 0 {
		grps = append(grps, syntax.MatcherRange{})
	}

	log := util_log.WithContext(ctx, util_log.Logger)
	results, err := getStatsForMatchers(ctx, log, r.statsHandler, r.from, r.through, grps, r.maxParallelism, r.defaultLookback)
	if err != nil {
		return stats.Stats{}, err
	}

	combined := stats.MergeStats(results...)

	level.Debug(log).Log(
		append(
			combined.LoggingKeyValues(),
			"msg", "queried index",
			"type", "combined",
			"len", len(results),
			"max_parallelism", r.maxParallelism,
			"duration", time.Since(start),
		)...,
	)

	return combined, nil
}
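
// Shards computes a shard factor for the expression from its combined index
// stats and the per-tenant TSDB max-bytes-per-shard limit, returning the
// factor together with the estimated bytes per shard.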
func (r *dynamicShardResolver) Shards(e syntax.Expr) (int, uint64, error) {
	sp, ctx := opentracing.StartSpanFromContext(r.ctx, "dynamicShardResolver.Shards")
	defer sp.Finish()
	log := spanlogger.FromContext(ctx)
	defer log.Finish()

	combined, err := r.GetStats(e)
	if err != nil {
		return 0, 0, err
	}

	tenantIDs, err := tenant.TenantIDs(ctx)
	if err != nil {
		return 0, 0, err
	}

	maxBytesPerShard := validation.SmallestPositiveIntPerTenant(tenantIDs, r.limits.TSDBMaxBytesPerShard)
	factor := sharding.GuessShardFactor(combined.Bytes, uint64(maxBytesPerShard), r.maxShards)

	bytesPerShard := combined.Bytes
	if factor > 0 {
		bytesPerShard = combined.Bytes / uint64(factor)
	}

	level.Debug(log).Log(
		append(
			combined.LoggingKeyValues(),
			"msg", "got shard factor",
			"factor", factor,
			"total_bytes", strings.Replace(humanize.Bytes(combined.Bytes), " ", "", 1),
			"bytes_per_shard", strings.Replace(humanize.Bytes(bytesPerShard), " ", "", 1),
		)...,
	)
	return factor, bytesPerShard, nil
}
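
// ShardingRanges requests precomputed shards (and any chunk ref groups) from
// the index for the expression's adjusted time range, accounting for per-group
// offsets, intervals, and the default lookback for instant queries.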
func (r *dynamicShardResolver) ShardingRanges(expr syntax.Expr, targetBytesPerShard uint64) (
	[]logproto.Shard,
	[]logproto.ChunkRefGroup,
	error,
) {
	log := spanlogger.FromContext(r.ctx)

	var (
		adjustedFrom    = r.from
		adjustedThrough model.Time
	)

	// NB(owen-d): there should only ever be 1 matcher group passed
	// to this call as we call it separately for different legs
	// of binary ops, but I'm putting in the loop for completeness
	grps, err := syntax.MatcherGroups(expr)
	if err != nil {
		return nil, nil, err
	}

	for _, grp := range grps {
		diff := grp.Interval

		// For instant queries, when start == end,
		// we have a default lookback which we add here
		if diff == 0 {
			diff = r.defaultLookback
		}

		diff += grp.Offset

		// use the oldest adjustedFrom
		if r.from.Add(-diff).Before(adjustedFrom) {
			adjustedFrom = r.from.Add(-diff)
		}

		// use the latest adjustedThrough
		if r.through.Add(-grp.Offset).After(adjustedThrough) {
			adjustedThrough = r.through.Add(-grp.Offset)
		}
	}

	// handle the case where there are no matchers
	if adjustedThrough == 0 {
		adjustedThrough = r.through
	}

	exprStr := expr.String()
	// try to get shards for the given expression;
	// if it fails, fall back to linear shards based on stats.
	// use the retry handler here to retry transient errors
	resp, err := r.retryNextHandler.Do(r.ctx, &logproto.ShardsRequest{
		From:                adjustedFrom,
		Through:             adjustedThrough,
		Query:               expr.String(),
		TargetBytesPerShard: targetBytesPerShard,
	})
	if err != nil {
		return nil, nil, errors.Wrapf(err, "failed to get shards for expression, got %T: %+v", err, err)
	}

	casted, ok := resp.(*ShardsResponse)
	if !ok {
		return nil, nil, fmt.Errorf("expected *ShardsResponse while querying index, got %T", resp)
	}

	// accumulate stats
	logqlstats.JoinResults(r.ctx, casted.Response.Statistics)

	var refs int
	for _, x := range casted.Response.ChunkGroups {
		refs += len(x.Refs)
	}

	level.Debug(log).Log(
		"msg", "retrieved sharding ranges",
		"target_bytes_per_shard", targetBytesPerShard,
		"shards", len(casted.Response.Shards),
		"query", exprStr,
		"total_chunks", casted.Response.Statistics.Index.TotalChunks,
		"post_filter_chunks", casted.Response.Statistics.Index.PostFilterChunks,
		"precomputed_refs", refs,
	)

	return casted.Response.Shards, casted.Response.ChunkGroups, err
}