Like Prometheus, but for logs.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
loki/pkg/querier/queryrange/queryrangebase/results_cache.go

336 lines
11 KiB

package queryrangebase
import (
"context"
"flag"
"fmt"
"strings"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/dskit/user"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/grafana/loki/v3/pkg/logproto"
"github.com/grafana/loki/v3/pkg/storage/chunk/cache"
"github.com/grafana/loki/v3/pkg/storage/chunk/cache/resultscache"
"github.com/grafana/loki/v3/pkg/util/constants"
)
// noStoreValue is the value carried by cacheControlHeader when a response
// indicates that its results should not be cached.
var noStoreValue = "no-store"
// Values of the "reason" label attached to the version comparison failure
// counter.
const (
	// reasonMissing marks a response that carried no cache generation number
	// although one was present in the request context.
	reasonMissing = "missing"
	// reasonMismatch marks a response whose cache generation number differs
	// from the one present in the request context.
	reasonMismatch = "mismatch"
)
// ResultsCacheMetrics groups the Prometheus counters that track how cache
// generation numbers (cache key versions) compare when deciding whether a
// response may be cached.
type ResultsCacheMetrics struct {
// versionComparisons is incremented once per cache generation number
// comparison performed.
versionComparisons prometheus.Counter
// versionComparisonFailures is incremented per failed comparison and is
// partitioned by a "reason" label (reasonMissing or reasonMismatch).
versionComparisonFailures *prometheus.CounterVec
}
// NewResultsCacheMetrics creates the results cache counters through promauto,
// using registerer as the target registry, and returns them bundled in a
// ResultsCacheMetrics.
func NewResultsCacheMetrics(registerer prometheus.Registerer) *ResultsCacheMetrics {
	factory := promauto.With(registerer)

	comparisons := factory.NewCounter(prometheus.CounterOpts{
		Namespace: constants.Loki,
		Name:      "results_cache_version_comparisons_total",
		Help:      "Comparisons of cache key versions in the results cache between query-frontends & queriers",
	})

	// Failures carry a "reason" label so the different causes can be told apart.
	failures := factory.NewCounterVec(prometheus.CounterOpts{
		Namespace: constants.Loki,
		Name:      "results_cache_version_comparisons_failed",
		Help:      "Comparison failures of cache key versions in the results cache between query-frontends & queriers",
	}, []string{"reason"})

	return &ResultsCacheMetrics{
		versionComparisons:        comparisons,
		versionComparisonFailures: failures,
	}
}
// ResultsCacheConfig is the config for the results cache.
// It embeds resultscache.Config; the `yaml:",inline"` tag makes the embedded
// fields appear at the same YAML level as this struct rather than nested.
type ResultsCacheConfig struct {
resultscache.Config `yaml:",inline"`
}
// RegisterFlags registers the results cache flags on f, using "frontend." as
// the flag name prefix.
func (cfg *ResultsCacheConfig) RegisterFlags(f *flag.FlagSet) {
	const prefix = "frontend."
	cfg.RegisterFlagsWithPrefix(f, prefix)
}
// Extractor is used by the cache to extract a subset of a response from a cache entry.
// It extends resultscache.Extractor with a method that strips headers from a
// query response.
type Extractor interface {
resultscache.Extractor
// ResponseWithoutHeaders returns the content of resp without its headers.
ResponseWithoutHeaders(resp Response) Response
}
// PrometheusResponseExtractor helps extracting specific info from Query Response.
// It carries no state; its methods operate on *PrometheusResponse values.
type PrometheusResponseExtractor struct{}
// Extract builds a new response restricted to the time range [start, end].
//
// Warnings, result type and headers are carried over from res, the status is
// set to StatusSuccess, and the result is narrowed by extractMatrix. The two
// trailing int64 arguments are ignored. res must hold a *PrometheusResponse;
// any other dynamic type makes the type assertion panic.
func (PrometheusResponseExtractor) Extract(start, end int64, res resultscache.Response, _, _ int64) resultscache.Response {
	src := res.(*PrometheusResponse)

	narrowed := PrometheusData{
		ResultType: src.Data.ResultType,
		Result:     extractMatrix(start, end, src.Data.Result),
	}

	return &PrometheusResponse{
		Status:   StatusSuccess,
		Warnings: src.Warnings,
		Data:     narrowed,
		Headers:  src.Headers,
	}
}
// ResponseWithoutHeaders returns a new response with the warnings, result type
// and result of resp but no headers, and with the status set to StatusSuccess.
// Headers are not needed when sending the response back, so dropping them
// before caching reduces the size of the cached objects.
//
// resp must hold a *PrometheusResponse; any other dynamic type makes the type
// assertion panic.
func (PrometheusResponseExtractor) ResponseWithoutHeaders(resp Response) Response {
	src := resp.(*PrometheusResponse)

	data := PrometheusData{
		ResultType: src.Data.ResultType,
		Result:     src.Data.Result,
	}

	// Headers is deliberately left at its zero value.
	return &PrometheusResponse{
		Status:   StatusSuccess,
		Warnings: src.Warnings,
		Data:     data,
	}
}
type (
	// ShouldCacheFn decides whether the given request should go through the
	// cache. When it returns false the request is simply sent to the next
	// handler.
	ShouldCacheFn func(ctx context.Context, r Request) bool

	// ParallelismForReqFn returns the parallelism to use for a given request
	// issued on behalf of tenantIDs.
	ParallelismForReqFn func(ctx context.Context, tenantIDs []string, r Request) int
)
// resultsCache is the handler that serves query requests through a
// resultscache.ResultsCache and decides which responses may be cached.
type resultsCache struct {
// cache is the underlying results cache that Do delegates to.
cache *resultscache.ResultsCache
// logger is the base logger used for debug and warning messages.
logger log.Logger
// cacheGenNumberLoader, when nil, disables the cache generation number
// comparison in shouldCacheResponse.
cacheGenNumberLoader resultscache.CacheGenNumberLoader
// metrics holds the counters updated during cache generation number
// comparisons.
metrics *ResultsCacheMetrics
}
// NewResultsCacheMiddleware creates results cache middleware from config.
// The middleware caches results using a unique cache key for a given request (step,query,user) and interval.
// The cache assumes that each request length (end-start) is below or equal the interval.
// Each request starting from within the same interval will hit the same cache entry.
// If the cache doesn't have the entire duration of the request cached, it will query the uncached parts and append them to the cache entries.
// see `generateKey`.
//
// Notes on the arguments:
//   - When cacheGenNumberLoader is non-nil, c is wrapped with
//     cache.NewCacheGenNumMiddleware before use.
//   - A nil shouldCache means every request is considered cacheable.
//   - parallelismForReq is invoked without a nil check, so it must be non-nil
//     if the underlying cache ever asks for a request's parallelism.
//
// Every handler produced by the returned middleware owns its own resultsCache
// (and therefore its own underlying resultscache.ResultsCache), so wrapping
// several handlers with the same middleware keeps them independent.
//
// The returned error is always nil.
func NewResultsCacheMiddleware(
	logger log.Logger,
	c cache.Cache,
	keygen resultscache.KeyGenerator,
	limits Limits,
	merger Merger,
	extractor Extractor,
	cacheGenNumberLoader resultscache.CacheGenNumberLoader,
	shouldCache ShouldCacheFn,
	parallelismForReq ParallelismForReqFn,
	retentionEnabled bool,
	onlyUseEntireExtent bool,
	metrics *ResultsCacheMetrics,
) (Middleware, error) {
	if cacheGenNumberLoader != nil {
		c = cache.NewCacheGenNumMiddleware(c)
	}

	return MiddlewareFunc(func(next Handler) Handler {
		// Allocate the handler per wrap. Sharing one instance across wraps would
		// let a later wrap overwrite the cache (and the next handler bound to
		// it) of every handler returned earlier.
		out := &resultsCache{
			logger:               logger,
			cacheGenNumberLoader: cacheGenNumberLoader,
			metrics:              metrics,
		}

		// The wrappers below adapt this package's Request/Response based
		// callbacks to the generic resultscache signatures.
		nextCacheWrapper := resultscache.HandlerFunc(func(ctx context.Context, req resultscache.Request) (resultscache.Response, error) {
			return next.Do(ctx, req.(Request))
		})

		shouldCacheReqWrapper := func(ctx context.Context, req resultscache.Request) bool {
			if shouldCache == nil {
				return true
			}
			return shouldCache(ctx, req.(Request))
		}

		shouldCacheResWrapper := func(ctx context.Context, req resultscache.Request, res resultscache.Response, maxCacheTime int64) bool {
			return out.shouldCacheResponse(ctx, req.(Request), res.(Response), maxCacheTime)
		}

		parallelismForReqWrapper := func(ctx context.Context, tenantIDs []string, req resultscache.Request) int {
			return parallelismForReq(ctx, tenantIDs, req.(Request))
		}

		out.cache = resultscache.NewResultsCache(
			logger,
			c,
			nextCacheWrapper,
			keygen,
			limits,
			FromQueryResponseMergerToCacheResponseMerger(merger),
			extractor,
			shouldCacheReqWrapper,
			shouldCacheResWrapper,
			parallelismForReqWrapper,
			cacheGenNumberLoader,
			retentionEnabled,
			onlyUseEntireExtent,
		)

		return out
	}), nil
}
// Do serves r through the underlying results cache and returns the outcome as
// a query Response. Errors from the cache are returned unchanged; a cache
// response that does not implement Response is reported as an error.
func (s resultsCache) Do(ctx context.Context, r Request) (Response, error) {
	cached, err := s.cache.Do(ctx, r.(resultscache.Request))
	if err != nil {
		return nil, err
	}

	if queryRes, ok := cached.(Response); ok {
		return queryRes, nil
	}
	return nil, fmt.Errorf("could not cast cache response to query response")
}
// shouldCacheResponse reports whether the response r to req may be cached.
//
// It returns false when:
//   - any cacheControlHeader value of r equals noStoreValue;
//   - the query's @ modifier is not safe to cache (see isAtModifierCachable);
//   - a cacheGenNumberLoader is configured and either r carries no cache
//     generation number while ctx holds a non-empty one, or any generation
//     number in r differs from the one in ctx.
//
// In every other case it returns true. The generation number checks also
// update the version comparison metrics.
func (s resultsCache) shouldCacheResponse(ctx context.Context, req Request, r Response, maxCacheTime int64) bool {
	// The org ID only annotates log lines, so the extraction error is ignored.
	orgID, _ := user.ExtractOrgID(ctx)
	logger := log.With(s.logger, "org_id", orgID)

	for _, value := range getHeaderValuesWithName(r, cacheControlHeader) {
		if value != noStoreValue {
			continue
		}
		level.Debug(logger).Log("msg", fmt.Sprintf("%s header in response is equal to %s, not caching the response", cacheControlHeader, noStoreValue))
		return false
	}

	if !s.isAtModifierCachable(req, maxCacheTime) {
		return false
	}

	// Without a loader there are no generation numbers to validate.
	if s.cacheGenNumberLoader == nil {
		return true
	}

	respGens := getHeaderValuesWithName(r, ResultsCacheGenNumberHeaderName)
	ctxGen := cache.ExtractCacheGenNumber(ctx)

	s.metrics.versionComparisons.Inc()

	if ctxGen != "" && len(respGens) == 0 {
		level.Debug(logger).Log("msg", fmt.Sprintf("we found results cache gen number %s set in store but none in headers", ctxGen), "query", req.GetQuery())

		// NB(owen-d):
		// If the queriers aren't returning cache numbers, something is likely broken
		// (i.e. the queriers aren't reporting the cache numbers they see or aren't seeing cache numbers at all).
		s.metrics.versionComparisonFailures.WithLabelValues(reasonMissing).Inc()
		return false
	}

	for _, respGen := range respGens {
		if respGen == ctxGen {
			continue
		}
		level.Debug(logger).Log("msg", fmt.Sprintf("inconsistency in results cache gen numbers %s (GEN-FROM-RESPONSE) != %s (GEN-FROM-STORE), not caching the response", respGen, ctxGen), "query", req.GetQuery())
		s.metrics.versionComparisonFailures.WithLabelValues(reasonMismatch).Inc()
		return false
	}

	return true
}
// errAtModifierAfterEnd is returned from the AST visitor in
// isAtModifierCachable to stop the traversal once an uncachable @ modifier has
// been found.
var errAtModifierAfterEnd = errors.New("at modifier after end")

// isAtModifierCachable reports whether the result of a query is safe to cache
// with respect to its @ modifiers.
//
// A result is not safe to cache when an @ modifier:
//  1. points to a time beyond maxCacheTime, or
//  2. points to a time after the query range end, even if below maxCacheTime.
//     A tenant intentionally playing with old data could otherwise cause an
//     empty result to be cached by looking beyond the query end.
//
// Queries without an "@" character are always cachable. Queries that contain
// one but fail to parse are pessimistically treated as not cachable.
func (s resultsCache) isAtModifierCachable(r Request, maxCacheTime int64) bool {
	query := r.GetQuery()

	// Cheap pre-filter: no "@" means no @ modifier. The substring test also
	// matches an "@" elsewhere in the query text; such queries simply take the
	// parsing path below.
	if !strings.Contains(query, "@") {
		return true
	}

	expr, err := parser.ParseExpr(query)
	if err != nil {
		// We are being pessimistic in such cases.
		level.Warn(s.logger).Log("msg", "failed to parse query, considering @ modifier as not cachable", "query", query, "err", err)
		return false
	}

	// This resolves the start() and end() used with the @ modifier.
	expr = promql.PreprocessExpr(expr, r.GetStart(), r.GetEnd())

	queryEnd := r.GetEnd().UnixMilli()

	// unsafe reports whether an @ timestamp lies past the query end or past
	// maxCacheTime. A nil timestamp means the node has no @ modifier.
	unsafe := func(ts *int64) bool {
		return ts != nil && (*ts > queryEnd || *ts > maxCacheTime)
	}

	cachable := true
	parser.Inspect(expr, func(n parser.Node, _ []parser.Node) error {
		var ts *int64
		switch node := n.(type) {
		case *parser.VectorSelector:
			ts = node.Timestamp
		case *parser.MatrixSelector:
			ts = node.VectorSelector.(*parser.VectorSelector).Timestamp
		case *parser.SubqueryExpr:
			ts = node.Timestamp
		}

		if unsafe(ts) {
			cachable = false
			// A non-nil error aborts the traversal early.
			return errAtModifierAfterEnd
		}
		return nil
	})

	return cachable
}
// getHeaderValuesWithName collects the values of every header of r whose name
// equals headerName, in header order. The result is nil when no header matches.
func getHeaderValuesWithName(r Response, headerName string) (headerValues []string) {
	for _, header := range r.GetHeaders() {
		if header.GetName() == headerName {
			headerValues = append(headerValues, header.GetValues()...)
		}
	}
	return headerValues
}
// extractMatrix narrows every stream of matrix to the samples within
// [start, end] and returns the streams that still contain at least one sample,
// preserving their order. The returned slice is never nil.
func extractMatrix(start, end int64, matrix []SampleStream) []SampleStream {
	kept := make([]SampleStream, 0, len(matrix))
	for i := range matrix {
		if narrowed, ok := extractSampleStream(start, end, matrix[i]); ok {
			kept = append(kept, narrowed)
		}
	}
	return kept
}
// extractSampleStream returns a stream with the labels of stream and only the
// samples whose TimestampMs lies within [start, end], both bounds inclusive.
// When no sample qualifies it returns an empty SampleStream and false.
func extractSampleStream(start, end int64, stream SampleStream) (SampleStream, bool) {
	inRange := make([]logproto.LegacySample, 0, len(stream.Samples))
	for _, sample := range stream.Samples {
		if sample.TimestampMs < start || sample.TimestampMs > end {
			continue
		}
		inRange = append(inRange, sample)
	}

	if len(inRange) == 0 {
		return SampleStream{}, false
	}

	return SampleStream{
		Labels:  stream.Labels,
		Samples: inRange,
	}, true
}