mirror of https://github.com/grafana/loki
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
336 lines
10 KiB
336 lines
10 KiB
package ruler
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/go-kit/log"
|
|
"github.com/pkg/errors"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/common/model"
|
|
"github.com/prometheus/common/sigv4"
|
|
"github.com/prometheus/prometheus/config"
|
|
"github.com/prometheus/prometheus/model/labels"
|
|
"github.com/prometheus/prometheus/model/rulefmt"
|
|
"github.com/prometheus/prometheus/model/timestamp"
|
|
"github.com/prometheus/prometheus/notifier"
|
|
"github.com/prometheus/prometheus/promql"
|
|
"github.com/prometheus/prometheus/promql/parser"
|
|
"github.com/prometheus/prometheus/rules"
|
|
"github.com/prometheus/prometheus/template"
|
|
"github.com/weaveworks/common/user"
|
|
|
|
"github.com/grafana/loki/pkg/logql/syntax"
|
|
ruler "github.com/grafana/loki/pkg/ruler/base"
|
|
"github.com/grafana/loki/pkg/ruler/rulespb"
|
|
"github.com/grafana/loki/pkg/ruler/util"
|
|
)
|
|
|
|
// RulesLimits is the one function we need from limits.Overrides, and
|
|
// is here to limit coupling.
|
|
type RulesLimits interface {
|
|
ruler.RulesLimits
|
|
|
|
RulerRemoteWriteDisabled(userID string) bool
|
|
RulerRemoteWriteURL(userID string) string
|
|
RulerRemoteWriteTimeout(userID string) time.Duration
|
|
RulerRemoteWriteHeaders(userID string) map[string]string
|
|
RulerRemoteWriteRelabelConfigs(userID string) []*util.RelabelConfig
|
|
RulerRemoteWriteConfig(userID string, id string) *config.RemoteWriteConfig
|
|
RulerRemoteWriteQueueCapacity(userID string) int
|
|
RulerRemoteWriteQueueMinShards(userID string) int
|
|
RulerRemoteWriteQueueMaxShards(userID string) int
|
|
RulerRemoteWriteQueueMaxSamplesPerSend(userID string) int
|
|
RulerRemoteWriteQueueBatchSendDeadline(userID string) time.Duration
|
|
RulerRemoteWriteQueueMinBackoff(userID string) time.Duration
|
|
RulerRemoteWriteQueueMaxBackoff(userID string) time.Duration
|
|
RulerRemoteWriteQueueRetryOnRateLimit(userID string) bool
|
|
RulerRemoteWriteSigV4Config(userID string) *sigv4.SigV4Config
|
|
|
|
RulerRemoteEvaluationTimeout(userID string) time.Duration
|
|
RulerRemoteEvaluationMaxResponseSize(userID string) int64
|
|
}
|
|
|
|
// queryFunc returns a new query function using the rules.EngineQueryFunc function
|
|
// and passing an altered timestamp.
|
|
func queryFunc(evaluator Evaluator, overrides RulesLimits, checker readyChecker, userID string) rules.QueryFunc {
|
|
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
|
|
// check if storage instance is ready; if not, fail the rule evaluation;
|
|
// we do this to prevent an attempt to append new samples before the WAL appender is ready
|
|
if !checker.isReady(userID) {
|
|
return nil, errNotReady
|
|
}
|
|
|
|
adjusted := t.Add(-overrides.EvaluationDelay(userID))
|
|
res, err := evaluator.Eval(ctx, qs, adjusted)
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("rule evaluation failed: %w", err)
|
|
}
|
|
switch v := res.Data.(type) {
|
|
case promql.Vector:
|
|
return v, nil
|
|
case promql.Scalar:
|
|
return promql.Vector{promql.Sample{
|
|
Point: promql.Point{T: v.T, V: v.V},
|
|
Metric: labels.Labels{},
|
|
}}, nil
|
|
default:
|
|
return nil, errors.New("rule result is not a vector or scalar")
|
|
}
|
|
}
|
|
}
|
|
|
|
// MultiTenantManagerAdapter will wrap a MultiTenantManager which validates loki rules
|
|
func MultiTenantManagerAdapter(mgr ruler.MultiTenantManager) ruler.MultiTenantManager {
|
|
return &MultiTenantManager{inner: mgr}
|
|
}
|
|
|
|
// MultiTenantManager wraps a cortex MultiTenantManager but validates loki rules
|
|
type MultiTenantManager struct {
|
|
inner ruler.MultiTenantManager
|
|
}
|
|
|
|
func (m *MultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGroups map[string]rulespb.RuleGroupList) {
|
|
m.inner.SyncRuleGroups(ctx, ruleGroups)
|
|
}
|
|
|
|
func (m *MultiTenantManager) GetRules(userID string) []*rules.Group {
|
|
return m.inner.GetRules(userID)
|
|
}
|
|
|
|
func (m *MultiTenantManager) Stop() {
|
|
if registry != nil {
|
|
registry.stop()
|
|
}
|
|
|
|
m.inner.Stop()
|
|
}
|
|
|
|
// ValidateRuleGroup validates a rulegroup
|
|
func (m *MultiTenantManager) ValidateRuleGroup(grp rulefmt.RuleGroup) []error {
|
|
return ValidateGroups(grp)
|
|
}
|
|
|
|
// MetricsPrefix defines the prefix to use for all metrics in this package
|
|
const MetricsPrefix = "loki_ruler_wal_"
|
|
|
|
var registry storageRegistry
|
|
|
|
func MultiTenantRuleManager(cfg Config, evaluator Evaluator, overrides RulesLimits, logger log.Logger, reg prometheus.Registerer) ruler.ManagerFactory {
|
|
reg = prometheus.WrapRegistererWithPrefix(MetricsPrefix, reg)
|
|
|
|
registry = newWALRegistry(log.With(logger, "storage", "registry"), reg, cfg, overrides)
|
|
|
|
return func(
|
|
ctx context.Context,
|
|
userID string,
|
|
notifier *notifier.Manager,
|
|
logger log.Logger,
|
|
reg prometheus.Registerer,
|
|
) ruler.RulesManager {
|
|
registry.configureTenantStorage(userID)
|
|
|
|
logger = log.With(logger, "user", userID)
|
|
queryFn := queryFunc(evaluator, overrides, registry, userID)
|
|
memStore := NewMemStore(userID, queryFn, newMemstoreMetrics(reg), 5*time.Minute, log.With(logger, "subcomponent", "MemStore"))
|
|
|
|
// GroupLoader builds a cache of the rules as they're loaded by the
|
|
// manager.This is used to back the memstore
|
|
groupLoader := NewCachingGroupLoader(GroupLoader{})
|
|
|
|
mgr := rules.NewManager(&rules.ManagerOptions{
|
|
Appendable: registry,
|
|
Queryable: memStore,
|
|
QueryFunc: queryFn,
|
|
Context: user.InjectOrgID(ctx, userID),
|
|
ExternalURL: cfg.ExternalURL.URL,
|
|
NotifyFunc: ruler.SendAlerts(notifier, cfg.ExternalURL.URL.String()),
|
|
Logger: logger,
|
|
Registerer: reg,
|
|
OutageTolerance: cfg.OutageTolerance,
|
|
ForGracePeriod: cfg.ForGracePeriod,
|
|
ResendDelay: cfg.ResendDelay,
|
|
GroupLoader: groupLoader,
|
|
})
|
|
|
|
cachingManager := &CachingRulesManager{
|
|
manager: mgr,
|
|
groupLoader: groupLoader,
|
|
}
|
|
|
|
memStore.Start(groupLoader)
|
|
|
|
return cachingManager
|
|
}
|
|
}
|
|
|
|
// CachingRulesManager holds a CachingGroupLoader to make sure the GroupLoader
|
|
// has consistent state after update operations. Manager needs to hold the same
|
|
// caching grouploader
|
|
type CachingRulesManager struct {
|
|
manager ruler.RulesManager
|
|
groupLoader *CachingGroupLoader
|
|
}
|
|
|
|
// Update reconciles the state of the CachingGroupLoader after a manager.Update.
|
|
// The GroupLoader is mutated as part of a call to Update but it might still
|
|
// contain removed files. Update tells the loader which files to keep
|
|
func (m *CachingRulesManager) Update(interval time.Duration, files []string, externalLabels labels.Labels, externalURL string, ruleGroupPostProcessFunc rules.RuleGroupPostProcessFunc) error {
|
|
err := m.manager.Update(interval, files, externalLabels, externalURL, ruleGroupPostProcessFunc)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
m.groupLoader.Prune(files)
|
|
return nil
|
|
}
|
|
|
|
func (m *CachingRulesManager) Run() {
|
|
m.manager.Run()
|
|
}
|
|
|
|
func (m *CachingRulesManager) Stop() {
|
|
m.manager.Stop()
|
|
}
|
|
|
|
func (m *CachingRulesManager) RuleGroups() []*rules.Group {
|
|
return m.manager.RuleGroups()
|
|
}
|
|
|
|
func ValidateGroups(grps ...rulefmt.RuleGroup) (errs []error) {
|
|
set := map[string]struct{}{}
|
|
|
|
for i, g := range grps {
|
|
if g.Name == "" {
|
|
errs = append(errs, errors.Errorf("group %d: Groupname must not be empty", i))
|
|
}
|
|
|
|
if _, ok := set[g.Name]; ok {
|
|
errs = append(
|
|
errs,
|
|
errors.Errorf("groupname: \"%s\" is repeated in the same file", g.Name),
|
|
)
|
|
}
|
|
|
|
set[g.Name] = struct{}{}
|
|
|
|
for _, r := range g.Rules {
|
|
if err := validateRuleNode(&r, g.Name); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
return errs
|
|
}
|
|
|
|
func validateRuleNode(r *rulefmt.RuleNode, groupName string) error {
|
|
if r.Record.Value != "" && r.Alert.Value != "" {
|
|
return errors.Errorf("only one of 'record' and 'alert' must be set")
|
|
}
|
|
|
|
if r.Record.Value == "" && r.Alert.Value == "" {
|
|
return errors.Errorf("one of 'record' or 'alert' must be set")
|
|
}
|
|
|
|
if r.Expr.Value == "" {
|
|
return errors.Errorf("field 'expr' must be set in rule")
|
|
} else if _, err := syntax.ParseExpr(r.Expr.Value); err != nil {
|
|
return errors.Wrapf(err, fmt.Sprintf("could not parse expression for record '%s' in group '%s'", r.Record.Value, groupName))
|
|
}
|
|
|
|
if r.Record.Value != "" {
|
|
if len(r.Annotations) > 0 {
|
|
return errors.Errorf("invalid field 'annotations' in recording rule")
|
|
}
|
|
if r.For != 0 {
|
|
return errors.Errorf("invalid field 'for' in recording rule")
|
|
}
|
|
if !model.IsValidMetricName(model.LabelValue(r.Record.Value)) {
|
|
return errors.Errorf("invalid recording rule name: %s", r.Record.Value)
|
|
}
|
|
}
|
|
|
|
for k, v := range r.Labels {
|
|
if !model.LabelName(k).IsValid() || k == model.MetricNameLabel {
|
|
return errors.Errorf("invalid label name: %s", k)
|
|
}
|
|
|
|
if !model.LabelValue(v).IsValid() {
|
|
return errors.Errorf("invalid label value: %s", v)
|
|
}
|
|
}
|
|
|
|
for k := range r.Annotations {
|
|
if !model.LabelName(k).IsValid() {
|
|
return errors.Errorf("invalid annotation name: %s", k)
|
|
}
|
|
}
|
|
|
|
for _, err := range testTemplateParsing(r) {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// testTemplateParsing checks if the templates used in labels and annotations
|
|
// of the alerting rules are parsed correctly.
|
|
func testTemplateParsing(rl *rulefmt.RuleNode) (errs []error) {
|
|
if rl.Alert.Value == "" {
|
|
// Not an alerting rule.
|
|
return errs
|
|
}
|
|
|
|
// Trying to parse templates.
|
|
tmplData := template.AlertTemplateData(map[string]string{}, map[string]string{}, "", 0)
|
|
defs := []string{
|
|
"{{$labels := .Labels}}",
|
|
"{{$externalLabels := .ExternalLabels}}",
|
|
"{{$value := .Value}}",
|
|
}
|
|
parseTest := func(text string) error {
|
|
tmpl := template.NewTemplateExpander(
|
|
context.TODO(),
|
|
strings.Join(append(defs, text), ""),
|
|
"__alert_"+rl.Alert.Value,
|
|
tmplData,
|
|
model.Time(timestamp.FromTime(time.Now())),
|
|
nil,
|
|
nil,
|
|
nil,
|
|
)
|
|
return tmpl.ParseTest()
|
|
}
|
|
|
|
// Parsing Labels.
|
|
for k, val := range rl.Labels {
|
|
err := parseTest(val)
|
|
if err != nil {
|
|
errs = append(errs, errors.Wrapf(err, "label %q", k))
|
|
}
|
|
}
|
|
|
|
// Parsing Annotations.
|
|
for k, val := range rl.Annotations {
|
|
err := parseTest(val)
|
|
if err != nil {
|
|
errs = append(errs, errors.Wrapf(err, "annotation %q", k))
|
|
}
|
|
}
|
|
|
|
return errs
|
|
}
|
|
|
|
// Allows logql expressions to be treated as promql expressions by the prometheus rules pkg.
|
|
type exprAdapter struct {
|
|
syntax.Expr
|
|
}
|
|
|
|
func (exprAdapter) PositionRange() parser.PositionRange { return parser.PositionRange{} }
|
|
func (exprAdapter) PromQLExpr() {}
|
|
func (exprAdapter) Type() parser.ValueType { return parser.ValueType("unimplemented") }
|
|
func (exprAdapter) Pretty(level int) string { return "" }
|
|
|