Like Prometheus, but for logs.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
loki/pkg/ruler/compat.go

337 lines
10 KiB

Ruler: Recording Rules (#3766) * WIP: hack to get recording rules working and pushing to Cortex/Prometheus Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring Adding remote_write config for ruler Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving manager subpackage into ruler package to avoid dependency cycles This also mirrors Cortex's package structure Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Skipping commit if remote-write client is not defined Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating use of cortex client Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Memoizing appenders, using queue for samples & labels Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding buffer size configurability Refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to show current buffer size Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring for better responsibility separation & testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding per-tenant overrides of remote-write queue capacity Renaming "buffer size" to "queue capacity" to be more accurate Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding tests for evicting queue Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding more tests and refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding queue benchmark Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reducing redundancy in metric names Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Testing that only metric queries can be run Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor fixes pre-review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * 
Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Guarding against unprotected nil pointer dereference in Prometheus remote.Client Adding remote-write client validation Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Setting tenant ID header on remote-write client Adding User-Agent string Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating benchmark to use complex struct rather than int to be more reflective of usage Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Registering flags Removing extraneous checks Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to track remote-write commit errors Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Performance improvements based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Return error on invalid queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Removing global queue capacity config - using limits Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reusing memory in request preparation Refactoring for testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving remote-write metrics into struct Refactoring Unexporting and refactoring memstore metrics to match Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Applying review suggestions Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Allowing for runtime changing of per-tenant remote-write queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
4 years ago
package ruler
import (
"context"
"fmt"
"strings"
"time"
"github.com/go-kit/log"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/sigv4"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/prometheus/prometheus/model/timestamp"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/template"
"github.com/weaveworks/common/user"
"github.com/grafana/loki/pkg/logql/syntax"
ruler "github.com/grafana/loki/pkg/ruler/base"
"github.com/grafana/loki/pkg/ruler/rulespb"
"github.com/grafana/loki/pkg/ruler/util"
)
// RulesLimits is the one function we need from limits.Overrides, and
// is here to limit coupling.
type RulesLimits interface {
Ruler: Recording Rules (#3766) * WIP: hack to get recording rules working and pushing to Cortex/Prometheus Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring Adding remote_write config for ruler Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving manager subpackage into ruler package to avoid dependency cycles This also mirrors Cortex's package structure Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Skipping commit if remote-write client is not defined Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating use of cortex client Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Memoizing appenders, using queue for samples & labels Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding buffer size configurability Refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to show current buffer size Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring for better responsibility separation & testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding per-tenant overrides of remote-write queue capacity Renaming "buffer size" to "queue capacity" to be more accurate Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding tests for evicting queue Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding more tests and refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding queue benchmark Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reducing redundancy in metric names Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Testing that only metric queries can be run Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor fixes pre-review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * 
Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Guarding against unprotected nil pointer dereference in Prometheus remote.Client Adding remote-write client validation Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Setting tenant ID header on remote-write client Adding User-Agent string Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating benchmark to use complex struct rather than int to be more reflective of usage Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Registering flags Removing extraneous checks Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to track remote-write commit errors Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Performance improvements based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Return error on invalid queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Removing global queue capacity config - using limits Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reusing memory in request preparation Refactoring for testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving remote-write metrics into struct Refactoring Unexporting and refactoring memstore metrics to match Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Applying review suggestions Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Allowing for runtime changing of per-tenant remote-write queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
4 years ago
ruler.RulesLimits
RulerRemoteWriteDisabled(userID string) bool
RulerRemoteWriteURL(userID string) string
RulerRemoteWriteTimeout(userID string) time.Duration
RulerRemoteWriteHeaders(userID string) map[string]string
RulerRemoteWriteRelabelConfigs(userID string) []*util.RelabelConfig
RulerRemoteWriteConfig(userID string, id string) *config.RemoteWriteConfig
Ruler: Recording Rules (#3766) * WIP: hack to get recording rules working and pushing to Cortex/Prometheus Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring Adding remote_write config for ruler Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving manager subpackage into ruler package to avoid dependency cycles This also mirrors Cortex's package structure Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Skipping commit if remote-write client is not defined Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating use of cortex client Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Memoizing appenders, using queue for samples & labels Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding buffer size configurability Refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to show current buffer size Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring for better responsibility separation & testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding per-tenant overrides of remote-write queue capacity Renaming "buffer size" to "queue capacity" to be more accurate Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding tests for evicting queue Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding more tests and refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding queue benchmark Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reducing redundancy in metric names Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Testing that only metric queries can be run Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor fixes pre-review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * 
Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Guarding against unprotected nil pointer dereference in Prometheus remote.Client Adding remote-write client validation Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Setting tenant ID header on remote-write client Adding User-Agent string Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating benchmark to use complex struct rather than int to be more reflective of usage Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Registering flags Removing extraneous checks Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to track remote-write commit errors Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Performance improvements based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Return error on invalid queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Removing global queue capacity config - using limits Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reusing memory in request preparation Refactoring for testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving remote-write metrics into struct Refactoring Unexporting and refactoring memstore metrics to match Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Applying review suggestions Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Allowing for runtime changing of per-tenant remote-write queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
4 years ago
RulerRemoteWriteQueueCapacity(userID string) int
RulerRemoteWriteQueueMinShards(userID string) int
RulerRemoteWriteQueueMaxShards(userID string) int
RulerRemoteWriteQueueMaxSamplesPerSend(userID string) int
RulerRemoteWriteQueueBatchSendDeadline(userID string) time.Duration
RulerRemoteWriteQueueMinBackoff(userID string) time.Duration
RulerRemoteWriteQueueMaxBackoff(userID string) time.Duration
RulerRemoteWriteQueueRetryOnRateLimit(userID string) bool
RulerRemoteWriteSigV4Config(userID string) *sigv4.SigV4Config
RulerRemoteEvaluationTimeout(userID string) time.Duration
RulerRemoteEvaluationMaxResponseSize(userID string) int64
}
// queryFunc builds the rules.QueryFunc used for one tenant's rule evaluations.
//
// The returned function refuses to run until checker reports the tenant as
// ready, shifts the evaluation timestamp back by the tenant's evaluation delay,
// and converts the evaluator's result into a promql.Vector. Results that are
// neither a vector nor a scalar are rejected with an error.
func queryFunc(evaluator Evaluator, overrides RulesLimits, checker readyChecker, userID string) rules.QueryFunc {
	return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
		// Fail the evaluation while the storage instance is not ready, so that
		// no samples are appended before the WAL appender is available.
		if ready := checker.isReady(userID); !ready {
			return nil, errNotReady
		}

		delay := overrides.EvaluationDelay(userID)
		result, err := evaluator.Eval(ctx, qs, t.Add(-delay))
		if err != nil {
			return nil, fmt.Errorf("rule evaluation failed: %w", err)
		}

		if vec, ok := result.Data.(promql.Vector); ok {
			return vec, nil
		}
		if scalar, ok := result.Data.(promql.Scalar); ok {
			// A scalar is reported as a single-sample vector with no labels.
			sample := promql.Sample{
				Point:  promql.Point{T: scalar.T, V: scalar.V},
				Metric: labels.Labels{},
			}
			return promql.Vector{sample}, nil
		}
		return nil, errors.New("rule result is not a vector or scalar")
	}
}
// MultiTenantManagerAdapter wraps mgr in a MultiTenantManager, whose
// ValidateRuleGroup validates rule groups with ValidateGroups (Loki rules).
func MultiTenantManagerAdapter(mgr ruler.MultiTenantManager) ruler.MultiTenantManager {
return &MultiTenantManager{inner: mgr}
}
// MultiTenantManager wraps a base ruler MultiTenantManager but validates loki
// rules. Every method other than ValidateRuleGroup forwards to the wrapped
// manager.
type MultiTenantManager struct {
// inner is the wrapped manager that calls are forwarded to.
inner ruler.MultiTenantManager
}
// SyncRuleGroups forwards the per-tenant rule groups to the wrapped manager.
func (m *MultiTenantManager) SyncRuleGroups(ctx context.Context, ruleGroups map[string]rulespb.RuleGroupList) {
m.inner.SyncRuleGroups(ctx, ruleGroups)
}
// GetRules returns the rule groups the wrapped manager reports for userID.
func (m *MultiTenantManager) GetRules(userID string) []*rules.Group {
return m.inner.GetRules(userID)
}
// Stop stops the package-level storage registry, if one has been set, and then
// stops the wrapped manager.
func (m *MultiTenantManager) Stop() {
// registry is only assigned by MultiTenantRuleManager, so it may still be nil.
if registry != nil {
registry.stop()
}
m.inner.Stop()
}
// ValidateRuleGroup validates a single rule group by delegating to
// ValidateGroups and returns every error it reports.
func (m *MultiTenantManager) ValidateRuleGroup(grp rulefmt.RuleGroup) []error {
return ValidateGroups(grp)
}
// MetricsPrefix defines the prefix to use for all metrics in this package.
// MultiTenantRuleManager wraps the registerer it is given with this prefix.
const MetricsPrefix = "loki_ruler_wal_"
// registry is the package-level storage registry. It is assigned by
// MultiTenantRuleManager and stopped by MultiTenantManager.Stop.
var registry storageRegistry
func MultiTenantRuleManager(cfg Config, evaluator Evaluator, overrides RulesLimits, logger log.Logger, reg prometheus.Registerer) ruler.ManagerFactory {
reg = prometheus.WrapRegistererWithPrefix(MetricsPrefix, reg)
registry = newWALRegistry(log.With(logger, "storage", "registry"), reg, cfg, overrides)
return func(
ctx context.Context,
userID string,
notifier *notifier.Manager,
logger log.Logger,
reg prometheus.Registerer,
) ruler.RulesManager {
registry.configureTenantStorage(userID)
Ruler: Recording Rules (#3766) * WIP: hack to get recording rules working and pushing to Cortex/Prometheus Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring Adding remote_write config for ruler Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving manager subpackage into ruler package to avoid dependency cycles This also mirrors Cortex's package structure Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Skipping commit if remote-write client is not defined Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating use of cortex client Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Memoizing appenders, using queue for samples & labels Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding buffer size configurability Refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to show current buffer size Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring for better responsibility separation & testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding per-tenant overrides of remote-write queue capacity Renaming "buffer size" to "queue capacity" to be more accurate Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding tests for evicting queue Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding more tests and refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding queue benchmark Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reducing redundancy in metric names Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Testing that only metric queries can be run Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor fixes pre-review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * 
Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Guarding against unprotected nil pointer dereference in Prometheus remote.Client Adding remote-write client validation Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Setting tenant ID header on remote-write client Adding User-Agent string Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating benchmark to use complex struct rather than int to be more reflective of usage Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Registering flags Removing extraneous checks Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to track remote-write commit errors Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Performance improvements based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Return error on invalid queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Removing global queue capacity config - using limits Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reusing memory in request preparation Refactoring for testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving remote-write metrics into struct Refactoring Unexporting and refactoring memstore metrics to match Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Applying review suggestions Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Allowing for runtime changing of per-tenant remote-write queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
4 years ago
logger = log.With(logger, "user", userID)
queryFn := queryFunc(evaluator, overrides, registry, userID)
memStore := NewMemStore(userID, queryFn, newMemstoreMetrics(reg), 5*time.Minute, log.With(logger, "subcomponent", "MemStore"))
// GroupLoader builds a cache of the rules as they're loaded by the
// manager.This is used to back the memstore
groupLoader := NewCachingGroupLoader(GroupLoader{})
mgr := rules.NewManager(&rules.ManagerOptions{
Appendable: registry,
Queryable: memStore,
QueryFunc: queryFn,
Context: user.InjectOrgID(ctx, userID),
ExternalURL: cfg.ExternalURL.URL,
NotifyFunc: ruler.SendAlerts(notifier, cfg.ExternalURL.URL.String()),
Logger: logger,
Registerer: reg,
OutageTolerance: cfg.OutageTolerance,
ForGracePeriod: cfg.ForGracePeriod,
ResendDelay: cfg.ResendDelay,
GroupLoader: groupLoader,
})
cachingManager := &CachingRulesManager{
manager: mgr,
groupLoader: groupLoader,
}
memStore.Start(groupLoader)
return cachingManager
Ruler: Recording Rules (#3766) * WIP: hack to get recording rules working and pushing to Cortex/Prometheus Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring Adding remote_write config for ruler Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving manager subpackage into ruler package to avoid dependency cycles This also mirrors Cortex's package structure Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor refactorings Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Skipping commit if remote-write client is not defined Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating use of cortex client Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Memoizing appenders, using queue for samples & labels Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding buffer size configurability Refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to show current buffer size Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring for better responsibility separation & testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding per-tenant overrides of remote-write queue capacity Renaming "buffer size" to "queue capacity" to be more accurate Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding tests for evicting queue Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding more tests and refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding queue benchmark Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reducing redundancy in metric names Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Testing that only metric queries can be run Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Minor fixes pre-review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * 
Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Guarding against unprotected nil pointer dereference in Prometheus remote.Client Adding remote-write client validation Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Setting tenant ID header on remote-write client Adding User-Agent string Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Updating benchmark to use complex struct rather than int to be more reflective of usage Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Registering flags Removing extraneous checks Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Adding metric to track remote-write commit errors Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Refactoring based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Performance improvements based on review Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Return error on invalid queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Removing global queue capacity config - using limits Minor refactoring Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Reusing memory in request preparation Refactoring for testability Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Moving remote-write metrics into struct Refactoring Unexporting and refactoring memstore metrics to match Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Applying review suggestions Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Allowing for runtime changing of per-tenant remote-write queue capacity Signed-off-by: Danny Kopping <danny.kopping@grafana.com> * Appeasing the linter Signed-off-by: Danny Kopping <danny.kopping@grafana.com>
4 years ago
}
}
// CachingRulesManager holds a CachingGroupLoader to make sure the GroupLoader
// has consistent state after update operations. Manager needs to hold the same
// caching grouploader.
type CachingRulesManager struct {
// manager is the wrapped rules manager that all calls are forwarded to.
manager ruler.RulesManager
// groupLoader is pruned after every successful Update.
groupLoader *CachingGroupLoader
}
// Update forwards the update to the wrapped manager and, when that succeeds,
// prunes the CachingGroupLoader down to the given files. The loader is mutated
// during the manager's Update but may still hold entries for files that were
// removed, so it has to be told which ones to keep. If the wrapped manager
// returns an error, that error is returned and the loader is left untouched.
func (m *CachingRulesManager) Update(interval time.Duration, files []string, externalLabels labels.Labels, externalURL string, ruleGroupPostProcessFunc rules.RuleGroupPostProcessFunc) error {
	if err := m.manager.Update(interval, files, externalLabels, externalURL, ruleGroupPostProcessFunc); err != nil {
		return err
	}

	m.groupLoader.Prune(files)
	return nil
}
// Run calls Run on the wrapped manager.
func (m *CachingRulesManager) Run() {
m.manager.Run()
}
// Stop calls Stop on the wrapped manager.
func (m *CachingRulesManager) Stop() {
m.manager.Stop()
}
// RuleGroups returns the rule groups reported by the wrapped manager.
func (m *CachingRulesManager) RuleGroups() []*rules.Group {
return m.manager.RuleGroups()
}
// ValidateGroups checks every given rule group and returns all problems found.
//
// A group must have a non-empty name that has not already been used by an
// earlier group in the same call, and each of its rules must pass
// validateRuleNode. Validation does not stop at the first problem; the result
// is nil when nothing is wrong.
func ValidateGroups(grps ...rulefmt.RuleGroup) (errs []error) {
	seen := make(map[string]struct{}, len(grps))

	for idx := range grps {
		name := grps[idx].Name

		if name == "" {
			errs = append(errs, errors.Errorf("group %d: Groupname must not be empty", idx))
		}

		if _, dup := seen[name]; dup {
			errs = append(errs, errors.Errorf("groupname: \"%s\" is repeated in the same file", name))
		}
		seen[name] = struct{}{}

		ruleNodes := grps[idx].Rules
		for j := range ruleNodes {
			if err := validateRuleNode(&ruleNodes[j], name); err != nil {
				errs = append(errs, err)
			}
		}
	}

	return errs
}
// validateRuleNode validates a single rule belonging to the group groupName and
// returns the first problem found, or nil when the rule is acceptable.
//
// Checks, in order:
//   - exactly one of 'record' and 'alert' is set;
//   - 'expr' is set and is accepted by syntax.ParseExpr;
//   - recording rules have no annotations, no 'for', and a valid metric name;
//   - label names and values are valid, and no label is named like the metric
//     name label;
//   - annotation names are valid;
//   - for alerting rules, label and annotation templates parse.
func validateRuleNode(r *rulefmt.RuleNode, groupName string) error {
	if r.Record.Value != "" && r.Alert.Value != "" {
		return errors.Errorf("only one of 'record' and 'alert' must be set")
	}

	if r.Record.Value == "" && r.Alert.Value == "" {
		return errors.Errorf("one of 'record' or 'alert' must be set")
	}

	if r.Expr.Value == "" {
		return errors.Errorf("field 'expr' must be set in rule")
	}
	if _, err := syntax.ParseExpr(r.Expr.Value); err != nil {
		// Pass the names as arguments rather than as part of the format string,
		// so a '%' in a rule or group name cannot corrupt the message, and name
		// the rule by its actual kind.
		if r.Record.Value != "" {
			return errors.Wrapf(err, "could not parse expression for record '%s' in group '%s'", r.Record.Value, groupName)
		}
		return errors.Wrapf(err, "could not parse expression for alert '%s' in group '%s'", r.Alert.Value, groupName)
	}

	if r.Record.Value != "" {
		if len(r.Annotations) > 0 {
			return errors.Errorf("invalid field 'annotations' in recording rule")
		}
		if r.For != 0 {
			return errors.Errorf("invalid field 'for' in recording rule")
		}
		if !model.IsValidMetricName(model.LabelValue(r.Record.Value)) {
			return errors.Errorf("invalid recording rule name: %s", r.Record.Value)
		}
	}

	for k, v := range r.Labels {
		if !model.LabelName(k).IsValid() || k == model.MetricNameLabel {
			return errors.Errorf("invalid label name: %s", k)
		}

		if !model.LabelValue(v).IsValid() {
			return errors.Errorf("invalid label value: %s", v)
		}
	}

	for k := range r.Annotations {
		if !model.LabelName(k).IsValid() {
			return errors.Errorf("invalid annotation name: %s", k)
		}
	}

	// Only the first template error is reported, matching the single-error
	// return of this function.
	if errs := testTemplateParsing(r); len(errs) > 0 {
		return errs[0]
	}

	return nil
}
// testTemplateParsing runs a parse test over every label and annotation value
// of an alerting rule, treating each value as a template, and returns one
// wrapped error per value that fails. Rules without an alert name are not
// alerting rules and yield no errors.
func testTemplateParsing(rl *rulefmt.RuleNode) (errs []error) {
	alertName := rl.Alert.Value
	if alertName == "" {
		// Not an alerting rule.
		return nil
	}

	// Every template is prefixed with the same variable definitions before it
	// is parsed.
	data := template.AlertTemplateData(map[string]string{}, map[string]string{}, "", 0)
	preamble := strings.Join([]string{
		"{{$labels := .Labels}}",
		"{{$externalLabels := .ExternalLabels}}",
		"{{$value := .Value}}",
	}, "")

	tryParse := func(text string) error {
		expander := template.NewTemplateExpander(
			context.TODO(),
			preamble+text,
			"__alert_"+alertName,
			data,
			model.Time(timestamp.FromTime(time.Now())),
			nil,
			nil,
			nil,
		)
		return expander.ParseTest()
	}

	// Labels first, then annotations.
	for name, text := range rl.Labels {
		if err := tryParse(text); err != nil {
			errs = append(errs, errors.Wrapf(err, "label %q", name))
		}
	}

	for name, text := range rl.Annotations {
		if err := tryParse(text); err != nil {
			errs = append(errs, errors.Wrapf(err, "annotation %q", name))
		}
	}

	return errs
}
// exprAdapter allows logql expressions to be treated as promql expressions by
// the prometheus rules pkg. It embeds a syntax.Expr; the methods declared
// below are stubs that return fixed placeholder values.
type exprAdapter struct {
syntax.Expr
}
// PositionRange always returns the zero parser.PositionRange.
func (exprAdapter) PositionRange() parser.PositionRange { return parser.PositionRange{} }
// PromQLExpr is an empty method with no behavior.
func (exprAdapter) PromQLExpr() {}
// Type always returns the placeholder value type "unimplemented".
func (exprAdapter) Type() parser.ValueType { return parser.ValueType("unimplemented") }
// Pretty ignores level and always returns the empty string.
func (exprAdapter) Pretty(level int) string { return "" }