From cf684ed38f8a45c1dbdb76b7ce2256120f3aec23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-Philippe=20Qu=C3=A9m=C3=A9ner?= Date: Thu, 9 Jun 2022 09:28:32 +0200 Subject: [PATCH] Alerting: bump rule version when updating rule group interval (#50295) * Alerting: move group update to alert rule service * rename validateAlertRuleInterval to validateRuleGroupInterval * init baseinterval correctly * add seconds suffix * extract validation function for reusability * add context to err message --- pkg/services/ngalert/api/api_provisioning.go | 4 +- pkg/services/ngalert/models/alert_rule.go | 8 +++ pkg/services/ngalert/ngalert.go | 4 +- .../ngalert/provisioning/alert_rules.go | 70 ++++++++++++------- .../ngalert/provisioning/alert_rules_test.go | 48 ++++++++++--- pkg/services/ngalert/store/alert_rule.go | 5 +- 6 files changed, 100 insertions(+), 39 deletions(-) diff --git a/pkg/services/ngalert/api/api_provisioning.go b/pkg/services/ngalert/api/api_provisioning.go index e01d2f6d374..83c40b57ff8 100644 --- a/pkg/services/ngalert/api/api_provisioning.go +++ b/pkg/services/ngalert/api/api_provisioning.go @@ -62,7 +62,7 @@ type AlertRuleService interface { CreateAlertRule(ctx context.Context, rule alerting_models.AlertRule, provenance alerting_models.Provenance) (alerting_models.AlertRule, error) UpdateAlertRule(ctx context.Context, rule alerting_models.AlertRule, provenance alerting_models.Provenance) (alerting_models.AlertRule, error) DeleteAlertRule(ctx context.Context, orgID int64, ruleUID string, provenance alerting_models.Provenance) error - UpdateAlertGroup(ctx context.Context, orgID int64, folderUID, rulegroup string, interval int64) error + UpdateRuleGroup(ctx context.Context, orgID int64, folderUID, rulegroup string, interval int64) error } func (srv *ProvisioningSrv) RouteGetPolicyTree(c *models.ReqContext) response.Response { @@ -276,7 +276,7 @@ func (srv *ProvisioningSrv) RouteDeleteAlertRule(c *models.ReqContext) response. func (srv *ProvisioningSrv) RoutePutAlertRuleGroup(c *models.ReqContext, ag apimodels.AlertRuleGroup) response.Response { rulegroup := pathParam(c, groupPathParam) folderUID := pathParam(c, folderUIDPathParam) - err := srv.alertRules.UpdateAlertGroup(c.Req.Context(), c.OrgId, folderUID, rulegroup, ag.Interval) + err := srv.alertRules.UpdateRuleGroup(c.Req.Context(), c.OrgId, folderUID, rulegroup, ag.Interval) if err != nil { return ErrResp(http.StatusInternalServerError, err, "") } diff --git a/pkg/services/ngalert/models/alert_rule.go b/pkg/services/ngalert/models/alert_rule.go index 8c37eb537e1..15cffa0251a 100644 --- a/pkg/services/ngalert/models/alert_rule.go +++ b/pkg/services/ngalert/models/alert_rule.go @@ -376,3 +376,11 @@ func PatchPartialAlertRule(existingRule *AlertRule, ruleToPatch *AlertRule) { ruleToPatch.For = existingRule.For } } + +func ValidateRuleGroupInterval(intervalSeconds, baseIntervalSeconds int64) error { + if intervalSeconds%baseIntervalSeconds != 0 || intervalSeconds <= 0 { + return fmt.Errorf("%w: interval (%v) should be non-zero and divided exactly by scheduler interval: %v", + ErrAlertRuleFailedValidation, time.Duration(intervalSeconds)*time.Second, baseIntervalSeconds) + } + return nil +} diff --git a/pkg/services/ngalert/ngalert.go b/pkg/services/ngalert/ngalert.go index 4f31d391a67..52652c1a635 100644 --- a/pkg/services/ngalert/ngalert.go +++ b/pkg/services/ngalert/ngalert.go @@ -157,7 +157,9 @@ func (ng *AlertNG) init() error { contactPointService := provisioning.NewContactPointService(store, ng.SecretsService, store, store, ng.Log) templateService := provisioning.NewTemplateService(store, store, store, ng.Log) muteTimingService := provisioning.NewMuteTimingService(store, store, store, ng.Log) - alertRuleService := provisioning.NewAlertRuleService(store, store, store, int64(ng.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval.Seconds()), ng.Log) + alertRuleService := provisioning.NewAlertRuleService(store, store, store, + int64(ng.Cfg.UnifiedAlerting.DefaultRuleEvaluationInterval.Seconds()), + int64(ng.Cfg.UnifiedAlerting.BaseInterval.Seconds()), ng.Log) api := api.API{ Cfg: ng.Cfg, diff --git a/pkg/services/ngalert/provisioning/alert_rules.go b/pkg/services/ngalert/provisioning/alert_rules.go index 483373e5d50..b71bb17d25b 100644 --- a/pkg/services/ngalert/provisioning/alert_rules.go +++ b/pkg/services/ngalert/provisioning/alert_rules.go @@ -13,24 +13,27 @@ import ( ) type AlertRuleService struct { - defaultInterval int64 - ruleStore store.RuleStore - provenanceStore ProvisioningStore - xact TransactionManager - log log.Logger + defaultIntervalSeconds int64 + baseIntervalSeconds int64 + ruleStore store.RuleStore + provenanceStore ProvisioningStore + xact TransactionManager + log log.Logger } func NewAlertRuleService(ruleStore store.RuleStore, provenanceStore ProvisioningStore, xact TransactionManager, - defaultInterval int64, + defaultIntervalSeconds int64, + baseIntervalSeconds int64, log log.Logger) *AlertRuleService { return &AlertRuleService{ - defaultInterval: defaultInterval, - ruleStore: ruleStore, - provenanceStore: provenanceStore, - xact: xact, - log: log, + defaultIntervalSeconds: defaultIntervalSeconds, + baseIntervalSeconds: baseIntervalSeconds, + ruleStore: ruleStore, + provenanceStore: provenanceStore, + xact: xact, + log: log, } } @@ -57,7 +60,7 @@ func (service *AlertRuleService) CreateAlertRule(ctx context.Context, rule model interval, err := service.ruleStore.GetRuleGroupInterval(ctx, rule.OrgID, rule.NamespaceUID, rule.RuleGroup) // if the alert group does not exists we just use the default interval if err != nil && errors.Is(err, store.ErrAlertRuleGroupNotFound) { - interval = service.defaultInterval + interval = service.defaultIntervalSeconds } else if err != nil { return models.AlertRule{}, err } @@ -75,10 +78,6 @@ func (service *AlertRuleService) CreateAlertRule(ctx context.Context, rule model } else { return errors.New("couldn't find newly created id") } - err = service.ruleStore.UpdateRuleGroup(ctx, rule.OrgID, rule.NamespaceUID, rule.RuleGroup, rule.IntervalSeconds) - if err != nil { - return err - } return service.provenanceStore.SetProvenance(ctx, &rule, rule.OrgID, provenance) }) if err != nil { @@ -87,6 +86,37 @@ func (service *AlertRuleService) CreateAlertRule(ctx context.Context, rule model return rule, nil } +// UpdateRuleGroup will update the interval for all rules in the group. +func (service *AlertRuleService) UpdateRuleGroup(ctx context.Context, orgID int64, namespaceUID string, ruleGroup string, interval int64) error { + if err := models.ValidateRuleGroupInterval(interval, service.baseIntervalSeconds); err != nil { + return err + } + return service.xact.InTransaction(ctx, func(ctx context.Context) error { + query := &models.ListAlertRulesQuery{ + OrgID: orgID, + NamespaceUIDs: []string{namespaceUID}, + RuleGroup: ruleGroup, + } + err := service.ruleStore.ListAlertRules(ctx, query) + if err != nil { + return fmt.Errorf("failed to list alert rules: %w", err) + } + updateRules := make([]store.UpdateRule, 0, len(query.Result)) + for _, rule := range query.Result { + if rule.IntervalSeconds == interval { + continue + } + newRule := *rule + newRule.IntervalSeconds = interval + updateRules = append(updateRules, store.UpdateRule{ + Existing: rule, + New: newRule, + }) + } + return service.ruleStore.UpdateAlertRules(ctx, updateRules) + }) +} + func (service *AlertRuleService) UpdateAlertRule(ctx context.Context, rule models.AlertRule, provenance models.Provenance) (models.AlertRule, error) { storedRule, storedProvenance, err := service.GetAlertRule(ctx, rule.OrgID, rule.UID) if err != nil { @@ -112,10 +142,6 @@ func (service *AlertRuleService) UpdateAlertRule(ctx context.Context, rule model if err != nil { return err } - err = service.ruleStore.UpdateRuleGroup(ctx, rule.OrgID, rule.NamespaceUID, rule.RuleGroup, rule.IntervalSeconds) - if err != nil { - return err - } return service.provenanceStore.SetProvenance(ctx, &rule, rule.OrgID, provenance) }) if err != nil { @@ -145,7 +171,3 @@ func (service *AlertRuleService) DeleteAlertRule(ctx context.Context, orgID int6 return service.provenanceStore.DeleteProvenance(ctx, rule, rule.OrgID) }) } - -func (service *AlertRuleService) UpdateAlertGroup(ctx context.Context, orgID int64, folderUID, roulegroup string, interval int64) error { - return service.ruleStore.UpdateRuleGroup(ctx, orgID, folderUID, roulegroup, interval) -} diff --git a/pkg/services/ngalert/provisioning/alert_rules_test.go b/pkg/services/ngalert/provisioning/alert_rules_test.go index 37bb8f9d6eb..8bfe5cd9f71 100644 --- a/pkg/services/ngalert/provisioning/alert_rules_test.go +++ b/pkg/services/ngalert/provisioning/alert_rules_test.go @@ -39,7 +39,7 @@ func TestAlertRuleService(t *testing.T) { require.Equal(t, int64(60), rule.IntervalSeconds) var interval int64 = 120 - err = ruleService.UpdateAlertGroup(context.Background(), orgID, rule.NamespaceUID, rule.RuleGroup, 120) + err = ruleService.UpdateRuleGroup(context.Background(), orgID, rule.NamespaceUID, rule.RuleGroup, 120) require.NoError(t, err) rule, _, err = ruleService.GetAlertRule(context.Background(), orgID, rule.UID) @@ -54,7 +54,7 @@ func TestAlertRuleService(t *testing.T) { require.NoError(t, err) var interval int64 = 120 - err = ruleService.UpdateAlertGroup(context.Background(), orgID, rule.NamespaceUID, rule.RuleGroup, 120) + err = ruleService.UpdateRuleGroup(context.Background(), orgID, rule.NamespaceUID, rule.RuleGroup, 120) require.NoError(t, err) rule = dummyRule("test#4-1", orgID) @@ -63,6 +63,34 @@ func TestAlertRuleService(t *testing.T) { require.NoError(t, err) require.Equal(t, interval, rule.IntervalSeconds) }) + t.Run("updating a rule group should bump the version number", func(t *testing.T) { + const ( + orgID = 123 + namespaceUID = "abc" + ruleUID = "some_rule_uid" + ruleGroup = "abc" + newInterval int64 = 120 + ) + rule := dummyRule("my_rule", orgID) + rule.UID = ruleUID + rule.RuleGroup = ruleGroup + rule.NamespaceUID = namespaceUID + _, err := ruleService.CreateAlertRule(context.Background(), rule, models.ProvenanceNone) + require.NoError(t, err) + + rule, _, err = ruleService.GetAlertRule(context.Background(), orgID, ruleUID) + require.NoError(t, err) + require.Equal(t, int64(1), rule.Version) + require.Equal(t, int64(60), rule.IntervalSeconds) + + err = ruleService.UpdateRuleGroup(context.Background(), orgID, namespaceUID, ruleGroup, newInterval) + require.NoError(t, err) + + rule, _, err = ruleService.GetAlertRule(context.Background(), orgID, ruleUID) + require.NoError(t, err) + require.Equal(t, int64(2), rule.Version) + require.Equal(t, newInterval, rule.IntervalSeconds) + }) t.Run("alert rule provenace should be correctly checked", func(t *testing.T) { tests := []struct { name string @@ -133,11 +161,12 @@ func createAlertRuleService(t *testing.T) AlertRuleService { BaseInterval: time.Second * 10, } return AlertRuleService{ - ruleStore: store, - provenanceStore: store, - xact: sqlStore, - log: log.New("testing"), - defaultInterval: 60, + ruleStore: store, + provenanceStore: store, + xact: sqlStore, + log: log.New("testing"), + baseIntervalSeconds: 10, + defaultIntervalSeconds: 60, } } @@ -150,8 +179,9 @@ func dummyRule(title string, orgID int64) models.AlertRule { IntervalSeconds: 60, Data: []models.AlertQuery{ { - RefID: "A", - Model: json.RawMessage("{}"), + RefID: "A", + Model: json.RawMessage("{}"), + DatasourceUID: "-100", RelativeTimeRange: models.RelativeTimeRange{ From: models.Duration(60), To: models.Duration(0), diff --git a/pkg/services/ngalert/store/alert_rule.go b/pkg/services/ngalert/store/alert_rule.go index ea52f90e55a..b7181c3e1cd 100644 --- a/pkg/services/ngalert/store/alert_rule.go +++ b/pkg/services/ngalert/store/alert_rule.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "strings" - "time" "github.com/grafana/grafana/pkg/models" "github.com/grafana/grafana/pkg/services/guardian" @@ -445,8 +444,8 @@ func (st DBstore) validateAlertRule(alertRule ngmodels.AlertRule) error { return fmt.Errorf("%w: title is empty", ngmodels.ErrAlertRuleFailedValidation) } - if alertRule.IntervalSeconds%int64(st.BaseInterval.Seconds()) != 0 || alertRule.IntervalSeconds <= 0 { - return fmt.Errorf("%w: interval (%v) should be non-zero and divided exactly by scheduler interval: %v", ngmodels.ErrAlertRuleFailedValidation, time.Duration(alertRule.IntervalSeconds)*time.Second, st.BaseInterval) + if err := ngmodels.ValidateRuleGroupInterval(alertRule.IntervalSeconds, int64(st.BaseInterval.Seconds())); err != nil { + return err } // enfore max name length in SQLite