diff --git a/pkg/services/ngalert/CHANGELOG.md b/pkg/services/ngalert/CHANGELOG.md
index cf356cf0d65..1def4603cbb 100644
--- a/pkg/services/ngalert/CHANGELOG.md
+++ b/pkg/services/ngalert/CHANGELOG.md
@@ -45,6 +45,7 @@ Scopes must have an order to ensure consistency and ease of search, this helps u
 
 ## Grafana Alerting - main / unreleased
 
+- [FEATURE] use optimistic lock by version field when updating alert rules #50274
 - [ENHANCEMENT] Scheduler: Drop ticks if rule evaluation is too slow and adds a metric grafana_alerting_schedule_rule_evaluations_missed_total to track missed evaluations per rule #48885
 - [ENHANCEMENT] Ticker to tick at predictable time #50197
 
diff --git a/pkg/services/ngalert/api/api_provisioning.go b/pkg/services/ngalert/api/api_provisioning.go
index 438266093d8..d6a103a11c7 100644
--- a/pkg/services/ngalert/api/api_provisioning.go
+++ b/pkg/services/ngalert/api/api_provisioning.go
@@ -258,6 +258,9 @@ func (srv *ProvisioningSrv) RoutePostAlertRule(c *models.ReqContext, ar definiti
 		return ErrResp(http.StatusBadRequest, err, "")
 	}
 	if err != nil {
+		if errors.Is(err, store.ErrOptimisticLock) {
+			return ErrResp(http.StatusConflict, err, "")
+		}
 		return ErrResp(http.StatusInternalServerError, err, "")
 	}
 	ar.ID = createdAlertRule.ID
@@ -275,6 +278,9 @@ func (srv *ProvisioningSrv) RoutePutAlertRule(c *models.ReqContext, ar definitio
 		return ErrResp(http.StatusBadRequest, err, "")
 	}
 	if err != nil {
+		if errors.Is(err, store.ErrOptimisticLock) {
+			return ErrResp(http.StatusConflict, err, "")
+		}
 		return ErrResp(http.StatusInternalServerError, err, "")
 	}
 	ar.Updated = updatedAlertRule.Updated
@@ -295,6 +301,9 @@ func (srv *ProvisioningSrv) RoutePutAlertRuleGroup(c *models.ReqContext, ag defi
 	folderUID := pathParam(c, folderUIDPathParam)
 	err := srv.alertRules.UpdateRuleGroup(c.Req.Context(), c.OrgId, folderUID, rulegroup, ag.Interval)
 	if err != nil {
+		if errors.Is(err, store.ErrOptimisticLock) {
+			return ErrResp(http.StatusConflict, err, "")
+		}
 		return ErrResp(http.StatusInternalServerError, err, "")
 	}
 	return response.JSON(http.StatusOK, ag)
diff --git a/pkg/services/ngalert/api/api_ruler.go b/pkg/services/ngalert/api/api_ruler.go
index e9f409f1069..721638c98d4 100644
--- a/pkg/services/ngalert/api/api_ruler.go
+++ b/pkg/services/ngalert/api/api_ruler.go
@@ -467,6 +467,8 @@ func (srv RulerSrv) updateAlertRulesInGroup(c *models.ReqContext, groupKey ngmod
 			return ErrResp(http.StatusForbidden, err, "")
 		} else if errors.Is(err, ErrAuthorization) {
 			return ErrResp(http.StatusUnauthorized, err, "")
+		} else if errors.Is(err, store.ErrOptimisticLock) {
+			return ErrResp(http.StatusConflict, err, "")
 		}
 		return ErrResp(http.StatusInternalServerError, err, "failed to update rule group")
 	}
diff --git a/pkg/services/ngalert/models/alert_rule.go b/pkg/services/ngalert/models/alert_rule.go
index 827e0b26884..4d6b0932138 100644
--- a/pkg/services/ngalert/models/alert_rule.go
+++ b/pkg/services/ngalert/models/alert_rule.go
@@ -111,7 +111,7 @@ type AlertRule struct {
 	Data            []AlertQuery
 	Updated         time.Time
 	IntervalSeconds int64
-	Version         int64
+	Version         int64   `xorm:"version"` // this tag makes xorm add optimistic lock (see https://xorm.io/docs/chapter-06/1.lock/)
 	UID             string  `xorm:"uid"`
 	NamespaceUID    string  `xorm:"namespace_uid"`
 	DashboardUID    *string `xorm:"dashboard_uid"`
diff --git a/pkg/services/ngalert/store/alert_rule.go b/pkg/services/ngalert/store/alert_rule.go
index 0b71a77fd77..18fbc9a59b5 100644
--- a/pkg/services/ngalert/store/alert_rule.go
+++ b/pkg/services/ngalert/store/alert_rule.go
@@ -34,6 +34,7 @@ type UpdateRule struct {
 
 var (
 	ErrAlertRuleGroupNotFound = errors.New("rulegroup not found")
+	ErrOptimisticLock         = errors.New("version conflict while updating a record in the database with optimistic locking")
 )
 
 // RuleStore is the interface for persisting alert rules and instances
@@ -93,7 +94,7 @@ func (st DBstore) DeleteAlertRulesByUID(ctx context.Context, orgID int64, ruleUI
 	})
 }
 
-// DeleteAlertInstanceByRuleUID is a handler for deleting alert instances by alert rule UID when a rule has been updated
+// DeleteAlertInstancesByRuleUID is a handler for deleting alert instances by alert rule UID when a rule has been updated
 func (st DBstore) DeleteAlertInstancesByRuleUID(ctx context.Context, orgID int64, ruleUID string) error {
 	return st.SQLStore.WithTransactionalDbSession(ctx, func(sess *sqlstore.DBSession) error {
 		_, err := sess.Exec("DELETE FROM alert_instance WHERE rule_org_id = ? AND rule_uid = ?", orgID, ruleUID)
@@ -205,7 +206,7 @@ func (st DBstore) UpdateAlertRules(ctx context.Context, rules []UpdateRule) erro
 		for _, r := range rules {
 			var parentVersion int64
 			r.New.ID = r.Existing.ID
-			r.New.Version = r.Existing.Version + 1
+			r.New.Version = r.Existing.Version // xorm will take care of increasing it (see https://xorm.io/docs/chapter-06/1.lock/)
 			if err := st.validateAlertRule(r.New); err != nil {
 				return err
 			}
@@ -213,11 +214,14 @@ func (st DBstore) UpdateAlertRules(ctx context.Context, rules []UpdateRule) erro
 				return err
 			}
 			// no way to update multiple rules at once
-			if _, err := sess.ID(r.Existing.ID).AllCols().Update(r.New); err != nil {
-				if st.SQLStore.Dialect.IsUniqueConstraintViolation(err) {
-					return ngmodels.ErrAlertRuleUniqueConstraintViolation
+			if updated, err := sess.ID(r.Existing.ID).AllCols().Update(r.New); err != nil || updated == 0 {
+				if err != nil {
+					if st.SQLStore.Dialect.IsUniqueConstraintViolation(err) {
+						return ngmodels.ErrAlertRuleUniqueConstraintViolation
+					}
+					return fmt.Errorf("failed to update rule [%s] %s: %w", r.New.UID, r.New.Title, err)
 				}
-				return fmt.Errorf("failed to update rule [%s] %s: %w", r.New.UID, r.New.Title, err)
+				return fmt.Errorf("%w: alert rule UID %s version %d", ErrOptimisticLock, r.New.UID, r.New.Version)
 			}
 			parentVersion = r.Existing.Version
 			ruleVersions = append(ruleVersions, ngmodels.AlertRuleVersion{
@@ -226,7 +230,7 @@
 				RuleNamespaceUID: r.New.NamespaceUID,
 				RuleGroup:        r.New.RuleGroup,
 				ParentVersion:    parentVersion,
-				Version:          r.New.Version,
+				Version:          r.New.Version + 1,
 				Created:          r.New.Updated,
 				Condition:        r.New.Condition,
 				Title:            r.New.Title,
@@ -248,7 +252,7 @@
 	})
 }
 
-// GetOrgAlertRules is a handler for retrieving alert rules of specific organisation.
+// ListAlertRules is a handler for retrieving alert rules of specific organisation.
 func (st DBstore) ListAlertRules(ctx context.Context, query *ngmodels.ListAlertRulesQuery) error {
 	return st.SQLStore.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
 		q := sess.Table("alert_rule")
@@ -317,7 +321,7 @@ func (st DBstore) GetRuleGroupInterval(ctx context.Context, orgID int64, namespa
 	})
 }
 
-// GetNamespaces returns the folders that are visible to the user and have at least one alert in it
+// GetUserVisibleNamespaces returns the folders that are visible to the user and have at least one alert in it
 func (st DBstore) GetUserVisibleNamespaces(ctx context.Context, orgID int64, user *models.SignedInUser) (map[string]*models.Folder, error) {
 	namespaceMap := make(map[string]*models.Folder)
 
diff --git a/pkg/services/ngalert/store/alert_rule_test.go b/pkg/services/ngalert/store/alert_rule_test.go
new file mode 100644
index 00000000000..ff6f039991e
--- /dev/null
+++ b/pkg/services/ngalert/store/alert_rule_test.go
@@ -0,0 +1,91 @@
+package store
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	"golang.org/x/exp/rand"
+
+	"github.com/grafana/grafana/pkg/services/ngalert/models"
+	"github.com/grafana/grafana/pkg/services/sqlstore"
+	"github.com/grafana/grafana/pkg/util"
+)
+
+func TestUpdateAlertRules(t *testing.T) {
+	sqlStore := sqlstore.InitTestDB(t)
+	store := DBstore{
+		SQLStore:     sqlStore,
+		BaseInterval: time.Duration(rand.Int63n(100)) * time.Second,
+	}
+	createRule := func(t *testing.T) *models.AlertRule {
+		t.Helper()
+		rule := models.AlertRuleGen(withIntervalMatching(store.BaseInterval))()
+		err := sqlStore.WithDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
+			_, err := sess.Table(models.AlertRule{}).InsertOne(rule)
+			if err != nil {
+				return err
+			}
+			dbRule := &models.AlertRule{}
+			exist, err := sess.Table(models.AlertRule{}).ID(rule.ID).Get(dbRule)
+			if err != nil {
+				return err
+			}
+			if !exist {
+				return errors.New("cannot read inserted record")
+			}
+			rule = dbRule
+			return nil
+		})
+		require.NoError(t, err)
+		return rule
+	}
+
+	t.Run("should increase version", func(t *testing.T) {
+		rule := createRule(t)
+		newRule := models.CopyRule(rule)
+		newRule.Title = util.GenerateShortUID()
+		err := store.UpdateAlertRules(context.Background(), []UpdateRule{{
+			Existing: rule,
+			New:      *newRule,
+		},
+		})
+		require.NoError(t, err)
+
+		dbrule := &models.AlertRule{}
+		err = sqlStore.WithDbSession(context.Background(), func(sess *sqlstore.DBSession) error {
+			exist, err := sess.Table(models.AlertRule{}).ID(rule.ID).Get(dbrule)
+			require.Truef(t, exist, fmt.Sprintf("rule with ID %d does not exist", rule.ID))
+			return err
+		})
+
+		require.NoError(t, err)
+		require.Equal(t, rule.Version+1, dbrule.Version)
+	})
+
+	t.Run("should fail due to optimistic locking if version does not match", func(t *testing.T) {
+		rule := createRule(t)
+		rule.Version-- // simulate version discrepancy
+
+		newRule := models.CopyRule(rule)
+		newRule.Title = util.GenerateShortUID()
+
+		err := store.UpdateAlertRules(context.Background(), []UpdateRule{{
+			Existing: rule,
+			New:      *newRule,
+		},
+		})
+
+		require.ErrorIs(t, err, ErrOptimisticLock)
+	})
+}
+
+func withIntervalMatching(baseInterval time.Duration) func(*models.AlertRule) {
+	return func(rule *models.AlertRule) {
+		rule.IntervalSeconds = int64(baseInterval.Seconds()) * rand.Int63n(10)
+		rule.For = time.Duration(rule.IntervalSeconds*rand.Int63n(9)+1) * time.Second
+	}
+}
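
A note on the mechanism the store change relies on: according to the xorm docs linked in the code comments above, the `xorm:"version"` tag makes xorm add the stored version to the UPDATE's WHERE clause and increment the column itself, so a stale `Existing.Version` surfaces as zero affected rows rather than an error. That is exactly what the new `updated == 0` check in `UpdateAlertRules` detects before wrapping the failure in `ErrOptimisticLock`, which the API handlers translate into HTTP 409. The sketch below is not part of the diff; it shows one way a caller could react to the new error by re-reading the rule and retrying, where `loadCurrent` and `mutate` are hypothetical callbacks standing in for whatever read and modify logic the caller already has.

```go
package example // illustrative sketch only; assumes the ngalert store and models packages as changed in this diff

import (
	"context"
	"errors"
	"fmt"

	"github.com/grafana/grafana/pkg/services/ngalert/models"
	"github.com/grafana/grafana/pkg/services/ngalert/store"
)

// updateWithRetry retries an alert rule update when it loses the optimistic-lock race.
// loadCurrent and mutate are hypothetical callbacks (not part of the patch): loadCurrent
// re-reads the rule, and with it the current Version; mutate applies the caller's changes.
func updateWithRetry(
	ctx context.Context,
	st store.DBstore,
	loadCurrent func(ctx context.Context) (*models.AlertRule, error),
	mutate func(rule *models.AlertRule) models.AlertRule,
	maxAttempts int,
) error {
	for attempt := 0; attempt < maxAttempts; attempt++ {
		existing, err := loadCurrent(ctx) // pick up the latest Version from the database
		if err != nil {
			return err
		}
		newRule := mutate(existing)
		err = st.UpdateAlertRules(ctx, []store.UpdateRule{{Existing: existing, New: newRule}})
		if err == nil {
			return nil
		}
		if !errors.Is(err, store.ErrOptimisticLock) {
			return err // unrelated failure: do not retry
		}
		// Another writer bumped the version between our read and write;
		// loop to re-read and apply the change on top of the fresh copy.
	}
	return fmt.Errorf("giving up after %d optimistic-lock conflicts", maxAttempts)
}
```

Whether to retry automatically like this or surface the conflict is a policy choice; the handlers in this diff return 409 Conflict, so concurrent writers (for example provisioning scripts) see the version mismatch instead of silently overwriting each other's changes.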