diff --git a/pkg/services/ngalert/models/instance.go b/pkg/services/ngalert/models/instance.go
index d2537ba7f03..64b2db99430 100644
--- a/pkg/services/ngalert/models/instance.go
+++ b/pkg/services/ngalert/models/instance.go
@@ -55,6 +55,7 @@ func (i InstanceStateType) IsValid() bool {
 type ListAlertInstancesQuery struct {
 	RuleUID   string
 	RuleOrgID int64 `json:"-"`
+	RuleGroup string
 }
 
 // ValidateAlertInstance validates that the alert instance contains an alert rule id,
diff --git a/pkg/services/ngalert/models/testing.go b/pkg/services/ngalert/models/testing.go
index b8b2b5ceed2..af53eca7e82 100644
--- a/pkg/services/ngalert/models/testing.go
+++ b/pkg/services/ngalert/models/testing.go
@@ -587,14 +587,6 @@ func GenerateRuleKey(orgID int64) AlertRuleKey {
 	}
 }
 
-// GenerateRuleKeyWithGroup generates a random alert rule key with group
-func GenerateRuleKeyWithGroup(orgID int64) AlertRuleKeyWithGroup {
-	return AlertRuleKeyWithGroup{
-		AlertRuleKey: GenerateRuleKey(orgID),
-		RuleGroup:    util.GenerateShortUID(),
-	}
-}
-
 // GenerateGroupKey generates a random group key
 func GenerateGroupKey(orgID int64) AlertRuleGroupKey {
 	return AlertRuleGroupKey{
diff --git a/pkg/services/ngalert/state/manager.go b/pkg/services/ngalert/state/manager.go
index 94c5371e308..c656787dbd2 100644
--- a/pkg/services/ngalert/state/manager.go
+++ b/pkg/services/ngalert/state/manager.go
@@ -33,6 +33,7 @@ type AlertInstanceManager interface {
 type StatePersister interface {
 	Async(ctx context.Context, cache *cache)
 	Sync(ctx context.Context, span trace.Span, states StateTransitions)
+	SyncRule(ctx context.Context, span trace.Span, ruleKey ngModels.AlertRuleKeyWithGroup, states StateTransitions)
 }
 
 // Sender is an optional callback intended for sending the states to an alertmanager.
@@ -57,7 +58,8 @@ type Manager struct {
 	applyNoDataAndErrorToAllStates bool
 	rulesPerRuleGroupLimit         int64
 
-	persister StatePersister
+	persister             StatePersister
+	useRuleStatePersister bool
 }
 
 type ManagerCfg struct {
@@ -75,6 +77,8 @@ type ManagerCfg struct {
 	// to all states when corresponding execution in the rule definition is set to either `Alerting` or `OK`
 	ApplyNoDataAndErrorToAllStates bool
 	RulesPerRuleGroupLimit         int64
+	// If true, then SyncRule method of the StatePersister is called
+	UseRuleStatePersister bool
 
 	DisableExecution bool
 
@@ -108,6 +112,7 @@ func NewManager(cfg ManagerCfg, statePersister StatePersister) *Manager {
 		applyNoDataAndErrorToAllStates: cfg.ApplyNoDataAndErrorToAllStates,
 		rulesPerRuleGroupLimit:         cfg.RulesPerRuleGroupLimit,
 		persister:                      statePersister,
+		useRuleStatePersister:          cfg.UseRuleStatePersister,
 		tracer:                         cfg.Tracer,
 	}
 
@@ -347,7 +352,11 @@ func (st *Manager) ProcessEvalResults(
 		statesToSend = st.updateLastSentAt(allChanges, evaluatedAt)
 	}
 
-	st.persister.Sync(ctx, span, allChanges)
+	if st.useRuleStatePersister {
+		st.persister.SyncRule(ctx, span, alertRule.GetKeyWithGroup(), allChanges)
+	} else {
+		st.persister.Sync(ctx, span, allChanges)
+	}
 	if st.historian != nil {
 		st.historian.Record(ctx, history_model.NewRuleMeta(alertRule, logger), allChanges)
 	}
diff --git a/pkg/services/ngalert/state/persister_async.go b/pkg/services/ngalert/state/persister_async.go
index 91807f26921..0f3ade3b229 100644
--- a/pkg/services/ngalert/state/persister_async.go
+++ b/pkg/services/ngalert/state/persister_async.go
@@ -9,6 +9,7 @@ import (
 
 	"github.com/grafana/grafana/pkg/infra/log"
 	"github.com/grafana/grafana/pkg/services/ngalert/metrics"
+	"github.com/grafana/grafana/pkg/services/ngalert/models"
 )
 
 type AsyncStatePersister struct {
@@ -67,3 +68,7 @@ func (a *AsyncStatePersister) fullSync(ctx context.Context, cache *cache) error
 func (a *AsyncStatePersister) Sync(_ context.Context, _ trace.Span, _ StateTransitions) {
 	a.log.Debug("Sync: No-Op")
 }
+
+func (a *AsyncStatePersister) SyncRule(_ context.Context, _ trace.Span, _ models.AlertRuleKeyWithGroup, _ StateTransitions) {
+	a.log.Debug("SyncRule: No-Op")
+}
diff --git a/pkg/services/ngalert/state/persister_noop.go b/pkg/services/ngalert/state/persister_noop.go
index 0275bc5f351..990db759230 100644
--- a/pkg/services/ngalert/state/persister_noop.go
+++ b/pkg/services/ngalert/state/persister_noop.go
@@ -4,12 +4,16 @@ import (
 	"context"
 
 	"go.opentelemetry.io/otel/trace"
+
+	"github.com/grafana/grafana/pkg/services/ngalert/models"
 )
 
 type NoopPersister struct{}
 
 func (n *NoopPersister) Async(_ context.Context, _ *cache) {}
 func (n *NoopPersister) Sync(_ context.Context, _ trace.Span, _ StateTransitions) {}
+func (n *NoopPersister) SyncRule(_ context.Context, _ trace.Span, _ models.AlertRuleKeyWithGroup, _ StateTransitions) {
+}
 
 func NewNoopPersister() StatePersister {
 	return &NoopPersister{}
diff --git a/pkg/services/ngalert/state/persister_rule_sync.go b/pkg/services/ngalert/state/persister_rule_sync.go
new file mode 100644
index 00000000000..0fb9c353417
--- /dev/null
+++ b/pkg/services/ngalert/state/persister_rule_sync.go
@@ -0,0 +1,85 @@
+package state
+
+import (
+	"context"
+	"time"
+
+	"go.opentelemetry.io/otel/trace"
+
+	"github.com/grafana/grafana/pkg/infra/log"
+	ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
+)
+
+type SyncRuleStatePersister struct {
+	log   log.Logger
+	store InstanceStore
+	// doNotSaveNormalState controls whether eval.Normal state is persisted to the database and returned by get methods.
+	doNotSaveNormalState bool
+}
+
+func NewSyncRuleStatePersisiter(log log.Logger, cfg ManagerCfg) StatePersister {
+	return &SyncRuleStatePersister{
+		log:                  log,
+		store:                cfg.InstanceStore,
+		doNotSaveNormalState: cfg.DoNotSaveNormalState,
+	}
+}
+
+func (a *SyncRuleStatePersister) Async(_ context.Context, _ *cache) {
+	a.log.Debug("Async: No-Op")
+}
+func (a *SyncRuleStatePersister) Sync(ctx context.Context, span trace.Span, allStates StateTransitions) {
+	a.log.Debug("Sync: No-Op")
+}
+
+// SyncRule persists the state transitions of the rule to the database
+func (a *SyncRuleStatePersister) SyncRule(ctx context.Context, span trace.Span, ruleKey ngModels.AlertRuleKeyWithGroup, states StateTransitions) {
+	if a.store == nil || len(states) == 0 {
+		return
+	}
+	logger := a.log.FromContext(ctx)
+
+	instancesToSave := make([]ngModels.AlertInstance, 0, len(states))
+
+	for _, s := range states {
+		if s.IsStale() {
+			continue
+		}
+
+		if a.doNotSaveNormalState && IsNormalStateWithNoReason(s.State) && !s.Changed() {
+			continue
+		}
+
+		key, err := s.GetAlertInstanceKey()
+		if err != nil {
+			logger.Error("Failed to create a key for alert state to save it to database. The state will be ignored ", "cacheID", s.CacheID, "error", err, "labels", s.Labels.String())
+			continue
+		}
+
+		instance := ngModels.AlertInstance{
+			AlertInstanceKey:  key,
+			Labels:            ngModels.InstanceLabels(s.Labels),
+			CurrentState:      ngModels.InstanceStateType(s.State.State.String()),
+			CurrentReason:     s.StateReason,
+			LastEvalTime:      s.LastEvaluationTime,
+			CurrentStateSince: s.StartsAt,
+			CurrentStateEnd:   s.EndsAt,
+			ResolvedAt:        s.ResolvedAt,
+			LastSentAt:        s.LastSentAt,
+			ResultFingerprint: s.ResultFingerprint.String(),
+		}
+
+		instancesToSave = append(instancesToSave, instance)
+	}
+
+	start := time.Now()
+	logger.Debug("Saving alert states", "count", len(instancesToSave))
+	err := a.store.SaveAlertInstancesForRule(ctx, ruleKey, instancesToSave)
+	if err != nil {
+		logger.Error("Failed to save alert rule state", "error", err, "duration", time.Since(start))
+		return
+	}
+
+	logger.Debug("Saving alert states done", "count", len(instancesToSave), "duration", time.Since(start))
+	span.AddEvent("updated database")
+}
diff --git a/pkg/services/ngalert/state/persister_sync.go b/pkg/services/ngalert/state/persister_sync.go
index 1aee0bed09e..9605f61605d 100644
--- a/pkg/services/ngalert/state/persister_sync.go
+++ b/pkg/services/ngalert/state/persister_sync.go
@@ -34,6 +34,10 @@ func (a *SyncStatePersister) Async(_ context.Context, _ *cache) {
 	a.log.Debug("Async: No-Op")
 }
 
+func (a *SyncStatePersister) SyncRule(_ context.Context, _ trace.Span, _ ngModels.AlertRuleKeyWithGroup, _ StateTransitions) {
+	a.log.Debug("SyncRule: No-Op")
+}
+
 // Sync persists the state transitions to the database. It deletes stale states and saves the current states.
 func (a *SyncStatePersister) Sync(ctx context.Context, span trace.Span, allStates StateTransitions) {
 	staleStates := allStates.StaleStates()
diff --git a/pkg/services/ngalert/store/instance_database.go b/pkg/services/ngalert/store/instance_database.go
index 58e3593e64a..379f8b28e09 100644
--- a/pkg/services/ngalert/store/instance_database.go
+++ b/pkg/services/ngalert/store/instance_database.go
@@ -32,6 +32,10 @@ func (st DBstore) ListAlertInstances(ctx context.Context, cmd *models.ListAlertI
 		if cmd.RuleUID != "" {
 			addToQuery(` AND rule_uid = ?`, cmd.RuleUID)
 		}
+		if cmd.RuleGroup != "" {
+			st.Logger.Warn("ListAlertInstancesQuery.RuleGroup filter is not supported in alerting DB store")
+		}
+
 		if st.FeatureToggles.IsEnabled(ctx, featuremgmt.FlagAlertingNoNormalState) {
 			s.WriteString(fmt.Sprintf(" AND NOT (current_state = '%s' AND current_reason = '')", models.InstanceStateNormal))
 		}
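Review note (not part of the diff): the sketch below shows one way the new UseRuleStatePersister toggle and SyncRuleStatePersister could be wired when constructing the state manager. NewManager, NewSyncRuleStatePersisiter and the ManagerCfg fields come from this diff; the logger argument and the existing NewSyncStatePersisiter constructor are assumed from the surrounding codebase.

package main // illustrative only; a minimal wiring sketch, not Grafana's actual setup code

import (
	"github.com/grafana/grafana/pkg/infra/log"
	"github.com/grafana/grafana/pkg/services/ngalert/state"
)

// newStateManager selects a StatePersister based on cfg.UseRuleStatePersister and
// builds the state manager with it, mirroring the branch added in ProcessEvalResults.
func newStateManager(logger log.Logger, cfg state.ManagerCfg) *state.Manager {
	var persister state.StatePersister
	if cfg.UseRuleStatePersister {
		// new path: all transitions of a rule are saved in one store call via SyncRule
		persister = state.NewSyncRuleStatePersisiter(logger, cfg)
	} else {
		// existing path: per-transition saves and stale-state deletes via Sync
		persister = state.NewSyncStatePersisiter(logger, cfg)
	}
	return state.NewManager(cfg, persister)
}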
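A second observation: SyncRule calls a.store.SaveAlertInstancesForRule, which is not defined in this diff, so the InstanceStore interface presumably gains a method along the lines of the fragment below (name taken from the call site, signature inferred, semantics assumed). Since SyncRule skips stale states instead of deleting them, the method would need to replace the rule's stored instances wholesale; worth confirming in the store change.

// Assumed shape of the new InstanceStore method; inferred, not shown in this diff.
type InstanceStore interface {
	// ...existing methods...

	// SaveAlertInstancesForRule is expected to replace all stored instances of the given
	// rule with the provided set, which is why SyncRule can drop stale states silently.
	SaveAlertInstancesForRule(ctx context.Context, key models.AlertRuleKeyWithGroup, instances []models.AlertInstance) error
}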