Alerting: SyncRulePersister for alert rule state

pull/94509/head
Alexander Akhmetov 9 months ago
parent 35a15a11df
commit 8486713ea7
No known key found for this signature in database
GPG Key ID: A5A8947133B1B31B
  1. 1
      pkg/services/ngalert/models/instance.go
  2. 8
      pkg/services/ngalert/models/testing.go
  3. 13
      pkg/services/ngalert/state/manager.go
  4. 5
      pkg/services/ngalert/state/persister_async.go
  5. 4
      pkg/services/ngalert/state/persister_noop.go
  6. 85
      pkg/services/ngalert/state/persister_rule_sync.go
  7. 4
      pkg/services/ngalert/state/persister_sync.go
  8. 4
      pkg/services/ngalert/store/instance_database.go

@ -55,6 +55,7 @@ func (i InstanceStateType) IsValid() bool {
// ListAlertInstancesQuery is the query object for listing alert instances,
// optionally filtered by rule UID, organization, and rule group.
type ListAlertInstancesQuery struct {
RuleUID string
RuleOrgID int64 `json:"-"`
// RuleGroup filters by rule group. NOTE(review): the alerting DB store does
// not support this filter yet and only logs a warning when it is set.
RuleGroup string
}
// ValidateAlertInstance validates that the alert instance contains an alert rule id,

@ -587,14 +587,6 @@ func GenerateRuleKey(orgID int64) AlertRuleKey {
}
}
// GenerateRuleKeyWithGroup returns a freshly generated AlertRuleKeyWithGroup
// for the given organization: a random rule key paired with a random group name.
func GenerateRuleKeyWithGroup(orgID int64) AlertRuleKeyWithGroup {
	key := AlertRuleKeyWithGroup{
		AlertRuleKey: GenerateRuleKey(orgID),
	}
	key.RuleGroup = util.GenerateShortUID()
	return key
}
// GenerateGroupKey generates a random group key
func GenerateGroupKey(orgID int64) AlertRuleGroupKey {
return AlertRuleGroupKey{

@ -33,6 +33,7 @@ type AlertInstanceManager interface {
// StatePersister persists alert state. Implementations may persist
// periodically in the background (Async), synchronously for a batch of state
// transitions (Sync), or synchronously for a single rule's transitions
// (SyncRule); implementations typically implement one mode and no-op the rest.
type StatePersister interface {
Async(ctx context.Context, cache *cache)
Sync(ctx context.Context, span trace.Span, states StateTransitions)
SyncRule(ctx context.Context, span trace.Span, ruleKey ngModels.AlertRuleKeyWithGroup, states StateTransitions)
}
// Sender is an optional callback intended for sending the states to an alertmanager.
@ -57,7 +58,8 @@ type Manager struct {
applyNoDataAndErrorToAllStates bool
rulesPerRuleGroupLimit int64
persister StatePersister
persister StatePersister
useRuleStatePersister bool
}
type ManagerCfg struct {
@ -75,6 +77,8 @@ type ManagerCfg struct {
// to all states when corresponding execution in the rule definition is set to either `Alerting` or `OK`
ApplyNoDataAndErrorToAllStates bool
RulesPerRuleGroupLimit int64
// If true, then SyncRule method of the StatePersister is called
UseRuleStatePersister bool
DisableExecution bool
@ -108,6 +112,7 @@ func NewManager(cfg ManagerCfg, statePersister StatePersister) *Manager {
applyNoDataAndErrorToAllStates: cfg.ApplyNoDataAndErrorToAllStates,
rulesPerRuleGroupLimit: cfg.RulesPerRuleGroupLimit,
persister: statePersister,
useRuleStatePersister: cfg.UseRuleStatePersister,
tracer: cfg.Tracer,
}
@ -347,7 +352,11 @@ func (st *Manager) ProcessEvalResults(
statesToSend = st.updateLastSentAt(allChanges, evaluatedAt)
}
st.persister.Sync(ctx, span, allChanges)
if st.useRuleStatePersister {
st.persister.SyncRule(ctx, span, alertRule.GetKeyWithGroup(), allChanges)
} else {
st.persister.Sync(ctx, span, allChanges)
}
if st.historian != nil {
st.historian.Record(ctx, history_model.NewRuleMeta(alertRule, logger), allChanges)
}

@ -9,6 +9,7 @@ import (
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
)
type AsyncStatePersister struct {
@ -67,3 +68,7 @@ func (a *AsyncStatePersister) fullSync(ctx context.Context, cache *cache) error
// Sync is a no-op for AsyncStatePersister: state is written periodically by
// the background loop started via Async, not after each evaluation.
func (a *AsyncStatePersister) Sync(_ context.Context, _ trace.Span, _ StateTransitions) {
a.log.Debug("Sync: No-Op")
}
// SyncRule is a no-op for AsyncStatePersister; see Sync.
func (a *AsyncStatePersister) SyncRule(_ context.Context, _ trace.Span, _ models.AlertRuleKeyWithGroup, _ StateTransitions) {
a.log.Debug("SyncRule: No-Op")
}

@ -4,12 +4,16 @@ import (
"context"
"go.opentelemetry.io/otel/trace"
"github.com/grafana/grafana/pkg/services/ngalert/models"
)
// NoopPersister is a StatePersister that discards all state transitions.
type NoopPersister struct{}

// Async is a no-op.
func (n *NoopPersister) Async(_ context.Context, _ *cache) {}

// Sync is a no-op.
func (n *NoopPersister) Sync(_ context.Context, _ trace.Span, _ StateTransitions) {}

// SyncRule is a no-op.
func (n *NoopPersister) SyncRule(_ context.Context, _ trace.Span, _ models.AlertRuleKeyWithGroup, _ StateTransitions) {
}
func NewNoopPersister() StatePersister {
return &NoopPersister{}

@ -0,0 +1,85 @@
package state
import (
"context"
"time"
"go.opentelemetry.io/otel/trace"
"github.com/grafana/grafana/pkg/infra/log"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
// SyncRuleStatePersister persists the state of a single rule to the database
// synchronously via SyncRule; its Async and Sync methods are no-ops.
type SyncRuleStatePersister struct {
log log.Logger
// store is the database-backed instance store; when nil, SyncRule does nothing.
store InstanceStore
// doNotSaveNormalState controls whether eval.Normal state is persisted to the database and returned by get methods.
doNotSaveNormalState bool
}
// NewSyncRuleStatePersisiter creates a SyncRuleStatePersister backed by the
// InstanceStore from the given manager configuration.
// NOTE(review): the name misspells "Persister"; renaming would break callers,
// so it is only flagged here.
func NewSyncRuleStatePersisiter(log log.Logger, cfg ManagerCfg) StatePersister {
return &SyncRuleStatePersister{
log: log,
store: cfg.InstanceStore,
doNotSaveNormalState: cfg.DoNotSaveNormalState,
}
}
// Async is a no-op: SyncRuleStatePersister persists state synchronously per rule.
func (a *SyncRuleStatePersister) Async(_ context.Context, _ *cache) {
a.log.Debug("Async: No-Op")
}
// Sync is a no-op: SyncRuleStatePersister persists state per rule via SyncRule.
// Parameters are blanked since they are unused, matching the Async no-op above.
func (a *SyncRuleStatePersister) Sync(_ context.Context, _ trace.Span, _ StateTransitions) {
	a.log.Debug("Sync: No-Op")
}
// SyncRule persists the state transitions of the given rule to the database.
// Stale states are skipped; since SaveAlertInstancesForRule writes the rule's
// instances as one set, states omitted here are not re-saved. When
// doNotSaveNormalState is set, unchanged Normal states with no reason are
// skipped as well. Does nothing when no store is configured or states is empty.
func (a *SyncRuleStatePersister) SyncRule(ctx context.Context, span trace.Span, ruleKey ngModels.AlertRuleKeyWithGroup, states StateTransitions) {
	if a.store == nil || len(states) == 0 {
		return
	}
	logger := a.log.FromContext(ctx)

	instancesToSave := make([]ngModels.AlertInstance, 0, len(states))
	for _, s := range states {
		// Stale states are dropped rather than persisted.
		if s.IsStale() {
			continue
		}
		// Optionally skip unchanged Normal states that carry no reason.
		if a.doNotSaveNormalState && IsNormalStateWithNoReason(s.State) && !s.Changed() {
			continue
		}
		key, err := s.GetAlertInstanceKey()
		if err != nil {
			// A state without a valid key cannot be stored; log and move on.
			logger.Error("Failed to create a key for alert state to save it to database. The state will be ignored", "cacheID", s.CacheID, "error", err, "labels", s.Labels.String())
			continue
		}
		instance := ngModels.AlertInstance{
			AlertInstanceKey:  key,
			Labels:            ngModels.InstanceLabels(s.Labels),
			CurrentState:      ngModels.InstanceStateType(s.State.State.String()),
			CurrentReason:     s.StateReason,
			LastEvalTime:      s.LastEvaluationTime,
			CurrentStateSince: s.StartsAt,
			CurrentStateEnd:   s.EndsAt,
			ResolvedAt:        s.ResolvedAt,
			LastSentAt:        s.LastSentAt,
			ResultFingerprint: s.ResultFingerprint.String(),
		}
		instancesToSave = append(instancesToSave, instance)
	}

	start := time.Now()
	logger.Debug("Saving alert states", "count", len(instancesToSave))
	if err := a.store.SaveAlertInstancesForRule(ctx, ruleKey, instancesToSave); err != nil {
		logger.Error("Failed to save alert rule state", "error", err, "duration", time.Since(start))
		return
	}
	logger.Debug("Saving alert states done", "count", len(instancesToSave), "duration", time.Since(start))
	span.AddEvent("updated database")
}

@ -34,6 +34,10 @@ func (a *SyncStatePersister) Async(_ context.Context, _ *cache) {
a.log.Debug("Async: No-Op")
}
// SyncRule is a no-op for SyncStatePersister; per-rule persistence is handled
// by SyncRuleStatePersister, while this persister uses Sync for all states.
func (a *SyncStatePersister) SyncRule(_ context.Context, _ trace.Span, _ ngModels.AlertRuleKeyWithGroup, _ StateTransitions) {
a.log.Debug("SyncRule: No-Op")
}
// Sync persists the state transitions to the database. It deletes stale states and saves the current states.
func (a *SyncStatePersister) Sync(ctx context.Context, span trace.Span, allStates StateTransitions) {
staleStates := allStates.StaleStates()

@ -32,6 +32,10 @@ func (st DBstore) ListAlertInstances(ctx context.Context, cmd *models.ListAlertI
if cmd.RuleUID != "" {
addToQuery(` AND rule_uid = ?`, cmd.RuleUID)
}
if cmd.RuleGroup != "" {
st.Logger.Warn("ListAlertInstancesQuery.RuleGroup filter is not supported in alerting DB store")
}
if st.FeatureToggles.IsEnabled(ctx, featuremgmt.FlagAlertingNoNormalState) {
s.WriteString(fmt.Sprintf(" AND NOT (current_state = '%s' AND current_reason = '')", models.InstanceStateNormal))
}

Loading…
Cancel
Save