mirror of https://github.com/grafana/grafana
feat(alerting): add state persister interface (#80384)
parent
2d49fb6a7a
commit
82638d059f
@ -0,0 +1,17 @@ |
||||
package state |
||||
|
||||
import ( |
||||
"context" |
||||
|
||||
"github.com/benbjohnson/clock" |
||||
"go.opentelemetry.io/otel/trace" |
||||
) |
||||
|
||||
type NoopPersister struct{} |
||||
|
||||
func (n *NoopPersister) Async(_ context.Context, _ *clock.Ticker, _ *cache) {} |
||||
func (n *NoopPersister) Sync(_ context.Context, _ trace.Span, _, _ []StateTransition) {} |
||||
|
||||
func NewNoopPersister() StatePersister { |
||||
return &NoopPersister{} |
||||
} |
@ -0,0 +1,111 @@ |
||||
package state |
||||
|
||||
import ( |
||||
"context" |
||||
"time" |
||||
|
||||
"github.com/benbjohnson/clock" |
||||
"github.com/grafana/dskit/concurrency" |
||||
"go.opentelemetry.io/otel/attribute" |
||||
"go.opentelemetry.io/otel/trace" |
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log" |
||||
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" |
||||
) |
||||
|
||||
type SyncStatePersister struct { |
||||
log log.Logger |
||||
store InstanceStore |
||||
// doNotSaveNormalState controls whether eval.Normal state is persisted to the database and returned by get methods.
|
||||
doNotSaveNormalState bool |
||||
// maxStateSaveConcurrency controls the number of goroutines (per rule) that can save alert state in parallel.
|
||||
maxStateSaveConcurrency int |
||||
} |
||||
|
||||
func NewSyncStatePersisiter(log log.Logger, cfg ManagerCfg) StatePersister { |
||||
return &SyncStatePersister{ |
||||
log: log, |
||||
store: cfg.InstanceStore, |
||||
doNotSaveNormalState: cfg.DoNotSaveNormalState, |
||||
maxStateSaveConcurrency: cfg.MaxStateSaveConcurrency, |
||||
} |
||||
} |
||||
|
||||
func (a *SyncStatePersister) Async(_ context.Context, _ *clock.Ticker, _ *cache) { |
||||
a.log.Debug("Async: No-Op") |
||||
} |
||||
func (a *SyncStatePersister) Sync(ctx context.Context, span trace.Span, states, staleStates []StateTransition) { |
||||
a.deleteAlertStates(ctx, staleStates) |
||||
if len(staleStates) > 0 { |
||||
span.AddEvent("deleted stale states", trace.WithAttributes( |
||||
attribute.Int64("state_transitions", int64(len(staleStates))), |
||||
)) |
||||
} |
||||
|
||||
a.saveAlertStates(ctx, states...) |
||||
span.AddEvent("updated database") |
||||
} |
||||
|
||||
func (a *SyncStatePersister) deleteAlertStates(ctx context.Context, states []StateTransition) { |
||||
if a.store == nil || len(states) == 0 { |
||||
return |
||||
} |
||||
|
||||
a.log.Debug("Deleting alert states", "count", len(states)) |
||||
toDelete := make([]ngModels.AlertInstanceKey, 0, len(states)) |
||||
|
||||
for _, s := range states { |
||||
key, err := s.GetAlertInstanceKey() |
||||
if err != nil { |
||||
a.log.Error("Failed to delete alert instance with invalid labels", "cacheID", s.CacheID, "error", err) |
||||
continue |
||||
} |
||||
toDelete = append(toDelete, key) |
||||
} |
||||
|
||||
err := a.store.DeleteAlertInstances(ctx, toDelete...) |
||||
if err != nil { |
||||
a.log.Error("Failed to delete stale states", "error", err) |
||||
} |
||||
} |
||||
|
||||
func (a *SyncStatePersister) saveAlertStates(ctx context.Context, states ...StateTransition) { |
||||
if a.store == nil || len(states) == 0 { |
||||
return |
||||
} |
||||
|
||||
saveState := func(ctx context.Context, idx int) error { |
||||
s := states[idx] |
||||
// Do not save normal state to database and remove transition to Normal state but keep mapped states
|
||||
if a.doNotSaveNormalState && IsNormalStateWithNoReason(s.State) && !s.Changed() { |
||||
return nil |
||||
} |
||||
|
||||
key, err := s.GetAlertInstanceKey() |
||||
if err != nil { |
||||
a.log.Error("Failed to create a key for alert state to save it to database. The state will be ignored ", "cacheID", s.CacheID, "error", err, "labels", s.Labels.String()) |
||||
return nil |
||||
} |
||||
instance := ngModels.AlertInstance{ |
||||
AlertInstanceKey: key, |
||||
Labels: ngModels.InstanceLabels(s.Labels), |
||||
CurrentState: ngModels.InstanceStateType(s.State.State.String()), |
||||
CurrentReason: s.StateReason, |
||||
LastEvalTime: s.LastEvaluationTime, |
||||
CurrentStateSince: s.StartsAt, |
||||
CurrentStateEnd: s.EndsAt, |
||||
} |
||||
|
||||
err = a.store.SaveAlertInstance(ctx, instance) |
||||
if err != nil { |
||||
a.log.Error("Failed to save alert state", "labels", s.Labels.String(), "state", s.State, "error", err) |
||||
return nil |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
start := time.Now() |
||||
a.log.Debug("Saving alert states", "count", len(states), "max_state_save_concurrency", a.maxStateSaveConcurrency) |
||||
_ = concurrency.ForEachJob(ctx, len(states), a.maxStateSaveConcurrency, saveState) |
||||
a.log.Debug("Saving alert states done", "count", len(states), "max_state_save_concurrency", a.maxStateSaveConcurrency, "duration", time.Since(start)) |
||||
} |
@ -0,0 +1,103 @@ |
||||
package state |
||||
|
||||
import ( |
||||
"context" |
||||
"fmt" |
||||
"testing" |
||||
|
||||
"github.com/stretchr/testify/assert" |
||||
"github.com/stretchr/testify/require" |
||||
"k8s.io/component-base/tracing" |
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log/logtest" |
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval" |
||||
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" |
||||
"github.com/grafana/grafana/pkg/util" |
||||
) |
||||
|
||||
func TestSyncPersister_saveAlertStates(t *testing.T) { |
||||
type stateWithReason struct { |
||||
State eval.State |
||||
Reason string |
||||
} |
||||
create := func(s eval.State, r string) stateWithReason { |
||||
return stateWithReason{ |
||||
State: s, |
||||
Reason: r, |
||||
} |
||||
} |
||||
allStates := [...]stateWithReason{ |
||||
create(eval.Normal, ""), |
||||
create(eval.Normal, eval.NoData.String()), |
||||
create(eval.Normal, eval.Error.String()), |
||||
create(eval.Normal, util.GenerateShortUID()), |
||||
create(eval.Alerting, ""), |
||||
create(eval.Pending, ""), |
||||
create(eval.NoData, ""), |
||||
create(eval.Error, ""), |
||||
} |
||||
|
||||
transitionToKey := map[ngmodels.AlertInstanceKey]StateTransition{} |
||||
transitions := make([]StateTransition, 0) |
||||
for _, fromState := range allStates { |
||||
for i, toState := range allStates { |
||||
tr := StateTransition{ |
||||
State: &State{ |
||||
State: toState.State, |
||||
StateReason: toState.Reason, |
||||
Labels: ngmodels.GenerateAlertLabels(5, fmt.Sprintf("%d--", i)), |
||||
}, |
||||
PreviousState: fromState.State, |
||||
PreviousStateReason: fromState.Reason, |
||||
} |
||||
key, err := tr.GetAlertInstanceKey() |
||||
require.NoError(t, err) |
||||
transitionToKey[key] = tr |
||||
transitions = append(transitions, tr) |
||||
} |
||||
} |
||||
|
||||
t.Run("should save all transitions if doNotSaveNormalState is false", func(t *testing.T) { |
||||
trace := tracing.NewNoopTracerProvider().Tracer("test") |
||||
_, span := trace.Start(context.Background(), "") |
||||
st := &FakeInstanceStore{} |
||||
syncStatePersister := NewSyncStatePersisiter(&logtest.Fake{}, ManagerCfg{ |
||||
InstanceStore: st, |
||||
MaxStateSaveConcurrency: 1, |
||||
}) |
||||
syncStatePersister.Sync(context.Background(), span, transitions, nil) |
||||
savedKeys := map[ngmodels.AlertInstanceKey]ngmodels.AlertInstance{} |
||||
for _, op := range st.RecordedOps { |
||||
saved := op.(ngmodels.AlertInstance) |
||||
savedKeys[saved.AlertInstanceKey] = saved |
||||
} |
||||
assert.Len(t, transitionToKey, len(savedKeys)) |
||||
|
||||
for key, tr := range transitionToKey { |
||||
assert.Containsf(t, savedKeys, key, "state %s (%s) was not saved but should be", tr.State.State, tr.StateReason) |
||||
} |
||||
}) |
||||
|
||||
t.Run("should not save Normal->Normal if doNotSaveNormalState is true", func(t *testing.T) { |
||||
trace := tracing.NewNoopTracerProvider().Tracer("test") |
||||
_, span := trace.Start(context.Background(), "") |
||||
st := &FakeInstanceStore{} |
||||
syncStatePersister := NewSyncStatePersisiter(&logtest.Fake{}, ManagerCfg{ |
||||
InstanceStore: st, |
||||
MaxStateSaveConcurrency: 1, |
||||
}) |
||||
syncStatePersister.Sync(context.Background(), span, transitions, nil) |
||||
|
||||
savedKeys := map[ngmodels.AlertInstanceKey]ngmodels.AlertInstance{} |
||||
for _, op := range st.RecordedOps { |
||||
saved := op.(ngmodels.AlertInstance) |
||||
savedKeys[saved.AlertInstanceKey] = saved |
||||
} |
||||
for key, tr := range transitionToKey { |
||||
if tr.State.State == eval.Normal && tr.StateReason == "" && tr.PreviousState == eval.Normal && tr.PreviousStateReason == "" { |
||||
continue |
||||
} |
||||
assert.Containsf(t, savedKeys, key, "state %s (%s) was not saved but should be", tr.State.State, tr.StateReason) |
||||
} |
||||
}) |
||||
} |
Loading…
Reference in new issue