mirror of https://github.com/grafana/grafana
feat(alerting): add state persister interface (#80384)
parent
2d49fb6a7a
commit
82638d059f
@ -0,0 +1,17 @@ |
|||||||
|
package state |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
|
||||||
|
"github.com/benbjohnson/clock" |
||||||
|
"go.opentelemetry.io/otel/trace" |
||||||
|
) |
||||||
|
|
||||||
|
// NoopPersister is a StatePersister whose methods do nothing: no state
// is read from or written to any store.
type NoopPersister struct{}

// Async is a no-op; there is no background persistence loop to start.
func (n *NoopPersister) Async(_ context.Context, _ *clock.Ticker, _ *cache) {}

// Sync is a no-op; the given state transitions are discarded.
func (n *NoopPersister) Sync(_ context.Context, _ trace.Span, _, _ []StateTransition) {}
||||||
|
|
||||||
|
func NewNoopPersister() StatePersister { |
||||||
|
return &NoopPersister{} |
||||||
|
} |
@ -0,0 +1,111 @@ |
|||||||
|
package state |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/benbjohnson/clock" |
||||||
|
"github.com/grafana/dskit/concurrency" |
||||||
|
"go.opentelemetry.io/otel/attribute" |
||||||
|
"go.opentelemetry.io/otel/trace" |
||||||
|
|
||||||
|
"github.com/grafana/grafana/pkg/infra/log" |
||||||
|
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" |
||||||
|
) |
||||||
|
|
||||||
|
// SyncStatePersister persists alert state to the instance store
// synchronously, i.e. during each Sync call rather than on a background
// ticker (its Async method is a no-op).
type SyncStatePersister struct {
	log   log.Logger
	store InstanceStore
	// doNotSaveNormalState controls whether eval.Normal state is persisted to the database and returned by get methods.
	doNotSaveNormalState bool
	// maxStateSaveConcurrency controls the number of goroutines (per rule) that can save alert state in parallel.
	maxStateSaveConcurrency int
}
||||||
|
|
||||||
|
// NewSyncStatePersisiter creates a StatePersister that writes state
// transitions to cfg.InstanceStore synchronously during Sync.
// NOTE(review): "Persisiter" is a typo in the exported name, but renaming
// it would break callers; fix in a coordinated change.
func NewSyncStatePersisiter(log log.Logger, cfg ManagerCfg) StatePersister {
	return &SyncStatePersister{
		log:                     log,
		store:                   cfg.InstanceStore,
		doNotSaveNormalState:    cfg.DoNotSaveNormalState,
		maxStateSaveConcurrency: cfg.MaxStateSaveConcurrency,
	}
}
||||||
|
|
||||||
|
// Async is a no-op: SyncStatePersister only persists state during Sync.
func (a *SyncStatePersister) Async(_ context.Context, _ *clock.Ticker, _ *cache) {
	a.log.Debug("Async: No-Op")
}
||||||
|
// Sync persists the given state transitions to the instance store:
// stale states are deleted first, then the current states are saved.
// Progress is recorded as events on the provided span. Errors from the
// store are logged inside the helpers, not returned.
func (a *SyncStatePersister) Sync(ctx context.Context, span trace.Span, states, staleStates []StateTransition) {
	a.deleteAlertStates(ctx, staleStates)
	if len(staleStates) > 0 {
		span.AddEvent("deleted stale states", trace.WithAttributes(
			attribute.Int64("state_transitions", int64(len(staleStates))),
		))
	}

	a.saveAlertStates(ctx, states...)
	span.AddEvent("updated database")
}
||||||
|
|
||||||
|
func (a *SyncStatePersister) deleteAlertStates(ctx context.Context, states []StateTransition) { |
||||||
|
if a.store == nil || len(states) == 0 { |
||||||
|
return |
||||||
|
} |
||||||
|
|
||||||
|
a.log.Debug("Deleting alert states", "count", len(states)) |
||||||
|
toDelete := make([]ngModels.AlertInstanceKey, 0, len(states)) |
||||||
|
|
||||||
|
for _, s := range states { |
||||||
|
key, err := s.GetAlertInstanceKey() |
||||||
|
if err != nil { |
||||||
|
a.log.Error("Failed to delete alert instance with invalid labels", "cacheID", s.CacheID, "error", err) |
||||||
|
continue |
||||||
|
} |
||||||
|
toDelete = append(toDelete, key) |
||||||
|
} |
||||||
|
|
||||||
|
err := a.store.DeleteAlertInstances(ctx, toDelete...) |
||||||
|
if err != nil { |
||||||
|
a.log.Error("Failed to delete stale states", "error", err) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// saveAlertStates writes the given state transitions to the instance
// store using up to maxStateSaveConcurrency goroutines. Transitions that
// cannot be keyed or saved are logged and skipped; the method never
// fails the whole batch. It is a no-op when no store is configured or
// there is nothing to save.
func (a *SyncStatePersister) saveAlertStates(ctx context.Context, states ...StateTransition) {
	if a.store == nil || len(states) == 0 {
		return
	}

	// saveState persists states[idx]. It always returns nil so that one
	// bad state does not abort the rest of the batch in ForEachJob.
	saveState := func(ctx context.Context, idx int) error {
		s := states[idx]
		// Do not save normal state to database and remove transition to Normal state but keep mapped states
		if a.doNotSaveNormalState && IsNormalStateWithNoReason(s.State) && !s.Changed() {
			return nil
		}

		key, err := s.GetAlertInstanceKey()
		if err != nil {
			a.log.Error("Failed to create a key for alert state to save it to database. The state will be ignored ", "cacheID", s.CacheID, "error", err, "labels", s.Labels.String())
			return nil
		}
		instance := ngModels.AlertInstance{
			AlertInstanceKey:  key,
			Labels:            ngModels.InstanceLabels(s.Labels),
			CurrentState:      ngModels.InstanceStateType(s.State.State.String()),
			CurrentReason:     s.StateReason,
			LastEvalTime:      s.LastEvaluationTime,
			CurrentStateSince: s.StartsAt,
			CurrentStateEnd:   s.EndsAt,
		}

		err = a.store.SaveAlertInstance(ctx, instance)
		if err != nil {
			a.log.Error("Failed to save alert state", "labels", s.Labels.String(), "state", s.State, "error", err)
			return nil
		}
		return nil
	}

	start := time.Now()
	a.log.Debug("Saving alert states", "count", len(states), "max_state_save_concurrency", a.maxStateSaveConcurrency)
	// The error is always nil because saveState never returns a non-nil error.
	_ = concurrency.ForEachJob(ctx, len(states), a.maxStateSaveConcurrency, saveState)
	a.log.Debug("Saving alert states done", "count", len(states), "max_state_save_concurrency", a.maxStateSaveConcurrency, "duration", time.Since(start))
}
@ -0,0 +1,103 @@ |
|||||||
|
package state |
||||||
|
|
||||||
|
import ( |
||||||
|
"context" |
||||||
|
"fmt" |
||||||
|
"testing" |
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert" |
||||||
|
"github.com/stretchr/testify/require" |
||||||
|
"k8s.io/component-base/tracing" |
||||||
|
|
||||||
|
"github.com/grafana/grafana/pkg/infra/log/logtest" |
||||||
|
"github.com/grafana/grafana/pkg/services/ngalert/eval" |
||||||
|
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" |
||||||
|
"github.com/grafana/grafana/pkg/util" |
||||||
|
) |
||||||
|
|
||||||
|
func TestSyncPersister_saveAlertStates(t *testing.T) { |
||||||
|
type stateWithReason struct { |
||||||
|
State eval.State |
||||||
|
Reason string |
||||||
|
} |
||||||
|
create := func(s eval.State, r string) stateWithReason { |
||||||
|
return stateWithReason{ |
||||||
|
State: s, |
||||||
|
Reason: r, |
||||||
|
} |
||||||
|
} |
||||||
|
allStates := [...]stateWithReason{ |
||||||
|
create(eval.Normal, ""), |
||||||
|
create(eval.Normal, eval.NoData.String()), |
||||||
|
create(eval.Normal, eval.Error.String()), |
||||||
|
create(eval.Normal, util.GenerateShortUID()), |
||||||
|
create(eval.Alerting, ""), |
||||||
|
create(eval.Pending, ""), |
||||||
|
create(eval.NoData, ""), |
||||||
|
create(eval.Error, ""), |
||||||
|
} |
||||||
|
|
||||||
|
transitionToKey := map[ngmodels.AlertInstanceKey]StateTransition{} |
||||||
|
transitions := make([]StateTransition, 0) |
||||||
|
for _, fromState := range allStates { |
||||||
|
for i, toState := range allStates { |
||||||
|
tr := StateTransition{ |
||||||
|
State: &State{ |
||||||
|
State: toState.State, |
||||||
|
StateReason: toState.Reason, |
||||||
|
Labels: ngmodels.GenerateAlertLabels(5, fmt.Sprintf("%d--", i)), |
||||||
|
}, |
||||||
|
PreviousState: fromState.State, |
||||||
|
PreviousStateReason: fromState.Reason, |
||||||
|
} |
||||||
|
key, err := tr.GetAlertInstanceKey() |
||||||
|
require.NoError(t, err) |
||||||
|
transitionToKey[key] = tr |
||||||
|
transitions = append(transitions, tr) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
t.Run("should save all transitions if doNotSaveNormalState is false", func(t *testing.T) { |
||||||
|
trace := tracing.NewNoopTracerProvider().Tracer("test") |
||||||
|
_, span := trace.Start(context.Background(), "") |
||||||
|
st := &FakeInstanceStore{} |
||||||
|
syncStatePersister := NewSyncStatePersisiter(&logtest.Fake{}, ManagerCfg{ |
||||||
|
InstanceStore: st, |
||||||
|
MaxStateSaveConcurrency: 1, |
||||||
|
}) |
||||||
|
syncStatePersister.Sync(context.Background(), span, transitions, nil) |
||||||
|
savedKeys := map[ngmodels.AlertInstanceKey]ngmodels.AlertInstance{} |
||||||
|
for _, op := range st.RecordedOps { |
||||||
|
saved := op.(ngmodels.AlertInstance) |
||||||
|
savedKeys[saved.AlertInstanceKey] = saved |
||||||
|
} |
||||||
|
assert.Len(t, transitionToKey, len(savedKeys)) |
||||||
|
|
||||||
|
for key, tr := range transitionToKey { |
||||||
|
assert.Containsf(t, savedKeys, key, "state %s (%s) was not saved but should be", tr.State.State, tr.StateReason) |
||||||
|
} |
||||||
|
}) |
||||||
|
|
||||||
|
t.Run("should not save Normal->Normal if doNotSaveNormalState is true", func(t *testing.T) { |
||||||
|
trace := tracing.NewNoopTracerProvider().Tracer("test") |
||||||
|
_, span := trace.Start(context.Background(), "") |
||||||
|
st := &FakeInstanceStore{} |
||||||
|
syncStatePersister := NewSyncStatePersisiter(&logtest.Fake{}, ManagerCfg{ |
||||||
|
InstanceStore: st, |
||||||
|
MaxStateSaveConcurrency: 1, |
||||||
|
}) |
||||||
|
syncStatePersister.Sync(context.Background(), span, transitions, nil) |
||||||
|
|
||||||
|
savedKeys := map[ngmodels.AlertInstanceKey]ngmodels.AlertInstance{} |
||||||
|
for _, op := range st.RecordedOps { |
||||||
|
saved := op.(ngmodels.AlertInstance) |
||||||
|
savedKeys[saved.AlertInstanceKey] = saved |
||||||
|
} |
||||||
|
for key, tr := range transitionToKey { |
||||||
|
if tr.State.State == eval.Normal && tr.StateReason == "" && tr.PreviousState == eval.Normal && tr.PreviousStateReason == "" { |
||||||
|
continue |
||||||
|
} |
||||||
|
assert.Containsf(t, savedKeys, key, "state %s (%s) was not saved but should be", tr.State.State, tr.StateReason) |
||||||
|
} |
||||||
|
}) |
||||||
|
} |
Loading…
Reference in new issue