Alerting: Refactor State Transition (part 2 of n) (#99985)

* split create to create and patch and move to state

patch will be refactored further

* move setNextState to state transition

* move tests

* split tests for patch function
pull/100641/head
Yuri Tseretyan 3 months ago committed by GitHub
parent 71f97f380d
commit 9dd75aee32
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 75
      pkg/services/ngalert/state/cache.go
  2. 311
      pkg/services/ngalert/state/cache_test.go
  3. 128
      pkg/services/ngalert/state/manager.go
  4. 174
      pkg/services/ngalert/state/state.go
  5. 8
      pkg/services/ngalert/state/state_bench_test.go
  6. 331
      pkg/services/ngalert/state/state_test.go

@ -143,81 +143,6 @@ func expandAnnotationsAndLabels(ctx context.Context, log log.Logger, alertRule *
return lbs, annotations
}
// create builds the State for one evaluation result of alertRule.
//
// Labels and annotations are expanded from the rule, the result and
// extraLabels; the fingerprint of the expanded labels is used as the cache ID.
// If the cache holds no state under that ID, a fresh Normal state is returned.
// Otherwise the fields that are needed to compute the next state are carried
// over from the cached state. The cache itself is not modified here.
func (c *cache) create(ctx context.Context, log log.Logger, alertRule *ngModels.AlertRule, result eval.Result, extraLabels data.Labels, externalURL *url.URL) *State {
	labels, annotations := expandAnnotationsAndLabels(ctx, log, alertRule, result, extraLabels, externalURL)
	id := labels.Fingerprint()

	// A new state starts as Normal, with StartsAt and EndsAt equal to
	// EvaluatedAt, which is what state transition expects for a Normal state.
	// Fields that are not listed (StateReason, LatestResult, Error, Image,
	// Values, ResolvedAt, LastSentAt, LastEvaluationString) keep their zero values.
	created := &State{
		OrgID:              alertRule.OrgID,
		AlertRuleUID:       alertRule.UID,
		CacheID:            id,
		State:              eval.Normal,
		ResultFingerprint:  result.Instance.Fingerprint(), // remember original result fingerprint
		Annotations:        annotations,
		Labels:             labels,
		StartsAt:           result.EvaluatedAt,
		EndsAt:             result.EvaluatedAt,
		LastEvaluationTime: result.EvaluatedAt,
		EvaluationDuration: result.EvaluationDuration,
	}

	previous := c.get(alertRule.OrgID, alertRule.UID, id)
	if previous == nil {
		return created
	}

	// Carry over the values that may be needed to determine the final state.
	// TODO remove the assignments that setNextState overrides anyway.
	created.State, created.StateReason = previous.State, previous.StateReason
	created.Image = previous.Image
	created.LatestResult = previous.LatestResult
	created.Error = previous.Error
	created.Values = previous.Values
	created.LastEvaluationString = previous.LastEvaluationString
	created.StartsAt, created.EndsAt = previous.StartsAt, previous.EndsAt
	created.ResolvedAt = previous.ResolvedAt
	created.LastSentAt = previous.LastSentAt

	// Annotations may change between evaluations, but internal annotations
	// are kept: each one that the previous state has and the freshly expanded
	// set lacks is copied over. Freshly expanded values always win.
	for key := range ngModels.InternalAnnotationNameSet {
		if _, present := created.Annotations[key]; present {
			continue
		}
		if value, ok := previous.Annotations[key]; ok {
			created.Annotations[key] = value
		}
	}

	// A state in "data source error" may carry additional labels that do not exist in the new state.
	// See https://github.com/grafana/grafana/blob/c7fdf8ce706c2c9d438f5e6eabd6e580bac4946b/pkg/services/ngalert/state/state.go#L161-L163
	// While the error persists, copy the known labels to the new instance; it can help reduce flapping.
	// TODO fix this?
	if previous.State == eval.Error && result.State == eval.Error {
		for _, name := range []string{"datasource_uid", "ref_id"} {
			if value, ok := previous.Labels[name]; ok {
				created.Labels[name] = value
			}
		}
	}

	return created
}
// expand returns the expanded templates of all annotations or labels for the template data.
// If a template cannot be expanded due to an error in the template the original template is
// maintained and an error is added to the multierror. All errors in the multierror are

@ -3,15 +3,11 @@ package state
import (
"context"
"errors"
"fmt"
"math/rand"
"net/url"
"testing"
"time"
"github.com/google/uuid"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/infra/log"
@ -118,313 +114,6 @@ func Test_expand(t *testing.T) {
})
}
// Test_create exercises cache.create: how labels and annotations are combined
// and expanded, how reserved label names are renamed, and which fields are
// preset for a new state versus copied from a state already in the cache.
// All subtests share the same cache instance c.
func Test_create(t *testing.T) {
url := &url.URL{
Scheme: "http",
Host: "localhost:3000",
Path: "/test",
}
l := log.New("test")
c := newCache()
gen := models.RuleGen
generateRule := gen.With(gen.WithNotEmptyLabels(5, "rule-")).GenerateRef
// The resulting label set is the union of extra, rule and result labels.
t.Run("should combine all labels", func(t *testing.T) {
rule := generateRule()
extraLabels := models.GenerateAlertLabels(5, "extra-")
result := eval.Result{
Instance: models.GenerateAlertLabels(5, "result-"),
}
state := c.create(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range extraLabels {
require.Equal(t, expected, state.Labels[key])
}
assert.Len(t, state.Labels, len(extraLabels)+len(rule.Labels)+len(result.Instance))
for key, expected := range extraLabels {
assert.Equal(t, expected, state.Labels[key])
}
for key, expected := range rule.Labels {
assert.Equal(t, expected, state.Labels[key])
}
for key, expected := range result.Instance {
assert.Equal(t, expected, state.Labels[key])
}
})
// The same keys are set in all three sources; the extra label value must win.
t.Run("extra labels should take precedence over rule and result labels", func(t *testing.T) {
rule := generateRule()
extraLabels := models.GenerateAlertLabels(2, "extra-")
result := eval.Result{
Instance: models.GenerateAlertLabels(5, "result-"),
}
for key := range extraLabels {
rule.Labels[key] = "rule-" + util.GenerateShortUID()
result.Instance[key] = "result-" + util.GenerateShortUID()
}
state := c.create(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range extraLabels {
require.Equal(t, expected, state.Labels[key])
}
})
// Rule label keys are duplicated into the result; the rule value must win.
t.Run("rule labels should take precedence over result labels", func(t *testing.T) {
rule := generateRule()
extraLabels := models.GenerateAlertLabels(2, "extra-")
result := eval.Result{
Instance: models.GenerateAlertLabels(5, "result-"),
}
for key := range rule.Labels {
result.Instance[key] = "result-" + util.GenerateShortUID()
}
state := c.create(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range rule.Labels {
require.Equal(t, expected, state.Labels[key])
}
})
// Rule labels are templates that reference extra and result labels via .Labels.
t.Run("rule labels should be able to be expanded with result and extra labels", func(t *testing.T) {
result := eval.Result{
Instance: models.GenerateAlertLabels(5, "result-"),
}
rule := generateRule()
extraLabels := models.GenerateAlertLabels(2, "extra-")
labelTemplates := make(data.Labels)
for key := range extraLabels {
labelTemplates["rule-"+key] = fmt.Sprintf("{{ with (index .Labels \"%s\") }}{{.}}{{end}}", key)
}
for key := range result.Instance {
labelTemplates["rule-"+key] = fmt.Sprintf("{{ with (index .Labels \"%s\") }}{{.}}{{end}}", key)
}
rule.Labels = labelTemplates
state := c.create(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range extraLabels {
assert.Equal(t, expected, state.Labels["rule-"+key])
}
for key, expected := range result.Instance {
assert.Equal(t, expected, state.Labels["rule-"+key])
}
})
// Same as above, but the templates are placed in the rule annotations.
t.Run("rule annotations should be able to be expanded with result and extra labels", func(t *testing.T) {
result := eval.Result{
Instance: models.GenerateAlertLabels(5, "result-"),
}
rule := generateRule()
extraLabels := models.GenerateAlertLabels(2, "extra-")
annotationTemplates := make(data.Labels)
for key := range extraLabels {
annotationTemplates["rule-"+key] = fmt.Sprintf("{{ with (index .Labels \"%s\") }}{{.}}{{end}}", key)
}
for key := range result.Instance {
annotationTemplates["rule-"+key] = fmt.Sprintf("{{ with (index .Labels \"%s\") }}{{.}}{{end}}", key)
}
rule.Annotations = annotationTemplates
state := c.create(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range extraLabels {
assert.Equal(t, expected, state.Annotations["rule-"+key])
}
for key, expected := range result.Instance {
assert.Equal(t, expected, state.Annotations["rule-"+key])
}
})
t.Run("when result labels collide with system labels from LabelsUserCannotSpecify", func(t *testing.T) {
result := eval.Result{
Instance: models.GenerateAlertLabels(5, "result-"),
}
// The package-level set is replaced for this subtest and restored on cleanup.
m := models.LabelsUserCannotSpecify
t.Cleanup(func() {
models.LabelsUserCannotSpecify = m
})
models.LabelsUserCannotSpecify = map[string]struct{}{
"__label1__": {},
"label2__": {},
"__label3": {},
"label4": {},
}
result.Instance["__label1__"] = uuid.NewString()
result.Instance["label2__"] = uuid.NewString()
result.Instance["__label3"] = uuid.NewString()
result.Instance["label4"] = uuid.NewString()
rule := generateRule()
state := c.create(context.Background(), l, rule, result, nil, url)
for key := range models.LabelsUserCannotSpecify {
assert.NotContains(t, state.Labels, key)
}
// Expected renames: surrounding underscores are dropped; a name without
// underscores ("label4") gets the "_user" suffix instead.
assert.Contains(t, state.Labels, "label1")
assert.Equal(t, state.Labels["label1"], result.Instance["__label1__"])
assert.Contains(t, state.Labels, "label2")
assert.Equal(t, state.Labels["label2"], result.Instance["label2__"])
assert.Contains(t, state.Labels, "label3")
assert.Equal(t, state.Labels["label3"], result.Instance["__label3"])
assert.Contains(t, state.Labels, "label4_user")
assert.Equal(t, state.Labels["label4_user"], result.Instance["label4"])
// When the renamed key already exists in the result, the existing value is kept.
t.Run("should drop label if renamed collides with existing", func(t *testing.T) {
result.Instance["label1"] = uuid.NewString()
result.Instance["label1_user"] = uuid.NewString()
result.Instance["label4_user"] = uuid.NewString()
state = c.create(context.Background(), l, rule, result, nil, url)
assert.NotContains(t, state.Labels, "__label1__")
assert.Contains(t, state.Labels, "label1")
assert.Equal(t, state.Labels["label1"], result.Instance["label1"])
assert.Equal(t, state.Labels["label1_user"], result.Instance["label1_user"])
assert.NotContains(t, state.Labels, "label4")
assert.Equal(t, state.Labels["label4_user"], result.Instance["label4_user"])
})
})
// Nothing is stored in the cache for this rule, so every field must have its preset value.
t.Run("creates a state with preset fields if there is no current state", func(t *testing.T) {
rule := generateRule()
extraLabels := models.GenerateAlertLabels(2, "extra-")
result := eval.Result{
Instance: models.GenerateAlertLabels(5, "result-"),
}
expectedLbl, expectedAnn := expandAnnotationsAndLabels(context.Background(), l, rule, result, extraLabels, url)
state := c.create(context.Background(), l, rule, result, extraLabels, url)
assert.Equal(t, rule.OrgID, state.OrgID)
assert.Equal(t, rule.UID, state.AlertRuleUID)
assert.Equal(t, state.Labels.Fingerprint(), state.CacheID)
// result.State is left unset here, so this compares against its zero value.
assert.Equal(t, result.State, state.State)
assert.Equal(t, "", state.StateReason)
assert.Equal(t, result.Instance.Fingerprint(), state.ResultFingerprint)
assert.Nil(t, state.LatestResult)
assert.Nil(t, state.Error)
assert.Nil(t, state.Image)
assert.EqualValues(t, expectedAnn, state.Annotations)
assert.EqualValues(t, expectedLbl, state.Labels)
assert.Nil(t, state.Values)
assert.Equal(t, result.EvaluatedAt, state.StartsAt)
assert.Equal(t, result.EvaluatedAt, state.EndsAt)
assert.Nil(t, state.ResolvedAt)
assert.Nil(t, state.LastSentAt)
assert.Equal(t, "", state.LastEvaluationString)
assert.Equal(t, result.EvaluatedAt, state.LastEvaluationTime)
assert.Equal(t, result.EvaluationDuration, state.EvaluationDuration)
})
t.Run("it populates some fields from the current state if it exists", func(t *testing.T) {
rule := generateRule()
extraLabels := models.GenerateAlertLabels(2, "extra-")
result := eval.Result{
Instance: models.GenerateAlertLabels(5, "result-"),
}
expectedLbl, expectedAnn := expandAnnotationsAndLabels(context.Background(), l, rule, result, extraLabels, url)
// Store a random state under the cache ID that create will compute.
current := randomSate(rule.GetKey())
current.CacheID = expectedLbl.Fingerprint()
c.set(&current)
state := c.create(context.Background(), l, rule, result, extraLabels, url)
// Fields derived from the rule and the result.
assert.Equal(t, rule.OrgID, state.OrgID)
assert.Equal(t, rule.UID, state.AlertRuleUID)
assert.Equal(t, state.Labels.Fingerprint(), state.CacheID)
assert.Equal(t, result.Instance.Fingerprint(), state.ResultFingerprint)
assert.EqualValues(t, expectedAnn, state.Annotations)
assert.EqualValues(t, expectedLbl, state.Labels)
assert.Equal(t, result.EvaluatedAt, state.LastEvaluationTime)
assert.Equal(t, result.EvaluationDuration, state.EvaluationDuration)
// Fields carried over from the cached state.
assert.Equal(t, current.State, state.State)
assert.Equal(t, current.StateReason, state.StateReason)
assert.Equal(t, current.Image, state.Image)
assert.Equal(t, current.LatestResult, state.LatestResult)
assert.Equal(t, current.Error, state.Error)
assert.Equal(t, current.Values, state.Values)
assert.Equal(t, current.StartsAt, state.StartsAt)
assert.Equal(t, current.EndsAt, state.EndsAt)
assert.Equal(t, current.ResolvedAt, state.ResolvedAt)
assert.Equal(t, current.LastSentAt, state.LastSentAt)
assert.Equal(t, current.LastEvaluationString, state.LastEvaluationString)
t.Run("if result Error and current state is Error it should copy datasource_uid and ref_id labels", func(t *testing.T) {
current = randomSate(rule.GetKey())
current.CacheID = expectedLbl.Fingerprint()
current.State = eval.Error
current.Labels["datasource_uid"] = util.GenerateShortUID()
current.Labels["ref_id"] = util.GenerateShortUID()
c.set(&current)
result.State = eval.Error
state = c.create(context.Background(), l, rule, result, extraLabels, url)
// NOTE: from here on l shadows the logger of the enclosing test; it holds the expected labels.
l := expectedLbl.Copy()
l["datasource_uid"] = current.Labels["datasource_uid"]
l["ref_id"] = current.Labels["ref_id"]
// The cache ID is expected to stay the same although two labels were added.
assert.Equal(t, current.CacheID, state.CacheID)
assert.EqualValues(t, l, state.Labels)
assert.Equal(t, rule.OrgID, state.OrgID)
assert.Equal(t, rule.UID, state.AlertRuleUID)
assert.Equal(t, result.Instance.Fingerprint(), state.ResultFingerprint)
assert.EqualValues(t, expectedAnn, state.Annotations)
assert.Equal(t, result.EvaluatedAt, state.LastEvaluationTime)
assert.Equal(t, result.EvaluationDuration, state.EvaluationDuration)
assert.Equal(t, current.State, state.State)
assert.Equal(t, current.StateReason, state.StateReason)
assert.Equal(t, current.Image, state.Image)
assert.Equal(t, current.LatestResult, state.LatestResult)
assert.Equal(t, current.Error, state.Error)
assert.Equal(t, current.Values, state.Values)
assert.Equal(t, current.StartsAt, state.StartsAt)
assert.Equal(t, current.EndsAt, state.EndsAt)
assert.Equal(t, current.ResolvedAt, state.ResolvedAt)
assert.Equal(t, current.LastSentAt, state.LastSentAt)
assert.Equal(t, current.LastEvaluationString, state.LastEvaluationString)
})
t.Run("copies system-owned annotations from current state", func(t *testing.T) {
current = randomSate(rule.GetKey())
current.CacheID = expectedLbl.Fingerprint()
current.State = eval.Error
for key := range models.InternalAnnotationNameSet {
current.Annotations[key] = util.GenerateShortUID()
}
c.set(&current)
// result.State may already be Error from the previous subtest; it is set again so this subtest does not depend on it.
result.State = eval.Error
state = c.create(context.Background(), l, rule, result, extraLabels, url)
ann := expectedAnn.Copy()
for key := range models.InternalAnnotationNameSet {
ann[key] = current.Annotations[key]
}
assert.EqualValues(t, expectedLbl, state.Labels)
assert.EqualValues(t, ann, state.Annotations)
})
})
}
func Test_mergeLabels(t *testing.T) {
t.Run("merges two maps", func(t *testing.T) {
a := models.GenerateAlertLabels(5, "set1-")

@ -444,9 +444,16 @@ func (st *Manager) setNextStateForRule(ctx context.Context, alertRule *ngModels.
}
transitions := make([]StateTransition, 0, len(results))
for _, result := range results {
currentState := st.cache.create(ctx, logger, alertRule, result, extraLabels, st.externalURL)
s := st.setNextState(alertRule, currentState, result, nil, logger, takeImageFn)
st.cache.set(currentState) // replace the existing state with the new one
newState := newState(ctx, logger, alertRule, result, extraLabels, st.externalURL)
if curState := st.cache.get(alertRule.OrgID, alertRule.UID, newState.CacheID); curState != nil {
patch(newState, curState, result)
}
start := st.clock.Now()
s := newState.transition(alertRule, result, nil, logger, takeImageFn)
if st.metrics != nil {
st.metrics.StateUpdateDuration.Observe(st.clock.Now().Sub(start).Seconds())
}
st.cache.set(newState) // replace the existing state with the new one
transitions = append(transitions, s)
}
return transitions
@ -459,8 +466,12 @@ func (st *Manager) setNextStateForAll(alertRule *ngModels.AlertRule, result eval
states: make(map[data.Fingerprint]*State, len(currentStates)),
}
for _, currentState := range currentStates {
start := st.clock.Now()
newState := currentState.Copy()
t := st.setNextState(alertRule, newState, result, extraAnnotations, logger, takeImageFn)
t := newState.transition(alertRule, result, extraAnnotations, logger, takeImageFn)
if st.metrics != nil {
st.metrics.StateUpdateDuration.Observe(st.clock.Now().Sub(start).Seconds())
}
updated.states[newState.CacheID] = newState
transitions = append(transitions, t)
}
@ -468,115 +479,6 @@ func (st *Manager) setNextStateForAll(alertRule *ngModels.AlertRule, result eval
return transitions
}
// setNextState updates currentState in place according to the evaluation
// result and returns a StateTransition that holds the updated state together
// with the state and reason it had before the call.
//
// extraAnnotations are written into the state's annotations after the state is
// computed. takeImageFn is called only when shouldTakeImage returns a non-empty
// reason. If st.metrics is set, the duration of the whole call is observed in
// StateUpdateDuration.
func (st *Manager) setNextState(alertRule *ngModels.AlertRule, currentState *State, result eval.Result, extraAnnotations data.Labels, logger log.Logger, takeImageFn takeImageFn) StateTransition {
// start brackets the whole update; it is read again right before returning.
start := st.clock.Now()
currentState.LastEvaluationTime = result.EvaluatedAt
currentState.EvaluationDuration = result.EvaluationDuration
currentState.SetNextValues(result)
// Values is read after SetNextValues has run.
currentState.LatestResult = &Evaluation{
EvaluationTime: result.EvaluatedAt,
EvaluationState: result.State,
Values: currentState.Values,
Condition: alertRule.Condition,
}
currentState.LastEvaluationString = result.EvaluationString
// Remember the state and reason before the handlers below change them.
oldState := currentState.State
oldReason := currentState.StateReason
// Add the instance to the log context to help correlate log lines for a state
logger = logger.New("instance", result.Instance)
// if the current state is Error but the result is different, then we need to clean up the extra labels
// that were added after the state key was calculated
// https://github.com/grafana/grafana/blob/1df4d332c982dc5e394201bb2ef35b442727ce63/pkg/services/ngalert/state/state.go#L298-L311
// Usually, it happens in the case of classic conditions when the evalResult does not have labels.
//
// This is temporary change to make sure that the labels are not persistent in the state after it was in Error state
// TODO yuri. Remove it when correct Error result with labels is provided
if currentState.State == eval.Error && result.State != eval.Error {
// This is possible because state was updated after the CacheID was calculated.
_, curOk := currentState.Labels["ref_id"]
_, resOk := result.Instance["ref_id"]
if curOk && !resOk {
delete(currentState.Labels, "ref_id")
}
_, curOk = currentState.Labels["datasource_uid"]
_, resOk = result.Instance["datasource_uid"]
if curOk && !resOk {
delete(currentState.Labels, "datasource_uid")
}
}
// Dispatch to the handler that matches the result state.
switch result.State {
case eval.Normal:
logger.Debug("Setting next state", "handler", "resultNormal")
resultNormal(currentState, alertRule, result, logger, "")
case eval.Alerting:
logger.Debug("Setting next state", "handler", "resultAlerting")
resultAlerting(currentState, alertRule, result, logger, "")
case eval.Error:
logger.Debug("Setting next state", "handler", "resultError")
resultError(currentState, alertRule, result, logger)
case eval.NoData:
logger.Debug("Setting next state", "handler", "resultNoData")
resultNoData(currentState, alertRule, result, logger)
case eval.Pending: // we do not emit results with this state
logger.Debug("Ignoring set next state as result is pending")
}
// Set reason iff: result and state are different, reason is not Alerting or Normal
currentState.StateReason = ""
if currentState.State != result.State &&
result.State != eval.Normal &&
result.State != eval.Alerting {
currentState.StateReason = resultStateReason(result, alertRule)
}
// Set Resolved property so the scheduler knows to send a postable alert
// to Alertmanager.
newlyResolved := false
if oldState == eval.Alerting && currentState.State == eval.Normal {
currentState.ResolvedAt = &result.EvaluatedAt
newlyResolved = true
} else if currentState.State != eval.Normal && currentState.State != eval.Pending { // Retain the last resolved time for Normal->Normal and Normal->Pending.
currentState.ResolvedAt = nil
}
// Keep the current image when takeImageFn returns nil.
if reason := shouldTakeImage(currentState.State, oldState, currentState.Image, newlyResolved); reason != "" {
image := takeImageFn(reason)
if image != nil {
currentState.Image = image
}
}
// Extra annotations overwrite annotations with the same key.
for key, val := range extraAnnotations {
currentState.Annotations[key] = val
}
nextState := StateTransition{
State: currentState,
PreviousState: oldState,
PreviousStateReason: oldReason,
}
if st.metrics != nil {
st.metrics.StateUpdateDuration.Observe(st.clock.Now().Sub(start).Seconds())
}
return nextState
}
// resultStateReason returns the reason string for a result whose state differs
// from the state the rule ended up in. It is the name of the result state,
// combined with the keep-last reason when the rule is configured with
// KeepLastErrState for execution errors or KeepLast for no data.
func resultStateReason(result eval.Result, rule *ngModels.AlertRule) string {
	reason := result.State.String()
	keepLast := rule.ExecErrState == ngModels.KeepLastErrState || rule.NoDataState == ngModels.KeepLast
	if !keepLast {
		return reason
	}
	return ngModels.ConcatReasons(reason, ngModels.StateReasonKeepLast)
}
func (st *Manager) GetAll(orgID int64) []*State {
allStates := st.cache.getAll(orgID)
return allStates

@ -7,6 +7,7 @@ import (
"fmt"
"maps"
"math"
"net/url"
"strings"
"time"
@ -76,6 +77,35 @@ type State struct {
EvaluationDuration time.Duration
}
// newState creates a fresh State for one evaluation result of alertRule.
//
// Labels and annotations are expanded from the rule, the result and
// extraLabels. The fingerprint of the expanded labels becomes the CacheID and
// the fingerprint of the raw result instance is kept in ResultFingerprint.
// The state starts as Normal with no reason.
func newState(ctx context.Context, log log.Logger, alertRule *models.AlertRule, result eval.Result, extraLabels data.Labels, externalURL *url.URL) *State {
	labels, annotations := expandAnnotationsAndLabels(ctx, log, alertRule, result, extraLabels, externalURL)
	id := labels.Fingerprint()

	// StartsAt and EndsAt are both set to EvaluatedAt because this is the
	// expected value for a Normal state during state transition.
	evaluatedAt := result.EvaluatedAt

	// Fields that are not listed (StateReason, LatestResult, Error, Image,
	// Values, ResolvedAt, LastSentAt, LastEvaluationString) keep their zero values.
	fresh := &State{
		OrgID:              alertRule.OrgID,
		AlertRuleUID:       alertRule.UID,
		CacheID:            id,
		State:              eval.Normal,
		ResultFingerprint:  result.Instance.Fingerprint(), // remember original result fingerprint
		Annotations:        annotations,
		Labels:             labels,
		StartsAt:           evaluatedAt,
		EndsAt:             evaluatedAt,
		LastEvaluationTime: evaluatedAt,
		EvaluationDuration: result.EvaluationDuration,
	}
	return fresh
}
// Copy creates a shallow copy of the State except for labels and annotations.
func (a *State) Copy() *State {
// Deep copy annotations and labels
@ -664,3 +694,147 @@ func GetRuleExtraLabels(l log.Logger, rule *models.AlertRule, folderTitle string
}
return extraLabels
}
// patch copies into newState the parts of existingState that are needed to
// determine the final state. It also carries over internal annotations and,
// while the error state persists, the error-related labels.
//
// Both states must be non-nil. CacheID, OrgID, AlertRuleUID, ResultFingerprint
// and the evaluation time and duration of newState are not touched.
func patch(newState, existingState *State, result eval.Result) {
	// TODO remove the assignments that transition overrides anyway.
	newState.State, newState.StateReason = existingState.State, existingState.StateReason
	newState.Image = existingState.Image
	newState.LatestResult = existingState.LatestResult
	newState.Error = existingState.Error
	newState.Values = existingState.Values
	newState.LastEvaluationString = existingState.LastEvaluationString
	newState.StartsAt, newState.EndsAt = existingState.StartsAt, existingState.EndsAt
	newState.ResolvedAt = existingState.ResolvedAt
	newState.LastSentAt = existingState.LastSentAt

	// Annotations may change between evaluations, but internal annotations
	// are kept: each one that the existing state has and the new state lacks
	// is copied over. Values already present in the new state always win.
	for key := range models.InternalAnnotationNameSet {
		if _, present := newState.Annotations[key]; present {
			continue
		}
		if value, ok := existingState.Annotations[key]; ok {
			newState.Annotations[key] = value
		}
	}

	// A state in "data source error" may carry additional labels that do not exist in the new state.
	// See https://github.com/grafana/grafana/blob/c7fdf8ce706c2c9d438f5e6eabd6e580bac4946b/pkg/services/ngalert/state/state.go#L161-L163
	// While the error persists, copy the known labels to the new instance; it can help reduce flapping.
	// TODO fix this?
	if existingState.State != eval.Error || result.State != eval.Error {
		return
	}
	for _, name := range []string{"datasource_uid", "ref_id"} {
		if value, ok := existingState.Labels[name]; ok {
			newState.Labels[name] = value
		}
	}
}
// transition applies one evaluation result to the state in place and returns
// a StateTransition that holds the updated state together with the state and
// reason it had before the call.
//
// extraAnnotations are written into the annotations after the state is
// computed and overwrite entries with the same key. takeImageFn is called only
// when shouldTakeImage returns a non-empty reason.
func (a *State) transition(alertRule *models.AlertRule, result eval.Result, extraAnnotations data.Labels, logger log.Logger, takeImageFn takeImageFn) StateTransition {
	a.LastEvaluationTime, a.EvaluationDuration = result.EvaluatedAt, result.EvaluationDuration
	a.SetNextValues(result)
	// Values is read after SetNextValues has run.
	latest := Evaluation{
		EvaluationTime:  result.EvaluatedAt,
		EvaluationState: result.State,
		Values:          a.Values,
		Condition:       alertRule.Condition,
	}
	a.LatestResult = &latest
	a.LastEvaluationString = result.EvaluationString

	// Remember the state and reason before the handlers below change them.
	prevState, prevReason := a.State, a.StateReason

	// Add the instance to the log context to help correlate log lines for a state
	logger = logger.New("instance", result.Instance)

	// If the state is Error but the result is not, the extra labels that were
	// added after the state key (CacheID) was calculated have to be cleaned up.
	// https://github.com/grafana/grafana/blob/1df4d332c982dc5e394201bb2ef35b442727ce63/pkg/services/ngalert/state/state.go#L298-L311
	// Usually, it happens in the case of classic conditions when the evalResult does not have labels.
	//
	// This is a temporary change to make sure that the labels do not persist in the state after it was in Error state.
	// TODO yuri. Remove it when correct Error result with labels is provided
	if prevState == eval.Error && result.State != eval.Error {
		for _, name := range []string{"ref_id", "datasource_uid"} {
			// Labels that the result itself provides are kept. Deleting a
			// label that is not set is a no-op.
			if _, inResult := result.Instance[name]; !inResult {
				delete(a.Labels, name)
			}
		}
	}

	// Dispatch to the handler that matches the result state.
	switch result.State {
	case eval.Normal:
		logger.Debug("Setting next state", "handler", "resultNormal")
		resultNormal(a, alertRule, result, logger, "")
	case eval.Alerting:
		logger.Debug("Setting next state", "handler", "resultAlerting")
		resultAlerting(a, alertRule, result, logger, "")
	case eval.Error:
		logger.Debug("Setting next state", "handler", "resultError")
		resultError(a, alertRule, result, logger)
	case eval.NoData:
		logger.Debug("Setting next state", "handler", "resultNoData")
		resultNoData(a, alertRule, result, logger)
	case eval.Pending: // we do not emit results with this state
		logger.Debug("Ignoring set next state as result is pending")
	}

	// A reason is set iff the result and the state differ and the result is
	// neither Normal nor Alerting; otherwise the reason is cleared.
	plainResult := result.State == eval.Normal || result.State == eval.Alerting
	if a.State != result.State && !plainResult {
		a.StateReason = resultStateReason(result, alertRule)
	} else {
		a.StateReason = ""
	}

	// Set the resolved time so the scheduler knows to send a postable alert
	// to Alertmanager.
	newlyResolved := prevState == eval.Alerting && a.State == eval.Normal
	switch {
	case newlyResolved:
		a.ResolvedAt = &result.EvaluatedAt
	case a.State != eval.Normal && a.State != eval.Pending:
		// The last resolved time is retained for Normal->Normal and Normal->Pending.
		a.ResolvedAt = nil
	}

	// Keep the current image when takeImageFn returns nil.
	if reason := shouldTakeImage(a.State, prevState, a.Image, newlyResolved); reason != "" {
		if img := takeImageFn(reason); img != nil {
			a.Image = img
		}
	}

	for name, value := range extraAnnotations {
		a.Annotations[name] = value
	}

	return StateTransition{
		State:               a,
		PreviousState:       prevState,
		PreviousStateReason: prevReason,
	}
}
// resultStateReason returns the reason string for a result whose state differs
// from the state the rule ended up in. It is the name of the result state,
// combined with the keep-last reason when the rule is configured with
// KeepLastErrState for execution errors or KeepLast for no data.
func resultStateReason(result eval.Result, rule *models.AlertRule) string {
	reason := result.State.String()
	keepLast := rule.ExecErrState == models.KeepLastErrState || rule.NoDataState == models.KeepLast
	if !keepLast {
		return reason
	}
	return models.ConcatReasons(reason, models.StateReasonKeepLast)
}

@ -14,7 +14,7 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/models"
)
func BenchmarkGetOrCreateTest(b *testing.B) {
func BenchmarkCreateAndPatch(b *testing.B) {
cache := newCache()
rule := models.RuleGen.With(func(rule *models.AlertRule) {
for i := 0; i < 2; i++ {
@ -43,7 +43,11 @@ func BenchmarkGetOrCreateTest(b *testing.B) {
// values := make([]int64, count)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_ = cache.create(ctx, log, rule, result, nil, u)
// Mirror the production path in setNextStateForRule: build a fresh state and
// patch it only when the cache already holds a state under the same ID.
// patch dereferences the existing state, so it must never be called with nil.
s := newState(ctx, log, rule, result, nil, u)
current := cache.get(rule.OrgID, rule.UID, s.CacheID)
if current != nil {
	patch(s, current, result)
}
}
})
}

@ -3,14 +3,17 @@ package state
import (
"context"
"errors"
"fmt"
"math"
"math/rand"
"net/url"
"testing"
"time"
"github.com/benbjohnson/clock"
"github.com/golang/mock/gomock"
"github.com/google/uuid"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -801,3 +804,331 @@ func TestGetRuleExtraLabels(t *testing.T) {
})
}
}
// TestNewState exercises newState: how labels and annotations are combined
// and expanded, how reserved label names are renamed, and which values the
// fields of a freshly created state have.
func TestNewState(t *testing.T) {
url := &url.URL{
Scheme: "http",
Host: "localhost:3000",
Path: "/test",
}
l := log.New("test")
gen := ngmodels.RuleGen
generateRule := gen.With(gen.WithNotEmptyLabels(5, "rule-")).GenerateRef
// The resulting label set is the union of extra, rule and result labels.
t.Run("should combine all labels", func(t *testing.T) {
rule := generateRule()
extraLabels := ngmodels.GenerateAlertLabels(5, "extra-")
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
state := newState(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range extraLabels {
require.Equal(t, expected, state.Labels[key])
}
assert.Len(t, state.Labels, len(extraLabels)+len(rule.Labels)+len(result.Instance))
for key, expected := range extraLabels {
assert.Equal(t, expected, state.Labels[key])
}
for key, expected := range rule.Labels {
assert.Equal(t, expected, state.Labels[key])
}
for key, expected := range result.Instance {
assert.Equal(t, expected, state.Labels[key])
}
})
// The same keys are set in all three sources; the extra label value must win.
t.Run("extra labels should take precedence over rule and result labels", func(t *testing.T) {
rule := generateRule()
extraLabels := ngmodels.GenerateAlertLabels(2, "extra-")
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
for key := range extraLabels {
rule.Labels[key] = "rule-" + util.GenerateShortUID()
result.Instance[key] = "result-" + util.GenerateShortUID()
}
state := newState(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range extraLabels {
require.Equal(t, expected, state.Labels[key])
}
})
// Rule label keys are duplicated into the result; the rule value must win.
t.Run("rule labels should take precedence over result labels", func(t *testing.T) {
rule := generateRule()
extraLabels := ngmodels.GenerateAlertLabels(2, "extra-")
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
for key := range rule.Labels {
result.Instance[key] = "result-" + util.GenerateShortUID()
}
state := newState(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range rule.Labels {
require.Equal(t, expected, state.Labels[key])
}
})
// Rule labels are templates that reference extra and result labels via .Labels.
t.Run("rule labels should be able to be expanded with result and extra labels", func(t *testing.T) {
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
rule := generateRule()
extraLabels := ngmodels.GenerateAlertLabels(2, "extra-")
labelTemplates := make(data.Labels)
for key := range extraLabels {
labelTemplates["rule-"+key] = fmt.Sprintf("{{ with (index .Labels \"%s\") }}{{.}}{{end}}", key)
}
for key := range result.Instance {
labelTemplates["rule-"+key] = fmt.Sprintf("{{ with (index .Labels \"%s\") }}{{.}}{{end}}", key)
}
rule.Labels = labelTemplates
state := newState(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range extraLabels {
assert.Equal(t, expected, state.Labels["rule-"+key])
}
for key, expected := range result.Instance {
assert.Equal(t, expected, state.Labels["rule-"+key])
}
})
// Same as above, but the templates are placed in the rule annotations.
t.Run("rule annotations should be able to be expanded with result and extra labels", func(t *testing.T) {
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
rule := generateRule()
extraLabels := ngmodels.GenerateAlertLabels(2, "extra-")
annotationTemplates := make(data.Labels)
for key := range extraLabels {
annotationTemplates["rule-"+key] = fmt.Sprintf("{{ with (index .Labels \"%s\") }}{{.}}{{end}}", key)
}
for key := range result.Instance {
annotationTemplates["rule-"+key] = fmt.Sprintf("{{ with (index .Labels \"%s\") }}{{.}}{{end}}", key)
}
rule.Annotations = annotationTemplates
state := newState(context.Background(), l, rule, result, extraLabels, url)
for key, expected := range extraLabels {
assert.Equal(t, expected, state.Annotations["rule-"+key])
}
for key, expected := range result.Instance {
assert.Equal(t, expected, state.Annotations["rule-"+key])
}
})
t.Run("when result labels collide with system labels from LabelsUserCannotSpecify", func(t *testing.T) {
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
// The package-level set is replaced for this subtest and restored on cleanup.
m := ngmodels.LabelsUserCannotSpecify
t.Cleanup(func() {
ngmodels.LabelsUserCannotSpecify = m
})
ngmodels.LabelsUserCannotSpecify = map[string]struct{}{
"__label1__": {},
"label2__": {},
"__label3": {},
"label4": {},
}
result.Instance["__label1__"] = uuid.NewString()
result.Instance["label2__"] = uuid.NewString()
result.Instance["__label3"] = uuid.NewString()
result.Instance["label4"] = uuid.NewString()
rule := generateRule()
state := newState(context.Background(), l, rule, result, nil, url)
for key := range ngmodels.LabelsUserCannotSpecify {
assert.NotContains(t, state.Labels, key)
}
// Expected renames: surrounding underscores are dropped; a name without
// underscores ("label4") gets the "_user" suffix instead.
assert.Contains(t, state.Labels, "label1")
assert.Equal(t, state.Labels["label1"], result.Instance["__label1__"])
assert.Contains(t, state.Labels, "label2")
assert.Equal(t, state.Labels["label2"], result.Instance["label2__"])
assert.Contains(t, state.Labels, "label3")
assert.Equal(t, state.Labels["label3"], result.Instance["__label3"])
assert.Contains(t, state.Labels, "label4_user")
assert.Equal(t, state.Labels["label4_user"], result.Instance["label4"])
// When the renamed key already exists in the result, the existing value is kept.
t.Run("should drop label if renamed collides with existing", func(t *testing.T) {
result.Instance["label1"] = uuid.NewString()
result.Instance["label1_user"] = uuid.NewString()
result.Instance["label4_user"] = uuid.NewString()
state = newState(context.Background(), l, rule, result, nil, url)
assert.NotContains(t, state.Labels, "__label1__")
assert.Contains(t, state.Labels, "label1")
assert.Equal(t, state.Labels["label1"], result.Instance["label1"])
assert.Equal(t, state.Labels["label1_user"], result.Instance["label1_user"])
assert.NotContains(t, state.Labels, "label4")
assert.Equal(t, state.Labels["label4_user"], result.Instance["label4_user"])
})
})
// newState never consults the cache, so every field must have its preset value.
t.Run("creates a state with preset fields if there is no current state", func(t *testing.T) {
rule := generateRule()
extraLabels := ngmodels.GenerateAlertLabels(2, "extra-")
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
expectedLbl, expectedAnn := expandAnnotationsAndLabels(context.Background(), l, rule, result, extraLabels, url)
state := newState(context.Background(), l, rule, result, extraLabels, url)
assert.Equal(t, rule.OrgID, state.OrgID)
assert.Equal(t, rule.UID, state.AlertRuleUID)
assert.Equal(t, state.Labels.Fingerprint(), state.CacheID)
// result.State is left unset here, so this compares against its zero value.
assert.Equal(t, result.State, state.State)
assert.Equal(t, "", state.StateReason)
assert.Equal(t, result.Instance.Fingerprint(), state.ResultFingerprint)
assert.Nil(t, state.LatestResult)
assert.Nil(t, state.Error)
assert.Nil(t, state.Image)
assert.EqualValues(t, expectedAnn, state.Annotations)
assert.EqualValues(t, expectedLbl, state.Labels)
assert.Nil(t, state.Values)
assert.Equal(t, result.EvaluatedAt, state.StartsAt)
assert.Equal(t, result.EvaluatedAt, state.EndsAt)
assert.Nil(t, state.ResolvedAt)
assert.Nil(t, state.LastSentAt)
assert.Equal(t, "", state.LastEvaluationString)
assert.Equal(t, result.EvaluatedAt, state.LastEvaluationTime)
assert.Equal(t, result.EvaluationDuration, state.EvaluationDuration)
})
}
func TestPatch(t *testing.T) {
key := ngmodels.GenerateRuleKey(1)
t.Run("it populates some fields from the current state if it exists", func(t *testing.T) {
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
state := randomSate(key)
orig := state.Copy()
current := randomSate(key)
patch(&state, &current, result)
// Fields that should not change
assert.Equal(t, orig.OrgID, state.OrgID)
assert.Equal(t, orig.AlertRuleUID, state.AlertRuleUID)
assert.Equal(t, orig.CacheID, state.CacheID)
assert.Equal(t, orig.ResultFingerprint, state.ResultFingerprint)
assert.EqualValues(t, orig.Annotations, state.Annotations)
assert.EqualValues(t, orig.Labels, state.Labels)
assert.Equal(t, orig.LastEvaluationTime, state.LastEvaluationTime)
assert.Equal(t, orig.EvaluationDuration, state.EvaluationDuration)
assert.Equal(t, current.State, state.State)
assert.Equal(t, current.StateReason, state.StateReason)
assert.Equal(t, current.Image, state.Image)
assert.Equal(t, current.LatestResult, state.LatestResult)
assert.Equal(t, current.Error, state.Error)
assert.Equal(t, current.Values, state.Values)
assert.Equal(t, current.StartsAt, state.StartsAt)
assert.Equal(t, current.EndsAt, state.EndsAt)
assert.Equal(t, current.ResolvedAt, state.ResolvedAt)
assert.Equal(t, current.LastSentAt, state.LastSentAt)
assert.Equal(t, current.LastEvaluationString, state.LastEvaluationString)
})
t.Run("copies system-owned annotations from current state", func(t *testing.T) {
state := randomSate(key)
orig := state.Copy()
expectedAnnotations := data.Labels(state.Annotations).Copy()
current := randomSate(key)
for key := range ngmodels.InternalAnnotationNameSet {
val := util.GenerateShortUID()
current.Annotations[key] = val
expectedAnnotations[key] = val
}
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
}
patch(&state, &current, result)
assert.EqualValues(t, expectedAnnotations, state.Annotations)
assert.Equal(t, current.State, state.State)
assert.Equal(t, current.StateReason, state.StateReason)
assert.Equal(t, current.Image, state.Image)
assert.Equal(t, current.LatestResult, state.LatestResult)
assert.Equal(t, current.Error, state.Error)
assert.Equal(t, current.Values, state.Values)
assert.Equal(t, current.StartsAt, state.StartsAt)
assert.Equal(t, current.EndsAt, state.EndsAt)
assert.Equal(t, current.ResolvedAt, state.ResolvedAt)
assert.Equal(t, current.LastSentAt, state.LastSentAt)
assert.Equal(t, current.LastEvaluationString, state.LastEvaluationString)
// Fields that should not change
assert.Equal(t, orig.OrgID, state.OrgID)
assert.Equal(t, orig.AlertRuleUID, state.AlertRuleUID)
assert.Equal(t, orig.CacheID, state.CacheID)
assert.Equal(t, orig.ResultFingerprint, state.ResultFingerprint)
assert.EqualValues(t, orig.Labels, state.Labels)
assert.Equal(t, orig.LastEvaluationTime, state.LastEvaluationTime)
assert.Equal(t, orig.EvaluationDuration, state.EvaluationDuration)
})
t.Run("if result Error and current state is Error it should copy datasource_uid and ref_id labels", func(t *testing.T) {
state := randomSate(key)
orig := state.Copy()
current := randomSate(key)
current.State = eval.Error
current.Labels["datasource_uid"] = util.GenerateShortUID()
current.Labels["ref_id"] = util.GenerateShortUID()
result := eval.Result{
Instance: ngmodels.GenerateAlertLabels(5, "result-"),
State: eval.Error,
}
expectedLabels := orig.Labels.Copy()
expectedLabels["datasource_uid"] = current.Labels["datasource_uid"]
expectedLabels["ref_id"] = current.Labels["ref_id"]
patch(&state, &current, result)
assert.Equal(t, expectedLabels, state.Labels)
assert.Equal(t, current.State, state.State)
assert.Equal(t, current.StateReason, state.StateReason)
assert.Equal(t, current.Image, state.Image)
assert.Equal(t, current.LatestResult, state.LatestResult)
assert.Equal(t, current.Error, state.Error)
assert.Equal(t, current.Values, state.Values)
assert.Equal(t, current.StartsAt, state.StartsAt)
assert.Equal(t, current.EndsAt, state.EndsAt)
assert.Equal(t, current.ResolvedAt, state.ResolvedAt)
assert.Equal(t, current.LastSentAt, state.LastSentAt)
assert.Equal(t, current.LastEvaluationString, state.LastEvaluationString)
// Fields that should not change
assert.Equal(t, orig.OrgID, state.OrgID)
assert.Equal(t, orig.AlertRuleUID, state.AlertRuleUID)
assert.Equal(t, orig.CacheID, state.CacheID)
assert.Equal(t, orig.ResultFingerprint, state.ResultFingerprint)
assert.Equal(t, orig.LastEvaluationTime, state.LastEvaluationTime)
assert.Equal(t, orig.EvaluationDuration, state.EvaluationDuration)
assert.EqualValues(t, orig.Annotations, state.Annotations)
})
}

Loading…
Cancel
Save