From 046a9bb7c1d1aa946347e17ccdfab67ca5cddd25 Mon Sep 17 00:00:00 2001 From: Alexander Weaver Date: Wed, 25 Jan 2023 11:29:57 -0600 Subject: [PATCH] Alerting: Copy rule definitions into state history (#62032) * Copy rules instead of accepting pointer * Deep-copy the rule, for even more guarantees * Create struct just for needed fields * Move RuleMeta to historian/model package, iron out package dependencies * Move tests for dash ID parsing to model package along with code --- .../ngalert/state/historian/annotation.go | 7 +- pkg/services/ngalert/state/historian/core.go | 17 ++-- .../ngalert/state/historian/core_test.go | 67 ---------------- pkg/services/ngalert/state/historian/loki.go | 7 +- .../ngalert/state/historian/model/rule.go | 46 +++++++++++ .../state/historian/model/rule_test.go | 77 +++++++++++++++++++ pkg/services/ngalert/state/historian/noop.go | 4 +- pkg/services/ngalert/state/historian/sql.go | 3 +- pkg/services/ngalert/state/manager.go | 3 +- pkg/services/ngalert/state/persist.go | 3 +- pkg/services/ngalert/state/testing.go | 3 +- 11 files changed, 146 insertions(+), 91 deletions(-) create mode 100644 pkg/services/ngalert/state/historian/model/rule.go create mode 100644 pkg/services/ngalert/state/historian/model/rule_test.go diff --git a/pkg/services/ngalert/state/historian/annotation.go b/pkg/services/ngalert/state/historian/annotation.go index 03f1b73faaa..188c9010371 100644 --- a/pkg/services/ngalert/state/historian/annotation.go +++ b/pkg/services/ngalert/state/historian/annotation.go @@ -16,6 +16,7 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/eval" ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" + history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model" ) // AnnotationBackend is an implementation of state.Historian that uses Grafana Annotations as the backing datastore. @@ -40,7 +41,7 @@ func NewAnnotationBackend(annotations annotations.Repository, dashboards dashboa } // RecordStates writes a number of state transitions for a given rule to state history. -func (h *AnnotationBackend) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) <-chan error { +func (h *AnnotationBackend) RecordStatesAsync(ctx context.Context, rule history_model.RuleMeta, states []state.StateTransition) <-chan error { logger := h.log.FromContext(ctx) // Build annotations before starting goroutine, to make sure all data is copied and won't mutate underneath us. annotations := h.buildAnnotations(rule, states, logger) @@ -136,7 +137,7 @@ func (h *AnnotationBackend) QueryStates(ctx context.Context, query ngmodels.Hist return frame, nil } -func (h *AnnotationBackend) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item { +func (h *AnnotationBackend) buildAnnotations(rule history_model.RuleMeta, states []state.StateTransition, logger log.Logger) []annotations.Item { items := make([]annotations.Item, 0, len(states)) for _, state := range states { if !shouldRecord(state) { @@ -184,7 +185,7 @@ func (h *AnnotationBackend) recordAnnotationsSync(ctx context.Context, panel *pa return nil } -func buildAnnotationTextAndData(rule *ngmodels.AlertRule, currentState *state.State) (string, *simplejson.Json) { +func buildAnnotationTextAndData(rule history_model.RuleMeta, currentState *state.State) (string, *simplejson.Json) { jsonData := simplejson.New() var value string diff --git a/pkg/services/ngalert/state/historian/core.go b/pkg/services/ngalert/state/historian/core.go index eef8e7e7236..d8d49868413 100644 --- a/pkg/services/ngalert/state/historian/core.go +++ b/pkg/services/ngalert/state/historian/core.go @@ -1,7 +1,6 @@ package historian import ( - "strconv" "strings" "github.com/grafana/grafana-plugin-sdk-go/data" @@ -9,6 +8,7 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" + history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model" ) func shouldRecord(transition state.StateTransition) bool { @@ -37,19 +37,12 @@ type panelKey struct { } // panelKey attempts to get the key of the panel attached to the given rule. Returns nil if the rule is not attached to a panel. -func parsePanelKey(rule *models.AlertRule, logger log.Logger) *panelKey { - dashUID, ok := rule.Annotations[models.DashboardUIDAnnotation] - if ok { - panelAnno := rule.Annotations[models.PanelIDAnnotation] - panelID, err := strconv.ParseInt(panelAnno, 10, 64) - if err != nil { - logger.Error("Error parsing panelUID for alert annotation", "actual", panelAnno, "error", err) - return nil - } +func parsePanelKey(rule history_model.RuleMeta, logger log.Logger) *panelKey { + if rule.DashboardUID != "" { return &panelKey{ orgID: rule.OrgID, - dashUID: dashUID, - panelID: panelID, + dashUID: rule.DashboardUID, + panelID: rule.PanelID, } } return nil diff --git a/pkg/services/ngalert/state/historian/core_test.go b/pkg/services/ngalert/state/historian/core_test.go index b9a2cfd1108..0eae1303640 100644 --- a/pkg/services/ngalert/state/historian/core_test.go +++ b/pkg/services/ngalert/state/historian/core_test.go @@ -7,7 +7,6 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/grafana-plugin-sdk-go/data" - "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" @@ -153,69 +152,3 @@ func TestRemovePrivateLabels(t *testing.T) { }) } } - -func TestParsePanelKey(t *testing.T) { - logger := log.NewNopLogger() - - type testCase struct { - name string - in models.AlertRule - exp *panelKey - } - - cases := []testCase{ - { - name: "no dash UID", - in: models.AlertRule{ - OrgID: 1, - Annotations: map[string]string{ - models.PanelIDAnnotation: "123", - }, - }, - exp: nil, - }, - { - name: "no panel ID", - in: models.AlertRule{ - OrgID: 1, - Annotations: map[string]string{ - models.DashboardUIDAnnotation: "abcd-uid", - }, - }, - exp: nil, - }, - { - name: "invalid panel ID", - in: models.AlertRule{ - OrgID: 1, - Annotations: map[string]string{ - models.DashboardUIDAnnotation: "abcd-uid", - models.PanelIDAnnotation: "bad-id", - }, - }, - exp: nil, - }, - { - name: "success", - in: models.AlertRule{ - OrgID: 1, - Annotations: map[string]string{ - models.DashboardUIDAnnotation: "abcd-uid", - models.PanelIDAnnotation: "123", - }, - }, - exp: &panelKey{ - orgID: 1, - dashUID: "abcd-uid", - panelID: 123, - }, - }, - } - - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - res := parsePanelKey(&tc.in, logger) - require.Equal(t, tc.exp, res) - }) - } -} diff --git a/pkg/services/ngalert/state/historian/loki.go b/pkg/services/ngalert/state/historian/loki.go index dcb6dd2da67..a37f6b40bac 100644 --- a/pkg/services/ngalert/state/historian/loki.go +++ b/pkg/services/ngalert/state/historian/loki.go @@ -12,6 +12,7 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" + history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model" ) const ( @@ -43,7 +44,7 @@ func (h *RemoteLokiBackend) TestConnection() error { return h.client.ping() } -func (h *RemoteLokiBackend) RecordStatesAsync(ctx context.Context, rule *models.AlertRule, states []state.StateTransition) <-chan error { +func (h *RemoteLokiBackend) RecordStatesAsync(ctx context.Context, rule history_model.RuleMeta, states []state.StateTransition) <-chan error { logger := h.log.FromContext(ctx) streams := h.statesToStreams(rule, states, logger) return h.recordStreamsAsync(ctx, streams, logger) @@ -53,7 +54,7 @@ func (h *RemoteLokiBackend) QueryStates(ctx context.Context, query models.Histor return data.NewFrame("states"), nil } -func (h *RemoteLokiBackend) statesToStreams(rule *models.AlertRule, states []state.StateTransition, logger log.Logger) []stream { +func (h *RemoteLokiBackend) statesToStreams(rule history_model.RuleMeta, states []state.StateTransition, logger log.Logger) []stream { buckets := make(map[string][]row) // label repr -> entries for _, state := range states { if !shouldRecord(state) { @@ -63,7 +64,7 @@ func (h *RemoteLokiBackend) statesToStreams(rule *models.AlertRule, states []sta labels := removePrivateLabels(state.State.Labels) labels[OrgIDLabel] = fmt.Sprint(rule.OrgID) labels[RuleUIDLabel] = fmt.Sprint(rule.UID) - labels[GroupLabel] = fmt.Sprint(rule.RuleGroup) + labels[GroupLabel] = fmt.Sprint(rule.Group) labels[FolderUIDLabel] = fmt.Sprint(rule.NamespaceUID) repr := labels.String() diff --git a/pkg/services/ngalert/state/historian/model/rule.go b/pkg/services/ngalert/state/historian/model/rule.go new file mode 100644 index 00000000000..a63656c7560 --- /dev/null +++ b/pkg/services/ngalert/state/historian/model/rule.go @@ -0,0 +1,46 @@ +package model + +import ( + "strconv" + + "github.com/grafana/grafana/pkg/cmd/grafana-cli/logger" + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/ngalert/models" +) + +// RuleMeta is the metadata about a rule that is needed by state history. +type RuleMeta struct { + ID int64 + OrgID int64 + UID string + Title string + Group string + NamespaceUID string + DashboardUID string + PanelID int64 +} + +func NewRuleMeta(r *models.AlertRule, log log.Logger) RuleMeta { + dashUID, ok := r.Annotations[models.DashboardUIDAnnotation] + var panelID int64 + if ok { + panelAnno := r.Annotations[models.PanelIDAnnotation] + pid, err := strconv.ParseInt(panelAnno, 10, 64) + if err != nil { + logger.Error("Error parsing panelUID for alert annotation", "ruleID", r.ID, "dash", dashUID, "actual", panelAnno, "error", err) + pid = 0 + dashUID = "" + } + panelID = pid + } + return RuleMeta{ + ID: r.ID, + OrgID: r.OrgID, + UID: r.UID, + Title: r.Title, + Group: r.RuleGroup, + NamespaceUID: r.NamespaceUID, + DashboardUID: dashUID, + PanelID: panelID, + } +} diff --git a/pkg/services/ngalert/state/historian/model/rule_test.go b/pkg/services/ngalert/state/historian/model/rule_test.go new file mode 100644 index 00000000000..228e9537307 --- /dev/null +++ b/pkg/services/ngalert/state/historian/model/rule_test.go @@ -0,0 +1,77 @@ +package model + +import ( + "testing" + + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/stretchr/testify/require" +) + +func TestNewRuleMeta(t *testing.T) { + logger := log.NewNopLogger() + + type testCase struct { + name string + in models.AlertRule + expDash string + expPanel int64 + } + + cases := []testCase{ + { + name: "no dash UID", + in: models.AlertRule{ + OrgID: 1, + Annotations: map[string]string{ + models.PanelIDAnnotation: "123", + }, + }, + expDash: "", + expPanel: 0, + }, + { + name: "no panel ID", + in: models.AlertRule{ + OrgID: 1, + Annotations: map[string]string{ + models.DashboardUIDAnnotation: "abcd-uid", + }, + }, + expDash: "", + expPanel: 0, + }, + { + name: "invalid panel ID", + in: models.AlertRule{ + OrgID: 1, + Annotations: map[string]string{ + models.DashboardUIDAnnotation: "abcd-uid", + models.PanelIDAnnotation: "bad-id", + }, + }, + expDash: "", + expPanel: 0, + }, + { + name: "success", + in: models.AlertRule{ + OrgID: 1, + Annotations: map[string]string{ + models.DashboardUIDAnnotation: "abcd-uid", + models.PanelIDAnnotation: "123", + }, + }, + expDash: "abcd-uid", + expPanel: 123, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + res := NewRuleMeta(&tc.in, logger) + require.Equal(t, tc.expDash, res.DashboardUID) + require.Equal(t, tc.expPanel, res.PanelID) + }) + } +} diff --git a/pkg/services/ngalert/state/historian/noop.go b/pkg/services/ngalert/state/historian/noop.go index a4b7ce1126b..2acee1ace1f 100644 --- a/pkg/services/ngalert/state/historian/noop.go +++ b/pkg/services/ngalert/state/historian/noop.go @@ -3,8 +3,8 @@ package historian import ( "context" - "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" + history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model" ) // NoOpHistorian is a state.Historian that does nothing with the resulting data, to be used in contexts where history is not needed. @@ -14,7 +14,7 @@ func NewNopHistorian() *NoOpHistorian { return &NoOpHistorian{} } -func (f *NoOpHistorian) RecordStatesAsync(ctx context.Context, _ *models.AlertRule, _ []state.StateTransition) <-chan error { +func (f *NoOpHistorian) RecordStatesAsync(ctx context.Context, _ history_model.RuleMeta, _ []state.StateTransition) <-chan error { errCh := make(chan error) close(errCh) return errCh diff --git a/pkg/services/ngalert/state/historian/sql.go b/pkg/services/ngalert/state/historian/sql.go index 8e5d0cf2294..64e716204b8 100644 --- a/pkg/services/ngalert/state/historian/sql.go +++ b/pkg/services/ngalert/state/historian/sql.go @@ -7,6 +7,7 @@ import ( "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" + history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model" ) type SqlBackend struct { @@ -19,7 +20,7 @@ func NewSqlBackend() *SqlBackend { } } -func (h *SqlBackend) RecordStatesAsync(ctx context.Context, _ *models.AlertRule, _ []state.StateTransition) <-chan error { +func (h *SqlBackend) RecordStatesAsync(ctx context.Context, _ history_model.RuleMeta, _ []state.StateTransition) <-chan error { errCh := make(chan error) close(errCh) return errCh diff --git a/pkg/services/ngalert/state/manager.go b/pkg/services/ngalert/state/manager.go index 41b06c37f41..779066d64da 100644 --- a/pkg/services/ngalert/state/manager.go +++ b/pkg/services/ngalert/state/manager.go @@ -12,6 +12,7 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/metrics" ngModels "github.com/grafana/grafana/pkg/services/ngalert/models" + history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model" ) var ( @@ -197,7 +198,7 @@ func (st *Manager) ProcessEvalResults(ctx context.Context, evaluatedAt time.Time allChanges := append(states, staleStates...) if st.historian != nil { - st.historian.RecordStatesAsync(ctx, alertRule, allChanges) + st.historian.RecordStatesAsync(ctx, history_model.NewRuleMeta(alertRule, logger), allChanges) } return allChanges } diff --git a/pkg/services/ngalert/state/persist.go b/pkg/services/ngalert/state/persist.go index 1665444d248..237235f2627 100644 --- a/pkg/services/ngalert/state/persist.go +++ b/pkg/services/ngalert/state/persist.go @@ -4,6 +4,7 @@ import ( "context" "github.com/grafana/grafana/pkg/services/ngalert/models" + history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model" ) // InstanceStore represents the ability to fetch and write alert instances. @@ -25,7 +26,7 @@ type Historian interface { // RecordStates writes a number of state transitions for a given rule to state history. It returns a channel that // is closed when writing the state transitions has completed. If an error has occurred, the channel will contain a // non-nil error. - RecordStatesAsync(ctx context.Context, rule *models.AlertRule, states []StateTransition) <-chan error + RecordStatesAsync(ctx context.Context, rule history_model.RuleMeta, states []StateTransition) <-chan error } // ImageCapturer captures images. diff --git a/pkg/services/ngalert/state/testing.go b/pkg/services/ngalert/state/testing.go index 199d07d7060..dcb1f31e362 100644 --- a/pkg/services/ngalert/state/testing.go +++ b/pkg/services/ngalert/state/testing.go @@ -5,6 +5,7 @@ import ( "sync" "github.com/grafana/grafana/pkg/services/ngalert/models" + history_model "github.com/grafana/grafana/pkg/services/ngalert/state/historian/model" "github.com/grafana/grafana/pkg/services/screenshot" ) @@ -62,7 +63,7 @@ func (f *FakeRuleReader) ListAlertRules(_ context.Context, q *models.ListAlertRu type FakeHistorian struct{} -func (f *FakeHistorian) RecordStatesAsync(ctx context.Context, rule *models.AlertRule, states []StateTransition) <-chan error { +func (f *FakeHistorian) RecordStatesAsync(ctx context.Context, rule history_model.RuleMeta, states []StateTransition) <-chan error { errCh := make(chan error) close(errCh) return errCh