Alerting: Add un-documented toggle for changing state history backend, add shells for remote loki and sql (#61072)

* Add toggle for state history backend and shells

* Extract some shared logic and add tests
pull/61108/head^2
Alexander Weaver 3 years ago committed by GitHub
parent cd04f4e564
commit eb960d9725
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 26
      pkg/services/ngalert/ngalert.go
  2. 63
      pkg/services/ngalert/state/historian/annotation.go
  3. 56
      pkg/services/ngalert/state/historian/core.go
  4. 125
      pkg/services/ngalert/state/historian/core_test.go
  5. 22
      pkg/services/ngalert/state/historian/loki.go
  6. 22
      pkg/services/ngalert/state/historian/sql.go
  7. 4
      pkg/services/ngalert/state/manager_test.go
  8. 2
      pkg/setting/setting_unified_alerting.go

@ -200,11 +200,9 @@ func (ng *AlertNG) init() error {
AlertSender: alertsRouter,
}
var history state.Historian
if ng.Cfg.UnifiedAlerting.StateHistory.Enabled {
history = historian.NewAnnotationHistorian(ng.annotationsRepo, ng.dashboardService)
} else {
history = historian.NewNopHistorian()
history, err := configureHistorianBackend(ng.Cfg.UnifiedAlerting.StateHistory, ng.annotationsRepo, ng.dashboardService)
if err != nil {
return err
}
stateManager := state.NewManager(ng.Metrics.GetStateMetrics(), appUrl, store, ng.imageService, clk, history)
scheduler := schedule.NewScheduler(schedCfg, stateManager)
@ -365,3 +363,21 @@ func readQuotaConfig(cfg *setting.Cfg) (*quota.Map, error) {
limits.Set(orgQuotaTag, alertOrgQuota)
return limits, nil
}
func configureHistorianBackend(cfg setting.UnifiedAlertingStateHistorySettings, ar annotations.Repository, ds dashboards.DashboardService) (state.Historian, error) {
if !cfg.Enabled {
return historian.NewNopHistorian(), nil
}
if cfg.Backend == "annotations" {
return historian.NewAnnotationBackend(ar, ds), nil
}
if cfg.Backend == "loki" {
return historian.NewRemoteLokiBackend(), nil
}
if cfg.Backend == "sql" {
return historian.NewSqlBackend(), nil
}
return nil, fmt.Errorf("unrecognized state history backend: %s", cfg.Backend)
}

@ -4,12 +4,9 @@ import (
"context"
"fmt"
"sort"
"strconv"
"strings"
"time"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/components/simplejson"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/annotations"
@ -19,15 +16,15 @@ import (
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
// AnnotationStateHistorian is an implementation of state.Historian that uses Grafana Annotations as the backing datastore.
type AnnotationStateHistorian struct {
// AnnotationBackend is an implementation of state.Historian that uses Grafana Annotations as the backing datastore.
type AnnotationBackend struct {
annotations annotations.Repository
dashboards *dashboardResolver
log log.Logger
}
func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashboards.DashboardService) *AnnotationStateHistorian {
return &AnnotationStateHistorian{
func NewAnnotationBackend(annotations annotations.Repository, dashboards dashboards.DashboardService) *AnnotationBackend {
return &AnnotationBackend{
annotations: annotations,
dashboards: newDashboardResolver(dashboards, defaultDashboardCacheExpiry),
log: log.New("ngalert.state.historian"),
@ -35,7 +32,7 @@ func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashb
}
// RecordStates writes a number of state transitions for a given rule to state history.
func (h *AnnotationStateHistorian) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) {
func (h *AnnotationBackend) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) {
logger := h.log.FromContext(ctx)
// Build annotations before starting goroutine, to make sure all data is copied and won't mutate underneath us.
annotations := h.buildAnnotations(rule, states, logger)
@ -43,10 +40,10 @@ func (h *AnnotationStateHistorian) RecordStatesAsync(ctx context.Context, rule *
go h.recordAnnotationsSync(ctx, panel, annotations, logger)
}
func (h *AnnotationStateHistorian) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item {
func (h *AnnotationBackend) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item {
items := make([]annotations.Item, 0, len(states))
for _, state := range states {
if !shouldAnnotate(state) {
if !shouldRecord(state) {
continue
}
logger.Debug("Alert state changed creating annotation", "newState", state.Formatted(), "oldState", state.PreviousFormatted())
@ -68,33 +65,7 @@ func (h *AnnotationStateHistorian) buildAnnotations(rule *ngmodels.AlertRule, st
return items
}
// panelKey uniquely identifies a panel.
type panelKey struct {
orgID int64
dashUID string
panelID int64
}
// panelKey attempts to get the key of the panel attached to the given rule. Returns nil if the rule is not attached to a panel.
func parsePanelKey(rule *ngmodels.AlertRule, logger log.Logger) *panelKey {
dashUID, ok := rule.Annotations[ngmodels.DashboardUIDAnnotation]
if ok {
panelAnno := rule.Annotations[ngmodels.PanelIDAnnotation]
panelID, err := strconv.ParseInt(panelAnno, 10, 64)
if err != nil {
logger.Error("Error parsing panelUID for alert annotation", "actual", panelAnno, "error", err)
return nil
}
return &panelKey{
orgID: rule.OrgID,
dashUID: dashUID,
panelID: panelID,
}
}
return nil
}
func (h *AnnotationStateHistorian) recordAnnotationsSync(ctx context.Context, panel *panelKey, annotations []annotations.Item, logger log.Logger) {
func (h *AnnotationBackend) recordAnnotationsSync(ctx context.Context, panel *panelKey, annotations []annotations.Item, logger log.Logger) {
if panel != nil {
dashID, err := h.dashboards.getID(ctx, panel.orgID, panel.dashUID)
if err != nil {
@ -148,21 +119,3 @@ func buildAnnotationTextAndData(rule *ngmodels.AlertRule, currentState *state.St
labels := removePrivateLabels(currentState.Labels)
return fmt.Sprintf("%s {%s} - %s", rule.Title, labels.String(), value), jsonData
}
func removePrivateLabels(labels data.Labels) data.Labels {
result := make(data.Labels)
for k, v := range labels {
if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") {
result[k] = v
}
}
return result
}
func shouldAnnotate(transition state.StateTransition) bool {
// Do not log not transitioned states normal states if it was marked as stale
if !transition.Changed() || transition.StateReason == ngmodels.StateReasonMissingSeries && transition.PreviousState == eval.Normal && transition.State.State == eval.Normal {
return false
}
return true
}

@ -0,0 +1,56 @@
package historian
import (
"strconv"
"strings"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
func shouldRecord(transition state.StateTransition) bool {
// Do not log not transitioned states normal states if it was marked as stale
if !transition.Changed() || transition.StateReason == models.StateReasonMissingSeries && transition.PreviousState == eval.Normal && transition.State.State == eval.Normal {
return false
}
return true
}
func removePrivateLabels(labels data.Labels) data.Labels {
result := make(data.Labels)
for k, v := range labels {
if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") {
result[k] = v
}
}
return result
}
// panelKey uniquely identifies a panel.
type panelKey struct {
orgID int64
dashUID string
panelID int64
}
// panelKey attempts to get the key of the panel attached to the given rule. Returns nil if the rule is not attached to a panel.
func parsePanelKey(rule *models.AlertRule, logger log.Logger) *panelKey {
dashUID, ok := rule.Annotations[models.DashboardUIDAnnotation]
if ok {
panelAnno := rule.Annotations[models.PanelIDAnnotation]
panelID, err := strconv.ParseInt(panelAnno, 10, 64)
if err != nil {
logger.Error("Error parsing panelUID for alert annotation", "actual", panelAnno, "error", err)
return nil
}
return &panelKey{
orgID: rule.OrgID,
dashUID: dashUID,
panelID: panelID,
}
}
return nil
}

@ -6,12 +6,14 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
func TestShouldAnnotate(t *testing.T) {
func TestShouldRecord(t *testing.T) {
allStates := []eval.State{
eval.Normal,
eval.Alerting,
@ -94,7 +96,126 @@ func TestShouldAnnotate(t *testing.T) {
}
t.Run(fmt.Sprintf("%s -> %s should be %v", trans.PreviousFormatted(), trans.Formatted(), !ok), func(t *testing.T) {
require.Equal(t, !ok, shouldAnnotate(trans))
require.Equal(t, !ok, shouldRecord(trans))
})
}
}
func TestRemovePrivateLabels(t *testing.T) {
type testCase struct {
name string
in data.Labels
exp data.Labels
}
cases := []testCase{
{
name: "empty",
in: map[string]string{},
exp: map[string]string{},
},
{
name: "nil",
in: nil,
exp: map[string]string{},
},
{
name: "prefix",
in: map[string]string{"__asdf": "one", "b": "c"},
exp: map[string]string{"b": "c"},
},
{
name: "suffix",
in: map[string]string{"asdf__": "one", "b": "c"},
exp: map[string]string{"b": "c"},
},
{
name: "both",
in: map[string]string{"__asdf__": "one", "b": "c"},
exp: map[string]string{"b": "c"},
},
{
name: "all",
in: map[string]string{"__a__": "a", "__b": "b", "c__": "c"},
exp: map[string]string{},
},
{
name: "whitespace",
in: map[string]string{" __asdf__ ": "one", "b": "c"},
exp: map[string]string{" __asdf__ ": "one", "b": "c"},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
res := removePrivateLabels(tc.in)
require.Equal(t, tc.exp, res)
})
}
}
func TestParsePanelKey(t *testing.T) {
logger := log.NewNopLogger()
type testCase struct {
name string
in models.AlertRule
exp *panelKey
}
cases := []testCase{
{
name: "no dash UID",
in: models.AlertRule{
OrgID: 1,
Annotations: map[string]string{
models.PanelIDAnnotation: "123",
},
},
exp: nil,
},
{
name: "no panel ID",
in: models.AlertRule{
OrgID: 1,
Annotations: map[string]string{
models.DashboardUIDAnnotation: "abcd-uid",
},
},
exp: nil,
},
{
name: "invalid panel ID",
in: models.AlertRule{
OrgID: 1,
Annotations: map[string]string{
models.DashboardUIDAnnotation: "abcd-uid",
models.PanelIDAnnotation: "bad-id",
},
},
exp: nil,
},
{
name: "success",
in: models.AlertRule{
OrgID: 1,
Annotations: map[string]string{
models.DashboardUIDAnnotation: "abcd-uid",
models.PanelIDAnnotation: "123",
},
},
exp: &panelKey{
orgID: 1,
dashUID: "abcd-uid",
panelID: 123,
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
res := parsePanelKey(&tc.in, logger)
require.Equal(t, tc.exp, res)
})
}
}

@ -0,0 +1,22 @@
package historian
import (
"context"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
type RemoteLokiBackend struct {
log log.Logger
}
func NewRemoteLokiBackend() *RemoteLokiBackend {
return &RemoteLokiBackend{
log: log.New("ngalert.state.historian"),
}
}
func (h *RemoteLokiBackend) RecordStatesAsync(ctx context.Context, _ *models.AlertRule, _ []state.StateTransition) {
}

@ -0,0 +1,22 @@
package historian
import (
"context"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/ngalert/state"
)
type SqlBackend struct {
log log.Logger
}
func NewSqlBackend() *SqlBackend {
return &SqlBackend{
log: log.New("ngalert.state.historian"),
}
}
func (h *SqlBackend) RecordStatesAsync(ctx context.Context, _ *models.AlertRule, _ []state.StateTransition) {
}

@ -210,7 +210,7 @@ func TestDashboardAnnotations(t *testing.T) {
_, dbstore := tests.SetupTestEnv(t, 1)
fakeAnnoRepo := annotationstest.NewFakeAnnotationsRepo()
hist := historian.NewAnnotationHistorian(fakeAnnoRepo, &dashboards.FakeDashboardService{})
hist := historian.NewAnnotationBackend(fakeAnnoRepo, &dashboards.FakeDashboardService{})
st := state.NewManager(testMetrics.GetStateMetrics(), nil, dbstore, &state.NoopImageService{}, clock.New(), hist)
const mainOrgID int64 = 1
@ -2191,7 +2191,7 @@ func TestProcessEvalResults(t *testing.T) {
for _, tc := range testCases {
fakeAnnoRepo := annotationstest.NewFakeAnnotationsRepo()
hist := historian.NewAnnotationHistorian(fakeAnnoRepo, &dashboards.FakeDashboardService{})
hist := historian.NewAnnotationBackend(fakeAnnoRepo, &dashboards.FakeDashboardService{})
st := state.NewManager(testMetrics.GetStateMetrics(), nil, &state.FakeInstanceStore{}, &state.NotAvailableImageService{}, clock.New(), hist)
t.Run(tc.desc, func(t *testing.T) {
for _, res := range tc.evalResults {

@ -102,6 +102,7 @@ type UnifiedAlertingReservedLabelSettings struct {
type UnifiedAlertingStateHistorySettings struct {
Enabled bool
Backend string
}
// IsEnabled returns true if UnifiedAlertingSettings.Enabled is either nil or true.
@ -313,6 +314,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
stateHistory := iniFile.Section("unified_alerting.state_history")
uaCfgStateHistory := UnifiedAlertingStateHistorySettings{
Enabled: stateHistory.Key("enabled").MustBool(stateHistoryDefaultEnabled),
Backend: stateHistory.Key("backend").MustString("annotations"),
}
uaCfg.StateHistory = uaCfgStateHistory

Loading…
Cancel
Save