diff --git a/pkg/services/ngalert/api/api_prometheus.go b/pkg/services/ngalert/api/api_prometheus.go index 354943408c3..0c3f4320c40 100644 --- a/pkg/services/ngalert/api/api_prometheus.go +++ b/pkg/services/ngalert/api/api_prometheus.go @@ -131,10 +131,8 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Res groupId, namespaceUID, namespace := r[0], r[1], r[2] newGroup := &apimodels.RuleGroup{ - Name: groupId, - // This doesn't make sense in our architecture - // so we use this field for passing to the frontend the namespace - File: namespace, + Name: groupId, + File: namespace, // file is what Prometheus uses for provisioning, we replace it with namespace. LastEvaluation: time.Time{}, EvaluationTime: 0, // TODO: see if we are able to pass this along with evaluation results } @@ -149,17 +147,10 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Res if !ok { continue } - var queryStr string - encodedQuery, err := json.Marshal(rule.Data) - if err != nil { - queryStr = err.Error() - } else { - queryStr = string(encodedQuery) - } alertingRule := apimodels.AlertingRule{ State: "inactive", Name: rule.Title, - Query: queryStr, + Query: ruleToQuery(srv.log, rule), Duration: rule.For.Seconds(), Annotations: rule.Annotations, } @@ -211,6 +202,7 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Res newRule.LastError = alertState.Error.Error() newRule.Health = "error" } + alertingRule.Alerts = append(alertingRule.Alerts, alert) } @@ -220,3 +212,45 @@ func (srv PrometheusSrv) RouteGetRuleStatuses(c *models.ReqContext) response.Res } return response.JSON(http.StatusOK, ruleResponse) } + +// ruleToQuery attempts to extract the datasource queries from the alert query model. +// Returns the whole JSON model as a string if it fails to extract a minimum of 1 query. +func ruleToQuery(logger log.Logger, rule *ngmodels.AlertRule) string { + var queryErr error + var queries []string + + for _, q := range rule.Data { + q, err := q.GetQuery() + if err != nil { + // If we can't find the query simply omit it, and try the rest. + // Even single query alerts would have 2 `AlertQuery`, one for the query and one for the condition. + if errors.Is(err, ngmodels.ErrNoQuery) { + continue + } + + // For any other type of error, it is unexpected abort and return the whole JSON. + logger.Debug("failed to parse a query", "err", err) + queryErr = err + break + } + + queries = append(queries, q) + } + + // If we were able to extract at least one query without failure use it. + if queryErr == nil && len(queries) > 0 { + return strings.Join(queries, " | ") + } + + return encodedQueriesOrError(rule.Data) +} + +// encodedQueriesOrError tries to encode rule query data into JSON if it fails returns the encoding error as a string. 
+func encodedQueriesOrError(rules []ngmodels.AlertQuery) string { + encodedQueries, err := json.Marshal(rules) + if err == nil { + return string(encodedQueries) + } + + return err.Error() +} diff --git a/pkg/services/ngalert/api/api_prometheus_test.go b/pkg/services/ngalert/api/api_prometheus_test.go index f8b45baddc4..2c86b540595 100644 --- a/pkg/services/ngalert/api/api_prometheus_test.go +++ b/pkg/services/ngalert/api/api_prometheus_test.go @@ -1,13 +1,22 @@ package api import ( + "context" + "encoding/json" + "fmt" "net/http" "testing" + "time" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/models" + ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/state" "github.com/grafana/grafana/pkg/services/ngalert/store" + "github.com/grafana/grafana/pkg/util" + "github.com/grafana/grafana/pkg/web" + "github.com/grafana/grafana-plugin-sdk-go/data" "github.com/stretchr/testify/require" ) @@ -16,7 +25,7 @@ func TestRouteGetAlertStatuses(t *testing.T) { fakeAlertInstanceManager := NewFakeAlertInstanceManager(t) orgID := int64(1) - server := PrometheusSrv{ + api := PrometheusSrv{ log: log.NewNopLogger(), manager: fakeAlertInstanceManager, store: fakeStore, @@ -25,7 +34,7 @@ func TestRouteGetAlertStatuses(t *testing.T) { c := &models.ReqContext{SignedInUser: &models.SignedInUser{OrgId: orgID}} t.Run("with no alerts", func(t *testing.T) { - r := server.RouteGetAlertStatuses(c) + r := api.RouteGetAlertStatuses(c) require.Equal(t, http.StatusOK, r.Status()) require.JSONEq(t, ` { @@ -38,8 +47,8 @@ func TestRouteGetAlertStatuses(t *testing.T) { }) t.Run("with two alerts", func(t *testing.T) { - fakeAlertInstanceManager.GenerateAlertInstances(1, 2) - r := server.RouteGetAlertStatuses(c) + fakeAlertInstanceManager.GenerateAlertInstances(1, util.GenerateShortUID(), 2) + r := api.RouteGetAlertStatuses(c) require.Equal(t, http.StatusOK, r.Status()) require.JSONEq(t, ` { @@ -78,3 +87,224 @@ func TestRouteGetAlertStatuses(t *testing.T) { }`, string(r.Body())) }) } + +func TestRouteGetRuleStatuses(t *testing.T) { + timeNow = func() time.Time { return time.Date(2022, 3, 10, 14, 0, 0, 0, time.UTC) } + orgID := int64(1) + + req, err := http.NewRequest("GET", "/api/v1/rules", nil) + require.NoError(t, err) + c := &models.ReqContext{Context: &web.Context{Req: req}, SignedInUser: &models.SignedInUser{OrgId: orgID}} + + t.Run("with no rules", func(t *testing.T) { + _, _, api := setupAPI(t) + r := api.RouteGetRuleStatuses(c) + + require.JSONEq(t, ` +{ + "status": "success", + "data": { + "groups": [] + } +} +`, string(r.Body())) + }) + + t.Run("with a rule that only has one query", func(t *testing.T) { + fakeStore, fakeAIM, api := setupAPI(t) + generateRuleAndInstanceWithQuery(t, orgID, fakeAIM, fakeStore, withClassicConditionSingleQuery()) + + r := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, r.Status()) + require.JSONEq(t, ` +{ + "status": "success", + "data": { + "groups": [{ + "name": "rule-group", + "file": "namespaceUID", + "rules": [{ + "state": "inactive", + "name": "AlwaysFiring", + "query": "vector(1)", + "alerts": [{ + "labels": { + "job": "prometheus" + }, + "annotations": { + "severity": "critical" + }, + "state": "Normal", + "activeAt": "0001-01-01T00:00:00Z", + "value": "" + }], + "labels": null, + "health": "ok", + "lastError": "", + "type": "alerting", + "lastEvaluation": "2022-03-10T14:01:00Z", + "duration": 180, + "evaluationTime": 60 + }], + "interval": 60, + "lastEvaluation": 
"2022-03-10T14:01:00Z", + "evaluationTime": 0 + }] + } +} +`, string(r.Body())) + }) + + t.Run("with a rule that has multiple queries", func(t *testing.T) { + fakeStore, fakeAIM, api := setupAPI(t) + generateRuleAndInstanceWithQuery(t, orgID, fakeAIM, fakeStore, withExpressionsMultiQuery()) + + r := api.RouteGetRuleStatuses(c) + require.Equal(t, http.StatusOK, r.Status()) + require.JSONEq(t, ` +{ + "status": "success", + "data": { + "groups": [{ + "name": "rule-group", + "file": "namespaceUID", + "rules": [{ + "state": "inactive", + "name": "AlwaysFiring", + "query": "vector(1) | vector(1)", + "alerts": [{ + "labels": { + "job": "prometheus" + }, + "annotations": { + "severity": "critical" + }, + "state": "Normal", + "activeAt": "0001-01-01T00:00:00Z", + "value": "" + }], + "labels": null, + "health": "ok", + "lastError": "", + "type": "alerting", + "lastEvaluation": "2022-03-10T14:01:00Z", + "duration": 180, + "evaluationTime": 60 + }], + "interval": 60, + "lastEvaluation": "2022-03-10T14:01:00Z", + "evaluationTime": 0 + }] + } +} +`, string(r.Body())) + }) +} + +func setupAPI(t *testing.T) (*store.FakeRuleStore, *fakeAlertInstanceManager, PrometheusSrv) { + fakeStore := store.NewFakeRuleStore(t) + fakeAIM := NewFakeAlertInstanceManager(t) + api := PrometheusSrv{ + log: log.NewNopLogger(), + manager: fakeAIM, + store: fakeStore, + } + + return fakeStore, fakeAIM, api +} + +func generateRuleAndInstanceWithQuery(t *testing.T, orgID int64, fakeAIM *fakeAlertInstanceManager, fakeStore *store.FakeRuleStore, query func(r *ngmodels.AlertRule)) { + t.Helper() + + rules := ngmodels.GenerateAlertRules(1, ngmodels.AlertRuleGen(withOrgID(orgID), asFixture(), query)) + + fakeAIM.GenerateAlertInstances(orgID, rules[0].UID, 1, func(s *state.State) *state.State { + s.Labels = data.Labels{"job": "prometheus"} + s.Annotations = data.Labels{"severity": "critical"} + return s + }) + + for _, r := range rules { + fakeStore.PutRule(context.Background(), r) + } +} + +// asFixture removes variable values of the alert rule. +// we're not too interested in variability of the rule in this scenario. 
+func asFixture() func(r *ngmodels.AlertRule) { + return func(r *ngmodels.AlertRule) { + r.Title = "AlwaysFiring" + r.NamespaceUID = "namespaceUID" + r.RuleGroup = "rule-group" + r.UID = "RuleUID" + r.Labels = nil + r.Annotations = nil + r.IntervalSeconds = 60 + r.For = 180 * time.Second + } +} + +func withClassicConditionSingleQuery() func(r *ngmodels.AlertRule) { + return func(r *ngmodels.AlertRule) { + queries := []ngmodels.AlertQuery{ + { + RefID: "A", + QueryType: "", + RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)}, + DatasourceUID: "AUID", + Model: json.RawMessage(fmt.Sprintf(prometheusQueryModel, "A")), + }, + { + RefID: "B", + QueryType: "", + RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)}, + DatasourceUID: "-100", + Model: json.RawMessage(fmt.Sprintf(classicConditionsModel, "A", "B")), + }, + } + r.Data = queries + } +} + +func withExpressionsMultiQuery() func(r *ngmodels.AlertRule) { + return func(r *ngmodels.AlertRule) { + queries := []ngmodels.AlertQuery{ + { + RefID: "A", + QueryType: "", + RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)}, + DatasourceUID: "AUID", + Model: json.RawMessage(fmt.Sprintf(prometheusQueryModel, "A")), + }, + { + RefID: "B", + QueryType: "", + RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)}, + DatasourceUID: "BUID", + Model: json.RawMessage(fmt.Sprintf(prometheusQueryModel, "B")), + }, + { + RefID: "C", + QueryType: "", + RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)}, + DatasourceUID: "-100", + Model: json.RawMessage(fmt.Sprintf(reduceLastExpressionModel, "A", "C")), + }, + { + RefID: "D", + QueryType: "", + RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)}, + DatasourceUID: "-100", + Model: json.RawMessage(fmt.Sprintf(reduceLastExpressionModel, "B", "D")), + }, + { + RefID: "E", + QueryType: "", + RelativeTimeRange: ngmodels.RelativeTimeRange{From: ngmodels.Duration(0), To: ngmodels.Duration(0)}, + DatasourceUID: "-100", + Model: json.RawMessage(fmt.Sprintf(mathExpressionModel, "A", "B", "E")), + }, + } + r.Data = queries + } +} diff --git a/pkg/services/ngalert/api/api_prometheus_test_fixtures.go b/pkg/services/ngalert/api/api_prometheus_test_fixtures.go new file mode 100644 index 00000000000..5260149f608 --- /dev/null +++ b/pkg/services/ngalert/api/api_prometheus_test_fixtures.go @@ -0,0 +1,118 @@ +package api + +// prometheusQueryModel represents the way we express a prometheus query as part of an alert query. +// It supports formatting for the refID of the query. +const prometheusQueryModel = ` +{ + "exemplar": false, + "expr": "vector(1)", + "hide": false, + "interval": "", + "intervalMs": 1000, + "legendFormat": "", + "maxDataPoints": 43200, + "refId": "%s" +} +` + +// classicConditionsModel represents the way we express a classic condition as part of an alert query. +// It supports formatting for 1) the refID of the query and 2) the refID of the condition. 
+const classicConditionsModel = ` +{ + "conditions": [{ + "evaluator": { + "params": [0.5], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": ["%s"] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + }], + "datasource": { + "type": "__expr__", + "uid": "-100" + }, + "hide": false, + "intervalMs": 1000, + "maxDataPoints": 43200, + "refId": "%s", + "type": "classic_conditions" +} +` + +// reduceLastExpressionModel represents the way we express reduce to last data point as part of an alert query. +// It supports formatting for 1) the refID of the query to target and 2) the refID of the condition. +const reduceLastExpressionModel = ` +{ + "conditions": [{ + "evaluator": { + "params": [0,0], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + }], + "datasource": { + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "%s", + "hide": false, + "intervalMs": 1000, + "maxDataPoints": 43200, + "reducer": "last", + "refId": "%s", + "type": "reduce" +} +` + +// reduceLastExpressionModel represents the way we express a math (sum of two refIDs greater than 1) operation of an alert query. +// It supports formatting for 1) refID of the first operand, 2) refID of the second operand and 3) the refID of the math operation. +const mathExpressionModel = ` +{ + "conditions": [{ + "evaluator": { + "params": [0, 0], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + }], + "datasource": { + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "$%s + $%s \u003e 1", + "hide": false, + "intervalMs": 1000, + "maxDataPoints": 43200, + "refId": "%s", + "type": "math" +} +` diff --git a/pkg/services/ngalert/api/testing.go b/pkg/services/ngalert/api/testing.go index b9b538bf398..26344d66b64 100644 --- a/pkg/services/ngalert/api/testing.go +++ b/pkg/services/ngalert/api/testing.go @@ -11,7 +11,6 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" "github.com/grafana/grafana/pkg/services/ngalert/store" - "github.com/grafana/grafana/pkg/util" "github.com/grafana/grafana-plugin-sdk-go/data" ) @@ -73,13 +72,15 @@ func (f *fakeAlertInstanceManager) GetStatesForRuleUID(orgID int64, alertRuleUID return f.states[orgID][alertRuleUID] } -func (f *fakeAlertInstanceManager) GenerateAlertInstances(orgID int64, count int) { +// forEachState represents the callback used when generating alert instances that allows us to modify the generated result +type forEachState func(s *state.State) *state.State + +func (f *fakeAlertInstanceManager) GenerateAlertInstances(orgID int64, alertRuleUID string, count int, callbacks ...forEachState) { f.mtx.Lock() defer f.mtx.Unlock() - evaluationTime := time.Now() + evaluationTime := timeNow() evaluationDuration := 1 * time.Minute - alertRuleUID := util.GenerateShortUID() for i := 0; i < count; i++ { _, ok := f.states[orgID] @@ -91,8 +92,8 @@ func (f *fakeAlertInstanceManager) GenerateAlertInstances(orgID int64, count int f.states[orgID][alertRuleUID] = []*state.State{} } - f.states[orgID][alertRuleUID] = append(f.states[orgID][alertRuleUID], &state.State{ - AlertRuleUID: fmt.Sprintf("alert_rule_%v", i), + newState := &state.State{ + AlertRuleUID: alertRuleUID, OrgID: 1, Labels: data.Labels{ "__alert_rule_namespace_uid__": 
"test_namespace_uid", @@ -117,6 +118,14 @@ func (f *fakeAlertInstanceManager) GenerateAlertInstances(orgID int64, count int LastEvaluationTime: evaluationTime.Add(1 * time.Minute), EvaluationDuration: evaluationDuration, Annotations: map[string]string{"annotation": "test"}, - }) + } + + if len(callbacks) != 0 { + for _, cb := range callbacks { + newState = cb(newState) + } + } + + f.states[orgID][alertRuleUID] = append(f.states[orgID][alertRuleUID], newState) } } diff --git a/pkg/services/ngalert/models/alert_query.go b/pkg/services/ngalert/models/alert_query.go index 173c50b030f..a57ceff1737 100644 --- a/pkg/services/ngalert/models/alert_query.go +++ b/pkg/services/ngalert/models/alert_query.go @@ -2,6 +2,7 @@ package models import ( "encoding/json" + "errors" "fmt" "time" @@ -12,6 +13,8 @@ import ( const defaultMaxDataPoints float64 = 43200 // 12 hours at 1sec interval const defaultIntervalMS float64 = 1000 +var ErrNoQuery = errors.New("no `expr` property in the query model") + // Duration is a type used for marshalling durations. type Duration time.Duration @@ -174,6 +177,28 @@ func (aq *AlertQuery) GetDatasource() (string, error) { return aq.DatasourceUID, nil } +// GetQuery returns the query defined by `expr` within the model. +// Returns an ErrNoQuery if it is unable to find the query. +// Returns an error if it is not able to cast the query to a string. +func (aq *AlertQuery) GetQuery() (string, error) { + if aq.modelProps == nil { + err := aq.setModelProps() + if err != nil { + return "", err + } + } + query, ok := aq.modelProps["expr"] + if !ok { + return "", ErrNoQuery + } + + q, ok := query.(string) + if !ok { + return "", fmt.Errorf("failed to cast query to string: %v", aq.modelProps["expr"]) + } + return q, nil +} + func (aq *AlertQuery) GetModel() ([]byte, error) { err := aq.setMaxDatapoints() if err != nil { diff --git a/pkg/services/ngalert/models/alert_query_test.go b/pkg/services/ngalert/models/alert_query_test.go index 36d86f67168..68eccbd6a9e 100644 --- a/pkg/services/ngalert/models/alert_query_test.go +++ b/pkg/services/ngalert/models/alert_query_test.go @@ -2,6 +2,7 @@ package models import ( "encoding/json" + "errors" "fmt" "testing" "time" @@ -110,7 +111,7 @@ func TestAlertQuery(t *testing.T) { Model: json.RawMessage(`{ "queryType": "metricQuery", "intervalMs": "invalid", - "extraParam": "some text" + "extraParam": "some text" }`), }, expectedIsExpression: false, @@ -245,3 +246,38 @@ func TestAlertQueryMarshalling(t *testing.T) { } } } + +func TestAlertQuery_GetQuery(t *testing.T) { + tc := []struct { + name string + alertQuery AlertQuery + expected string + err error + }{ + { + name: "when a query is present", + alertQuery: AlertQuery{Model: json.RawMessage(`{"expr": "sum by (job) (up)"}`)}, + expected: "sum by (job) (up)", + }, + { + name: "when no query is found", + alertQuery: AlertQuery{Model: json.RawMessage(`{"exprisnot": "sum by (job) (up)"}`)}, + err: ErrNoQuery, + }, + { + name: "when we're unable to cast the query to a string", + alertQuery: AlertQuery{Model: json.RawMessage(`{"expr": {"key": 1}}`)}, + err: errors.New("failed to cast query to string: map[key:1]"), + }, + } + + for _, tt := range tc { + t.Run(tt.name, func(t *testing.T) { + expected, err := tt.alertQuery.GetQuery() + if err != nil { + require.Equal(t, tt.err, err) + } + require.Equal(t, tt.expected, expected) + }) + } +} diff --git a/pkg/services/ngalert/store/testing.go b/pkg/services/ngalert/store/testing.go index d2cdef6a14a..46f33565cfc 100644 --- 
a/pkg/services/ngalert/store/testing.go +++ b/pkg/services/ngalert/store/testing.go @@ -34,8 +34,9 @@ func NewFakeRuleStore(t *testing.T) *FakeRuleStore { // FakeRuleStore mocks the RuleStore of the scheduler. type FakeRuleStore struct { - t *testing.T - mtx sync.Mutex + t *testing.T + mtx sync.Mutex + // OrgID -> RuleGroup -> Namespace -> Rules Rules map[int64]map[string]map[string][]*models.AlertRule Hook func(cmd interface{}) error // use Hook if you need to intercept some query and return an error RecordedOps []interface{} @@ -141,10 +142,25 @@ func (f *FakeRuleStore) GetAlertRulesForScheduling(_ context.Context, q *models. return nil } + func (f *FakeRuleStore) GetOrgAlertRules(_ context.Context, q *models.ListAlertRulesQuery) error { f.mtx.Lock() defer f.mtx.Unlock() f.RecordedOps = append(f.RecordedOps, *q) + + if _, ok := f.Rules[q.OrgID]; !ok { + return nil + } + + var rules []*models.AlertRule + for ruleGroup := range f.Rules[q.OrgID] { + for _, storedRules := range f.Rules[q.OrgID][ruleGroup] { + rules = append(rules, storedRules...) + } + } + + q.Result = rules + return nil } func (f *FakeRuleStore) GetNamespaceAlertRules(_ context.Context, q *models.ListNamespaceAlertRulesQuery) error { @@ -185,8 +201,24 @@ func (f *FakeRuleStore) GetRuleGroupAlertRules(_ context.Context, q *models.List return nil } -func (f *FakeRuleStore) GetNamespaces(_ context.Context, _ int64, _ *models2.SignedInUser) (map[string]*models2.Folder, error) { - return nil, nil +func (f *FakeRuleStore) GetNamespaces(_ context.Context, orgID int64, _ *models2.SignedInUser) (map[string]*models2.Folder, error) { + f.mtx.Lock() + defer f.mtx.Unlock() + + namespacesMap := map[string]*models2.Folder{} + + _, ok := f.Rules[orgID] + if !ok { + return namespacesMap, nil + } + + for rg := range f.Rules[orgID] { + for namespace := range f.Rules[orgID][rg] { + namespacesMap[namespace] = &models2.Folder{} + } + } + + return namespacesMap, nil } func (f *FakeRuleStore) GetNamespaceByTitle(_ context.Context, _ string, _ int64, _ *models2.SignedInUser, _ bool) (*models2.Folder, error) { return nil, nil @@ -198,6 +230,27 @@ func (f *FakeRuleStore) GetOrgRuleGroups(_ context.Context, q *models.ListOrgRul if err := f.Hook(*q); err != nil { return err } + + // If we have namespaces, we want to try and retrieve the list of rules stored. + if len(q.NamespaceUIDs) != 0 { + _, ok := f.Rules[q.OrgID] + if !ok { + return nil + } + + var ruleGroups [][]string + for rg := range f.Rules[q.OrgID] { + for storedNamespace := range f.Rules[q.OrgID][rg] { + for _, namespace := range q.NamespaceUIDs { + if storedNamespace == namespace { // if they match, they should go in. + ruleGroups = append(ruleGroups, []string{rg, storedNamespace, storedNamespace}) + } + } + } + } + + q.Result = ruleGroups + } return nil }
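
Note on the query-summary behaviour added above: RouteGetRuleStatuses now fills the Prometheus-compatible "query" field by pulling the "expr" property out of each AlertQuery model (via the new AlertQuery.GetQuery), joining the results with " | ", and falling back to the JSON-encoded query data when extraction fails — which is why the multi-query test expects "vector(1) | vector(1)". The standalone Go sketch below mirrors that extraction-and-fallback logic outside the Grafana codebase; the names (exprFromModel, summarize, errNoQuery) and the simplified model type are illustrative assumptions, not the real ngalert identifiers.

package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"strings"
)

// errNoQuery plays the role of ngmodels.ErrNoQuery: the model has no "expr" property.
var errNoQuery = errors.New("no `expr` property in the query model")

// exprFromModel extracts the "expr" string from one query model,
// analogous to AlertQuery.GetQuery in the diff above.
func exprFromModel(model json.RawMessage) (string, error) {
	var props map[string]interface{}
	if err := json.Unmarshal(model, &props); err != nil {
		return "", err
	}
	raw, ok := props["expr"]
	if !ok {
		return "", errNoQuery
	}
	expr, ok := raw.(string)
	if !ok {
		return "", fmt.Errorf("failed to cast query to string: %v", raw)
	}
	return expr, nil
}

// summarize mirrors ruleToQuery: join every extractable expression with " | ";
// if nothing usable is found, or an unexpected error occurs, fall back to the
// JSON encoding of the whole query data (or to the encoding error as a string).
func summarize(models []json.RawMessage) string {
	var exprs []string
	var queryErr error
	for _, m := range models {
		expr, err := exprFromModel(m)
		if err != nil {
			if errors.Is(err, errNoQuery) {
				// Expression/condition nodes (classic conditions, reduce, math)
				// carry no "expr"; skip them and keep going.
				continue
			}
			queryErr = err
			break
		}
		exprs = append(exprs, expr)
	}
	if queryErr == nil && len(exprs) > 0 {
		return strings.Join(exprs, " | ")
	}
	encoded, err := json.Marshal(models)
	if err != nil {
		return err.Error()
	}
	return string(encoded)
}

func main() {
	models := []json.RawMessage{
		json.RawMessage(`{"refId": "A", "expr": "vector(1)"}`),
		json.RawMessage(`{"refId": "B", "type": "classic_conditions"}`),
	}
	fmt.Println(summarize(models)) // prints: vector(1)
}

This matches the semantics of the test fixtures: the prometheus query model contributes "vector(1)", while the classic-condition, reduce, and math expression models are skipped because they contain no "expr" property.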