The open and composable observability and data visualization platform. Visualize metrics, logs, and traces from multiple sources like Prometheus, Loki, Elasticsearch, InfluxDB, Postgres and many more.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
grafana/pkg/services/ngalert/eval/eval.go

244 lines
7.0 KiB

// Package eval executes the condition for an alert definition, evaluates the condition results, and
// returns the alert instance states.
package eval
import (
"context"
"fmt"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/tsdb"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/grafana/grafana/pkg/expr"
)
const alertingEvaluationTimeout = 30 * time.Second
type Evaluator struct {
Cfg *setting.Cfg
}
// invalidEvalResultFormatError is an error for invalid format of the alert definition evaluation results.
type invalidEvalResultFormatError struct {
refID string
reason string
err error
}
func (e *invalidEvalResultFormatError) Error() string {
s := fmt.Sprintf("invalid format of evaluation results for the alert definition %s: %s", e.refID, e.reason)
if e.err != nil {
s = fmt.Sprintf("%s: %s", s, e.err.Error())
}
return s
}
func (e *invalidEvalResultFormatError) Unwrap() error {
return e.err
}
// Condition contains backend expressions and queries and the RefID
// of the query or expression that will be evaluated.
type Condition struct {
RefID string `json:"refId"`
OrgID int64 `json:"-"`
QueriesAndExpressions []models.AlertQuery `json:"queriesAndExpressions"`
}
// ExecutionResults contains the unevaluated results from executing
// a condition.
type ExecutionResults struct {
AlertDefinitionID int64
Error error
Results data.Frames
}
// Results is a slice of evaluated alert instances states.
type Results []result
// result contains the evaluated state of an alert instance
// identified by its labels.
type result struct {
Instance data.Labels
State state // Enum
}
// state is an enum of the evaluation state for an alert instance.
type state int
const (
// Normal is the eval state for an alert instance condition
// that evaluated to false.
Normal state = iota
// Alerting is the eval state for an alert instance condition
// that evaluated to false.
Alerting
)
func (s state) String() string {
return [...]string{"Normal", "Alerting"}[s]
}
// IsValid checks the condition's validity.
func (c Condition) IsValid() bool {
// TODO search for refIDs in QueriesAndExpressions
return len(c.QueriesAndExpressions) != 0
}
// AlertExecCtx is the context provided for executing an alert condition.
type AlertExecCtx struct {
OrgID int64
ExpressionsEnabled bool
Ctx context.Context
}
// execute runs the Condition's expressions or queries.
func (c *Condition) execute(ctx AlertExecCtx, now time.Time, dataService *tsdb.Service) (*ExecutionResults, error) {
result := ExecutionResults{}
if !c.IsValid() {
return nil, fmt.Errorf("invalid conditions")
// TODO: Things probably
}
queryDataReq := &backend.QueryDataRequest{
PluginContext: backend.PluginContext{
OrgID: ctx.OrgID,
},
Queries: []backend.DataQuery{},
}
for i := range c.QueriesAndExpressions {
q := c.QueriesAndExpressions[i]
model, err := q.GetModel()
if err != nil {
return nil, fmt.Errorf("failed to get query model: %w", err)
}
interval, err := q.GetIntervalDuration()
if err != nil {
return nil, fmt.Errorf("failed to retrieve intervalMs from the model: %w", err)
}
maxDatapoints, err := q.GetMaxDatapoints()
if err != nil {
return nil, fmt.Errorf("failed to retrieve maxDatapoints from the model: %w", err)
}
queryDataReq.Queries = append(queryDataReq.Queries, backend.DataQuery{
JSON: model,
Interval: interval,
RefID: q.RefID,
MaxDataPoints: maxDatapoints,
QueryType: q.QueryType,
TimeRange: q.RelativeTimeRange.ToTimeRange(now),
})
}
exprService := expr.Service{
Cfg: &setting.Cfg{ExpressionsEnabled: ctx.ExpressionsEnabled},
DataService: dataService,
}
pbRes, err := exprService.TransformData(ctx.Ctx, queryDataReq)
if err != nil {
return &result, err
}
for refID, res := range pbRes.Responses {
if refID != c.RefID {
continue
}
result.Results = res.Frames
}
if len(result.Results) == 0 {
err = fmt.Errorf("no GEL results")
result.Error = err
return &result, err
}
return &result, nil
}
// evaluateExecutionResult takes the ExecutionResult, and returns a frame where
// each column is a string type that holds a string representing its state.
func evaluateExecutionResult(results *ExecutionResults) (Results, error) {
evalResults := make([]result, 0)
labels := make(map[string]bool)
for _, f := range results.Results {
rowLen, err := f.RowLen()
if err != nil {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: "unable to get frame row length", err: err}
}
if rowLen > 1 {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("unexpected row length: %d instead of 1", rowLen)}
}
if len(f.Fields) > 1 {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("unexpected field length: %d instead of 1", len(f.Fields))}
}
if f.Fields[0].Type() != data.FieldTypeNullableFloat64 {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("invalid field type: %d", f.Fields[0].Type())}
}
labelsStr := f.Fields[0].Labels.String()
_, ok := labels[labelsStr]
if ok {
return nil, &invalidEvalResultFormatError{refID: f.RefID, reason: fmt.Sprintf("frame cannot uniquely be identified by its labels: %s", labelsStr)}
}
labels[labelsStr] = true
state := Normal
val, err := f.Fields[0].FloatAt(0)
if err != nil || val != 0 {
state = Alerting
}
evalResults = append(evalResults, result{
Instance: f.Fields[0].Labels,
State: state,
})
}
return evalResults, nil
}
// AsDataFrame forms the EvalResults in Frame suitable for displaying in the table panel of the front end.
// This may be temporary, as there might be a fair amount we want to display in the frontend, and it might not make sense to store that in data.Frame.
// For the first pass, I would expect a Frame with a single row, and a column for each instance with a boolean value.
func (evalResults Results) AsDataFrame() data.Frame {
fields := make([]*data.Field, 0)
for _, evalResult := range evalResults {
fields = append(fields, data.NewField("", evalResult.Instance, []bool{evalResult.State != Normal}))
}
f := data.NewFrame("", fields...)
return *f
}
// ConditionEval executes conditions and evaluates the result.
func (e *Evaluator) ConditionEval(condition *Condition, now time.Time, dataService *tsdb.Service) (Results, error) {
alertCtx, cancelFn := context.WithTimeout(context.Background(), alertingEvaluationTimeout)
defer cancelFn()
alertExecCtx := AlertExecCtx{OrgID: condition.OrgID, Ctx: alertCtx, ExpressionsEnabled: e.Cfg.ExpressionsEnabled}
execResult, err := condition.execute(alertExecCtx, now, dataService)
if err != nil {
return nil, fmt.Errorf("failed to execute conditions: %w", err)
}
evalResults, err := evaluateExecutionResult(execResult)
if err != nil {
return nil, fmt.Errorf("failed to evaluate results: %w", err)
}
return evalResults, nil
}