The open and composable observability and data visualization platform. Visualize metrics, logs, and traces from multiple sources like Prometheus, Loki, Elasticsearch, InfluxDB, Postgres and many more.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
grafana/pkg/services/alerting/eval_handler.go

84 lines
2.0 KiB

package alerting
import (
"fmt"
"time"
"github.com/grafana/grafana/pkg/log"
"github.com/grafana/grafana/pkg/metrics"
)
var (
MaxRetries int = 1
)
type DefaultEvalHandler struct {
log log.Logger
alertJobTimeout time.Duration
}
func NewEvalHandler() *DefaultEvalHandler {
return &DefaultEvalHandler{
log: log.New("alerting.evalHandler"),
alertJobTimeout: time.Second * 10,
}
}
func (e *DefaultEvalHandler) Eval(context *EvalContext) {
go e.eval(context)
select {
case <-time.After(e.alertJobTimeout):
context.Error = fmt.Errorf("Execution timed out after %v", e.alertJobTimeout)
context.EndTime = time.Now()
e.log.Debug("Job Execution timeout", "alertId", context.Rule.Id, "timeout setting", e.alertJobTimeout)
e.retry(context)
case <-context.DoneChan:
e.log.Debug("Job Execution done", "timeMs", context.GetDurationMs(), "alertId", context.Rule.Id, "firing", context.Firing)
if context.Error != nil {
e.retry(context)
}
}
}
func (e *DefaultEvalHandler) retry(context *EvalContext) {
e.log.Debug("Retrying eval exeuction", "alertId", context.Rule.Id)
if context.RetryCount < MaxRetries {
context.DoneChan = make(chan bool, 1)
context.CancelChan = make(chan bool, 1)
context.RetryCount++
e.Eval(context)
}
}
func (e *DefaultEvalHandler) eval(context *EvalContext) {
defer func() {
if err := recover(); err != nil {
e.log.Error("Alerting rule eval panic", "error", err, "stack", log.Stack(1))
if panicErr, ok := err.(error); ok {
context.Error = panicErr
}
}
}()
for _, condition := range context.Rule.Conditions {
condition.Eval(context)
// break if condition could not be evaluated
if context.Error != nil {
break
}
// break if result has not triggered yet
if context.Firing == false {
break
}
}
context.EndTime = time.Now()
elapsedTime := context.EndTime.Sub(context.StartTime) / time.Millisecond
metrics.M_Alerting_Exeuction_Time.Update(elapsedTime)
context.DoneChan <- true
}