diff --git a/pkg/models/alerts.go b/pkg/models/alerts.go
index 12c5794eeb5..7f674d67ac0 100644
--- a/pkg/models/alerts.go
+++ b/pkg/models/alerts.go
@@ -110,11 +110,11 @@ type GetAlertChangesQuery struct {
 }
 
 type AlertJob struct {
-	Offset  int64
-	Delay   bool
-	Running bool
-	Retry   int
-	Rule    AlertRule
+	Offset     int64
+	Delay      bool
+	Running    bool
+	RetryCount int
+	Rule       AlertRule
 }
 
 type AlertResult struct {
@@ -125,3 +125,7 @@ type AlertResult struct {
 	Description string
 	AlertJob    *AlertJob
 }
+
+func (ar *AlertResult) IsResultIncomplete() bool {
+	return ar.State == AlertStatePending
+}
diff --git a/pkg/services/alerting/alert_rule_reader.go b/pkg/services/alerting/alert_rule_reader.go
index 1dbedcedf86..ccc81e1c3de 100644
--- a/pkg/services/alerting/alert_rule_reader.go
+++ b/pkg/services/alerting/alert_rule_reader.go
@@ -5,6 +5,7 @@ import (
 	"time"
 
 	"github.com/grafana/grafana/pkg/bus"
+	"github.com/grafana/grafana/pkg/log"
 	m "github.com/grafana/grafana/pkg/models"
 )
 
@@ -51,31 +52,6 @@ func (arr *AlertRuleReader) updateRules() {
 	arr.Lock()
 	defer arr.Unlock()
 
-	/*
-		rules = []m.AlertRule{
-			//{Id: 1, Title: "alert rule 1", Interval: "10s", Frequency: 10},
-			//{Id: 2, Title: "alert rule 2", Interval: "10s", Frequency: 10},
-			//{Id: 3, Title: "alert rule 3", Interval: "10s", Frequency: 10},
-			//{Id: 4, Title: "alert rule 4", Interval: "10s", Frequency: 5},
-			//{Id: 5, Title: "alert rule 5", Interval: "10s", Frequency: 5},
-			{
-				Id: 1,
-				OrgId: 1,
-				Title: "alert rule 1",
-				Frequency: 3,
-				DatasourceId: 1,
-				WarnOperator: ">",
-				WarnLevel: 3,
-				CritOperator: ">",
-				CritLevel: 4,
-				Aggregator: "avg",
-				//Query: `{"refId":"A","target":"statsd.fakesite.counters.session_start.*.count","textEditor":true}"`,
-				Query: `{"hide":false,"refId":"A","target":"aliasByNode(statsd.fakesite.counters.session_start.*.count, 4)","textEditor":false}`,
-				QueryRange: 3600,
-			},
-		}
-	*/
-
 	cmd := &m.GetAlertsQuery{
 		OrgId: 1,
 	}
@@ -83,6 +59,8 @@ func (arr *AlertRuleReader) updateRules() {
 
 	if err == nil {
 		alertJobs = cmd.Result
+	} else {
+		log.Error(1, "AlertRuleReader: Could not load alerts")
 	}
 }
 
diff --git a/pkg/services/alerting/alerting.go b/pkg/services/alerting/alerting.go
index e31372f32e6..714ebd17a94 100644
--- a/pkg/services/alerting/alerting.go
+++ b/pkg/services/alerting/alerting.go
@@ -74,8 +74,8 @@ func (scheduler *Scheduler) updateJobs(alertRuleFn func() []m.AlertRule) {
 			job = scheduler.jobs[rule.Id]
 		} else {
 			job = &m.AlertJob{
-				Running: false,
-				Retry:   0,
+				Running:    false,
+				RetryCount: 0,
 			}
 		}
 
@@ -110,24 +110,28 @@ func (scheduler *Scheduler) executor(executor Executor) {
 
 func (scheduler *Scheduler) handleResponses() {
 	for response := range scheduler.responseQueue {
-		log.Info("Response: alert(%d) status(%s) actual(%v) retry(%d) running(%v)", response.Id, response.State, response.ActualValue, response.AlertJob.Retry, response.AlertJob.Running)
+		log.Info("Response: alert(%d) status(%s) actual(%v) retry(%d)", response.Id, response.State, response.ActualValue, response.AlertJob.RetryCount)
 		response.AlertJob.Running = false
 
-		if response.State == m.AlertStatePending {
-			response.AlertJob.Retry++
-			if response.AlertJob.Retry > maxRetries {
-				response.State = m.AlertStateCritical
-				response.Description = fmt.Sprintf("Failed to run check after %d retires", maxRetries)
-				scheduler.saveState(response)
+		if response.IsResultIncomplete() {
+			response.AlertJob.RetryCount++
+			if response.AlertJob.RetryCount < maxRetries {
+				scheduler.runQueue <- response.AlertJob
+			} else {
+				saveState(&m.AlertResult{
+					Id:          response.Id,
+					State:       m.AlertStateCritical,
+					Description: fmt.Sprintf("Failed to run check after %d retries", maxRetries),
+				})
 			}
 		} else {
-			response.AlertJob.Retry = 0
-			scheduler.saveState(response)
+			response.AlertJob.RetryCount = 0
+			saveState(response)
 		}
 	}
 }
 
-func (scheduler *Scheduler) saveState(response *m.AlertResult) {
+func saveState(response *m.AlertResult) {
 	cmd := &m.UpdateAlertStateCommand{
 		AlertId:  response.Id,
 		NewState: response.State,