feat(alerting): work on alerting

Branch: pull/5622/head
Author: Torkel Ödegaard (9 years ago)
Parent: 624cd6fc0a
Commit: f13b869aa4
14 changed files:

 1. pkg/api/api.go (32)
 2. pkg/models/alert.go (3)
 3. pkg/services/alerting/alertstates/states.go (14)
 4. pkg/services/alerting/engine.go (23)
 5. pkg/services/alerting/handler.go (28)
 6. pkg/services/alerting/models.go (17)
 7. pkg/services/alerting/result_handler.go (2)
 8. pkg/services/sqlstore/alert.go (21)
 9. public/app/features/alerting/alert_log_ctrl.ts (4)
10. public/app/features/alerting/alerts_ctrl.ts (2)
11. public/app/features/alerting/notification_edit_ctrl.ts (6)
12. public/app/features/alerting/notifications_list_ctrl.ts (4)
13. public/app/plugins/datasource/grafana-live/plugin.json (7)
14. public/app/plugins/panel/graph/partials/tab_alerting.html (4)

pkg/api/api.go

@@ -245,25 +245,23 @@ func Register(r *macaron.Macaron) {
     // metrics
     r.Get("/metrics", wrap(GetInternalMetrics))
-    r.Group("/alerting", func() {
-        r.Group("/rules", func() {
-            r.Get("/:alertId/states", wrap(GetAlertStates))
-            //r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
-            r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
-            //r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
-            r.Get("/", wrap(GetAlerts))
-        })
-        r.Get("/notifications", wrap(GetAlertNotifications))
-        r.Group("/notification", func() {
-            r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
-            r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
-            r.Get("/:notificationId", wrap(GetAlertNotificationById))
-            r.Delete("/:notificationId", wrap(DeleteAlertNotification))
-        }, reqOrgAdmin)
+    r.Group("/alerts", func() {
+        r.Get("/:alertId/states", wrap(GetAlertStates))
+        //r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState))
+        r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert))
+        //r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates
+        r.Get("/", wrap(GetAlerts))
     })
+    r.Get("/alert-notifications", wrap(GetAlertNotifications))
+    r.Group("/alert-notifications", func() {
+        r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification))
+        r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification))
+        r.Get("/:notificationId", wrap(GetAlertNotificationById))
+        r.Delete("/:notificationId", wrap(DeleteAlertNotification))
+    }, reqOrgAdmin)
     // error test
     r.Get("/metrics/error", wrap(GenerateError))

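For reference, the consolidated endpoints above can be exercised with a small client. This is a minimal sketch, not part of this commit; it assumes a Grafana instance at http://localhost:3000 and an API key in the GRAFANA_API_KEY environment variable:

// sketch only: a plain net/http client against the remapped routes
package main

import (
    "fmt"
    "io"
    "net/http"
    "os"
)

func get(path string) {
    req, err := http.NewRequest("GET", "http://localhost:3000"+path, nil)
    if err != nil {
        panic(err)
    }
    req.Header.Set("Authorization", "Bearer "+os.Getenv("GRAFANA_API_KEY"))

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, _ := io.ReadAll(resp.Body)
    fmt.Println(path, "->", resp.Status)
    fmt.Println(string(body))
}

func main() {
    get("/api/alerts")              // alert rules, grouped under /alerts after this commit
    get("/api/alert-notifications") // notification channels, moved out of the /alerts group
}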
pkg/models/alert.go

@@ -18,6 +18,9 @@ type Alert struct {
     Enabled bool
     Frequency int64
+    CreatedBy int64
+    UpdatedBy int64
     Created time.Time
     Updated time.Time

pkg/services/alerting/alertstates/states.go

@@ -5,14 +5,12 @@ var (
         Ok,
         Warn,
         Critical,
-        Acknowledged,
-        Maintenance,
+        Unknown,
     }
     Ok = "OK"
     Warn = "WARN"
     Critical = "CRITICAL"
-    Acknowledged = "ACKNOWLEDGED"
-    Maintenance = "MAINTENANCE"
     Pending = "PENDING"
+    Unknown = "UNKNOWN"
 )

pkg/services/alerting/engine.go

@@ -19,6 +19,7 @@ type Engine struct {
     ruleReader RuleReader
     log log.Logger
     responseHandler ResultHandler
+    alertJobTimeout time.Duration
 }

 func NewEngine() *Engine {
@@ -31,6 +32,7 @@ func NewEngine() *Engine {
         ruleReader: NewRuleReader(),
         log: log.New("alerting.engine"),
         responseHandler: NewResultHandler(),
+        alertJobTimeout: time.Second * 5,
     }
     return e
@@ -87,24 +89,25 @@ func (e *Engine) execDispatch() {
 }

 func (e *Engine) executeJob(job *AlertJob) {
-    now := time.Now()
+    startTime := time.Now()
     resultChan := make(chan *AlertResult, 1)
     go e.handler.Execute(job, resultChan)
     select {
-    case <-time.After(time.Second * 5):
+    case <-time.After(e.alertJobTimeout):
         e.resultQueue <- &AlertResult{
             State: alertstates.Pending,
-            Duration: float64(time.Since(now).Nanoseconds()) / float64(1000000),
             Error: fmt.Errorf("Timeout"),
             AlertJob: job,
-            ExeuctionTime: time.Now(),
+            StartTime: startTime,
+            EndTime: time.Now(),
         }
+        close(resultChan)
         e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id)
     case result := <-resultChan:
-        result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000)
-        e.log.Debug("Job Execution done", "timeTakenMs", result.Duration, "ruleId", job.Rule.Id)
+        duration := float64(result.EndTime.Nanosecond()-result.StartTime.Nanosecond()) / float64(1000000)
+        e.log.Debug("Job Execution done", "timeTakenMs", duration, "ruleId", job.Rule.Id)
         e.resultQueue <- result
     }
 }
@@ -117,7 +120,7 @@ func (e *Engine) resultHandler() {
     }()
     for result := range e.resultQueue {
-        e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "value", result.ActualValue, "retry", result.AlertJob.RetryCount)
+        e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "retry", result.AlertJob.RetryCount)
         result.AlertJob.Running = false

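The executeJob change above routes the previously hard-coded 5-second timeout through the new alertJobTimeout field: the handler runs in a goroutine and the engine selects between its result channel and time.After. Below is a standalone sketch of that pattern, not Grafana's code; names such as runJob and jobTimeout are illustrative, and the duration is computed with time.Time.Sub since Nanosecond() only reports a timestamp's sub-second offset:

// sketch only: timeout-guarded job execution via select
package main

import (
    "errors"
    "fmt"
    "time"
)

type result struct {
    startTime time.Time
    endTime   time.Time
}

// runJob stands in for handler.Execute: do some work, report on the channel.
func runJob(out chan<- *result) {
    start := time.Now()
    time.Sleep(200 * time.Millisecond) // placeholder for the query + evaluation
    out <- &result{startTime: start, endTime: time.Now()}
}

func main() {
    jobTimeout := 5 * time.Second
    resultChan := make(chan *result, 1) // buffered, so a late worker never blocks

    go runJob(resultChan)

    select {
    case <-time.After(jobTimeout):
        fmt.Println("job timed out:", errors.New("Timeout"))
    case res := <-resultChan:
        // Sub gives the elapsed duration between the two timestamps.
        fmt.Printf("job done in %.2fms\n", float64(res.endTime.Sub(res.startTime).Nanoseconds())/1e6)
    }
}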
pkg/services/alerting/handler.go

@@ -26,18 +26,24 @@ func NewHandler() *HandlerImpl {
 }

 func (e *HandlerImpl) Execute(job *AlertJob, resultQueue chan *AlertResult) {
+    startTime := time.Now()
     timeSeries, err := e.executeQuery(job)
     if err != nil {
         resultQueue <- &AlertResult{
             Error: err,
             State: alertstates.Pending,
             AlertJob: job,
-            ExeuctionTime: time.Now(),
+            StartTime: time.Now(),
+            EndTime: time.Now(),
         }
     }
     result := e.evaluateRule(job.Rule, timeSeries)
     result.AlertJob = job
+    result.StartTime = startTime
+    result.EndTime = time.Now()
     resultQueue <- result
 }
@@ -108,9 +114,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
     e.log.Debug("Alert execution Crit", "name", serie.Name, "condition", condition2, "result", critResult)
     if critResult {
         triggeredAlert = append(triggeredAlert, &TriggeredAlert{
             State: alertstates.Critical,
-            ActualValue: transformedValue,
-            Name: serie.Name,
+            Value: transformedValue,
+            Metric: serie.Name,
         })
         continue
     }
@@ -120,9 +126,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
     e.log.Debug("Alert execution Warn", "name", serie.Name, "condition", condition, "result", warnResult)
     if warnResult {
         triggeredAlert = append(triggeredAlert, &TriggeredAlert{
             State: alertstates.Warn,
-            ActualValue: transformedValue,
-            Name: serie.Name,
+            Value: transformedValue,
+            Metric: serie.Name,
         })
     }
 }
@@ -138,5 +144,5 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice)
     }
 }
-    return &AlertResult{State: executionState, Description: "Returned " + executionState, TriggeredAlerts: triggeredAlert, ExeuctionTime: time.Now()}
+    return &AlertResult{State: executionState, TriggeredAlerts: triggeredAlert}
 }

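The evaluateRule hunks above only rename the TriggeredAlert fields (ActualValue becomes Value, Name becomes Metric); the surrounding warn/critical logic is unchanged. As a rough illustration of that shape only, not the commit's actual evaluation code, a simplified threshold check might look like:

// sketch only: simplified warn/critical check producing TriggeredAlert values
package main

import "fmt"

// TriggeredAlert mirrors the renamed fields in this commit's models.go.
type TriggeredAlert struct {
    Value  float64
    Metric string
    State  string
}

// evaluate is an assumed, simplified stand-in for the per-series checks.
func evaluate(metric string, value, warnLevel, critLevel float64) *TriggeredAlert {
    switch {
    case value >= critLevel:
        return &TriggeredAlert{Value: value, Metric: metric, State: "CRITICAL"}
    case value >= warnLevel:
        return &TriggeredAlert{Value: value, Metric: metric, State: "WARN"}
    default:
        return nil
    }
}

func main() {
    for _, v := range []float64{10, 75, 95} {
        if ta := evaluate("cpu.load", v, 70, 90); ta != nil {
            fmt.Printf("%s = %.0f -> %s\n", ta.Metric, ta.Value, ta.State)
        } else {
            fmt.Printf("cpu.load = %.0f -> OK\n", v)
        }
    }
}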
pkg/services/alerting/models.go

@@ -24,19 +24,20 @@ func (aj *AlertJob) IncRetry() {
 type AlertResult struct {
     State string
-    ActualValue float64
-    Duration float64
     TriggeredAlerts []*TriggeredAlert
-    Description string
     Error error
-    AlertJob *AlertJob
-    ExeuctionTime time.Time
+    Description string
+    StartTime time.Time
+    EndTime time.Time
+    AlertJob *AlertJob
 }

 type TriggeredAlert struct {
-    ActualValue float64
-    Name string
+    Value float64
+    Metric string
     State string
+    Tags map[string]string
 }

 type Level struct {

pkg/services/alerting/result_handler.go

@@ -60,7 +60,7 @@ func (handler *ResultHandlerImpl) shouldUpdateState(result *AlertResult) bool {
     }
     lastExecution := query.Result.Created
-    asdf := result.ExeuctionTime.Add(time.Minute * -15)
+    asdf := result.StartTime.Add(time.Minute * -15)
     olderThen15Min := lastExecution.Before(asdf)
     changedState := query.Result.NewState != result.State

pkg/services/sqlstore/alert.go

@@ -158,24 +158,29 @@ func DeleteAlertDefinition(dashboardId int64, sess *xorm.Session) error {
 func SaveAlerts(cmd *m.SaveAlertsCommand) error {
     return inTransaction(func(sess *xorm.Session) error {
-        alerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
+        existingAlerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess)
         if err != nil {
             return err
         }
-        upsertAlerts(alerts, cmd, sess)
-        deleteMissingAlerts(alerts, cmd, sess)
+        if err := upsertAlerts(existingAlerts, cmd, sess); err != nil {
+            return err
+        }
+        if err := deleteMissingAlerts(existingAlerts, cmd, sess); err != nil {
+            return err
+        }
         return nil
     })
 }

-func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
+func upsertAlerts(existingAlerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error {
     for _, alert := range cmd.Alerts {
         update := false
         var alertToUpdate *m.Alert
-        for _, k := range alerts {
+        for _, k := range existingAlerts {
             if alert.PanelId == k.PanelId {
                 update = true
                 alert.Id = k.Id
@@ -195,11 +200,13 @@ func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Sessio
             sqlog.Debug("Alert updated", "name", alert.Name, "id", alert.Id)
         }
     } else {
         alert.Updated = time.Now()
         alert.Created = time.Now()
-        alert.State = "OK"
+        alert.State = "UNKNOWN"
+        alert.CreatedBy = cmd.UserId
+        alert.UpdatedBy = cmd.UserId
         _, err := sess.Insert(alert)
         if err != nil {
             return err

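SaveAlerts now propagates errors from upsertAlerts and deleteMissingAlerts instead of discarding them, so a failed step aborts the surrounding transaction. A minimal sketch of that flow, with inTx, upsert, and deleteMissing as stand-ins rather than Grafana's or xorm's API:

// sketch only: run sequential steps inside a transaction, bail on first error
package main

import (
    "errors"
    "fmt"
)

// inTx stands in for sqlstore's inTransaction helper; real code would
// begin, commit, and roll back a database transaction around fn.
func inTx(fn func() error) error {
    return fn()
}

func upsert() error        { return nil }
func deleteMissing() error { return errors.New("delete failed") }

func saveAlerts() error {
    return inTx(func() error {
        if err := upsert(); err != nil {
            return err // propagate instead of ignoring, as SaveAlerts now does
        }
        if err := deleteMissing(); err != nil {
            return err
        }
        return nil
    })
}

func main() {
    fmt.Println("saveAlerts:", saveAlerts())
}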
public/app/features/alerting/alert_log_ctrl.ts

@@ -20,7 +20,7 @@ export class AlertLogCtrl {
   }

   loadAlertLogs(alertId: number) {
-    this.backendSrv.get(`/api/alerts/rules/${alertId}/states`).then(result => {
+    this.backendSrv.get(`/api/alerts/${alertId}/states`).then(result => {
       this.alertLogs = _.map(result, log => {
         log.iconCss = alertDef.getCssForState(log.newState);
         log.humanTime = moment(log.created).format("YYYY-MM-DD HH:mm:ss");
@@ -28,7 +28,7 @@ export class AlertLogCtrl {
       });
     });

-    this.backendSrv.get(`/api/alerts/rules/${alertId}`).then(result => {
+    this.backendSrv.get(`/api/alerts/${alertId}`).then(result => {
       this.alert = result;
     });
   }

public/app/features/alerting/alerts_ctrl.ts

@@ -49,7 +49,7 @@ export class AlertListCtrl {
       state: stats
     };

-    this.backendSrv.get('/api/alerts/rules', params).then(result => {
+    this.backendSrv.get('/api/alerts', params).then(result => {
       this.alerts = _.map(result, alert => {
         alert.iconCss = alertDef.getCssForState(alert.state);
         return alert;

public/app/features/alerting/notification_edit_ctrl.ts

@@ -24,7 +24,7 @@ export class AlertNotificationEditCtrl {
   }

   loadNotification(notificationId) {
-    this.backendSrv.get(`/api/alerts/notification/${notificationId}`).then(result => {
+    this.backendSrv.get(`/api/alert-notifications/${notificationId}`).then(result => {
       console.log(result);
       this.notification = result;
     });
@@ -37,7 +37,7 @@ export class AlertNotificationEditCtrl {
   save() {
     if (this.notification.id) {
       console.log('this.notification: ', this.notification);
-      this.backendSrv.put(`/api/alerts/notification/${this.notification.id}`, this.notification)
+      this.backendSrv.put(`/api/alert-notifications/${this.notification.id}`, this.notification)
         .then(result => {
           this.notification = result;
           this.$scope.appEvent('alert-success', ['Notification created!', '']);
@@ -45,7 +45,7 @@ export class AlertNotificationEditCtrl {
           this.$scope.appEvent('alert-error', ['Unable to create notification.', '']);
         });
     } else {
-      this.backendSrv.post(`/api/alerts/notification`, this.notification)
+      this.backendSrv.post(`/api/alert-notifications`, this.notification)
         .then(result => {
           this.notification = result;
           this.$scope.appEvent('alert-success', ['Notification updated!', '']);

public/app/features/alerting/notifications_list_ctrl.ts

@@ -15,13 +15,13 @@ export class AlertNotificationsListCtrl {
   }

   loadNotifications() {
-    this.backendSrv.get(`/api/alerts/notifications`).then(result => {
+    this.backendSrv.get(`/api/alert-notifications`).then(result => {
       this.notifications = result;
     });
   }

   deleteNotification(notificationId) {
-    this.backendSrv.delete(`/api/alerts/notification/${notificationId}`)
+    this.backendSrv.delete(`/api/alerts-notification/${notificationId}`)
       .then(() => {
         this.notifications = this.notifications.filter(notification => {
           return notification.id !== notificationId;

public/app/plugins/datasource/grafana-live/plugin.json (new file)

@@ -0,0 +1,7 @@
+{
+  "type": "datasource",
+  "name": "Grafana Live",
+  "id": "grafana-live",
+  "metrics": true
+}

public/app/plugins/panel/graph/partials/tab_alerting.html

@@ -123,14 +123,14 @@
   <h5 class="section-heading">Information</h5>
   <div class="gf-form">
     <span class="gf-form-label width-10">Alert name</span>
-    <input type="text" class="gf-form-input width-22" ng-model="ctrl.panel.alerting.name">
+    <input type="text" class="gf-form-input width-22" ng-model="ctrl.alert.name">
   </div>
   <div class="gf-form-inline">
     <div class="gf-form">
       <span class="gf-form-label width-10" style="margin-top: -73px;">Alert description</span>
     </div>
     <div class="gf-form">
-      <textarea rows="5" ng-model="ctrl.panel.alerting.description" class="gf-form-input width-22"></textarea>
+      <textarea rows="5" ng-model="ctrl.alert.description" class="gf-form-input width-22"></textarea>
     </div>
   </div>
 </div>
