mirror of https://github.com/grafana/grafana
pull/5622/head
parent
70cb8400c3
commit
a191b9b1cf
@ -1,15 +0,0 @@
package models

type TimeSeries struct {
	Name   string       `json:"name"`
	Points [][2]float64 `json:"points"`
}

type TimeSeriesSlice []*TimeSeries

func NewTimeSeries(name string, points [][2]float64) *TimeSeries {
	return &TimeSeries{
		Name:   name,
		Points: points,
	}
}
@ -0,0 +1,18 @@
package alertstates

var (
	ValidStates = []string{
		Ok,
		Warn,
		Critical,
		Acknowledged,
		Maintenance,
	}

	Ok           = "OK"
	Warn         = "WARN"
	Critical     = "CRITICAL"
	Acknowledged = "ACKNOWLEDGED"
	Maintenance  = "MAINTENANCE"
	Pending      = "PENDING"
)
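For context, a minimal sketch of how a caller might check a state string against this list (the isValidState helper and the main package are illustrative, not part of this commit):

package main

import (
	"fmt"

	"github.com/grafana/grafana/pkg/services/alerting/alertstates"
)

// isValidState reports whether s appears in alertstates.ValidStates.
// Note that Pending is defined above but excluded from ValidStates.
func isValidState(s string) bool {
	for _, valid := range alertstates.ValidStates {
		if s == valid {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(isValidState(alertstates.Critical)) // true
	fmt.Println(isValidState(alertstates.Pending))  // false
}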
@ -1,80 +1,80 @@
package graphite

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"strconv"
	"time"

	"github.com/grafana/grafana/pkg/components/simplejson"
	"github.com/grafana/grafana/pkg/log"
	m "github.com/grafana/grafana/pkg/models"
	"github.com/grafana/grafana/pkg/util"
)

type GraphiteClient struct{}

type GraphiteSerie struct {
	Datapoints [][2]float64
	Target     string
}

var DefaultClient = &http.Client{
	Timeout: time.Minute,
}

type GraphiteResponse []GraphiteSerie

func (client GraphiteClient) GetSeries(rule m.AlertJob, datasource m.DataSource) (m.TimeSeriesSlice, error) {
	v := url.Values{
		"format": []string{"json"},
		"target": []string{getTargetFromRule(rule.Rule)},
		"until":  []string{"now"},
		"from":   []string{"-" + strconv.Itoa(rule.Rule.QueryRange) + "s"},
	}

	log.Trace("Graphite: sending request with querystring: ", v.Encode())

	req, err := http.NewRequest("POST", datasource.Url+"/render", nil)
	if err != nil {
		return nil, fmt.Errorf("could not create request: %v", err)
	}

	req.Body = ioutil.NopCloser(bytes.NewReader([]byte(v.Encode())))

	if datasource.BasicAuth {
		req.Header.Add("Authorization", util.GetBasicAuthHeader(datasource.User, datasource.Password))
	}

	res, err := DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("expected httpstatus 200, found %d", res.StatusCode)
	}

	response := GraphiteResponse{}

	// The decode error was previously ignored; surface it to the caller.
	if err := json.NewDecoder(res.Body).Decode(&response); err != nil {
		return nil, err
	}

	var timeSeries []*m.TimeSeries
	for _, v := range response {
		timeSeries = append(timeSeries, m.NewTimeSeries(v.Target, v.Datapoints))
	}

	return timeSeries, nil
}

func getTargetFromRule(rule m.AlertRule) string {
	json, _ := simplejson.NewJson([]byte(rule.Query))
	return json.Get("target").MustString()
}
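A rough usage sketch of the client above, assuming an import path of github.com/grafana/grafana/pkg/services/alerting/graphite (the diff does not show where the package lives) and illustrative rule and datasource values:

package main

import (
	"fmt"

	m "github.com/grafana/grafana/pkg/models"
	"github.com/grafana/grafana/pkg/services/alerting/graphite" // assumed path
)

func main() {
	client := graphite.GraphiteClient{}

	rule := m.AlertRule{
		Query:      `{"target": "apps.backend.*.counters.requests.count"}`,
		QueryRange: 3600, // look back one hour, sent as "from=-3600s"
	}

	series, err := client.GetSeries(m.AlertJob{Rule: rule}, m.DataSource{
		Url: "http://localhost:8080",
	})
	if err != nil {
		fmt.Println("query failed:", err)
		return
	}

	for _, s := range series {
		fmt.Printf("%s: %d points\n", s.Name, len(s.Points))
	}
}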
package datasources

// import (
// 	"bytes"
// 	"encoding/json"
// 	"fmt"
// 	"io/ioutil"
// 	"net/http"
// 	"net/url"
// 	"strconv"
// 	"time"
//
// 	"github.com/grafana/grafana/pkg/components/simplejson"
// 	"github.com/grafana/grafana/pkg/log"
// 	m "github.com/grafana/grafana/pkg/models"
// 	"github.com/grafana/grafana/pkg/util"
// )
//
// type GraphiteClient struct{}
//
// type GraphiteSerie struct {
// 	Datapoints [][2]float64
// 	Target     string
// }
//
// var DefaultClient = &http.Client{
// 	Timeout: time.Minute,
// }
//
// type GraphiteResponse []GraphiteSerie
//
// func (client GraphiteClient) GetSeries(rule m.AlertJob, datasource m.DataSource) (m.TimeSeriesSlice, error) {
// 	v := url.Values{
// 		"format": []string{"json"},
// 		"target": []string{getTargetFromRule(rule.Rule)},
// 		"until":  []string{"now"},
// 		"from":   []string{"-" + strconv.Itoa(rule.Rule.QueryRange) + "s"},
// 	}
//
// 	log.Trace("Graphite: sending request with querystring: ", v.Encode())
//
// 	req, err := http.NewRequest("POST", datasource.Url+"/render", nil)
//
// 	if err != nil {
// 		return nil, fmt.Errorf("Could not create request")
// 	}
//
// 	req.Body = ioutil.NopCloser(bytes.NewReader([]byte(v.Encode())))
//
// 	if datasource.BasicAuth {
// 		req.Header.Add("Authorization", util.GetBasicAuthHeader(datasource.User, datasource.Password))
// 	}
//
// 	res, err := DefaultClient.Do(req)
//
// 	if err != nil {
// 		return nil, err
// 	}
//
// 	if res.StatusCode != http.StatusOK {
// 		return nil, fmt.Errorf("expected httpstatus 200, found %d", res.StatusCode)
// 	}
//
// 	response := GraphiteResponse{}
//
// 	json.NewDecoder(res.Body).Decode(&response)
//
// 	var timeSeries []*m.TimeSeries
// 	for _, v := range response {
// 		timeSeries = append(timeSeries, m.NewTimeSeries(v.Target, v.Datapoints))
// 	}
//
// 	return timeSeries, nil
// }
//
// func getTargetFromRule(rule m.AlertRule) string {
// 	json, _ := simplejson.NewJson([]byte(rule.Query))
//
// 	return json.Get("target").MustString()
// }
@ -0,0 +1 @@
package alerting
@ -0,0 +1,43 @@
package alerting

import "github.com/grafana/grafana/pkg/services/alerting/alertstates"

type AlertJob struct {
	Offset     int64
	Delay      bool
	Running    bool
	RetryCount int
	Rule       AlertRule
}

type AlertResult struct {
	Id          int64
	State       string
	ActualValue float64
	Duration    float64
	Description string
	AlertJob    *AlertJob
}

func (ar *AlertResult) IsResultIncomplete() bool {
	return ar.State == alertstates.Pending
}

type AlertRule struct {
	Id           int64
	OrgId        int64
	DatasourceId int64
	DashboardId  int64
	PanelId      int64
	Query        string
	QueryRefId   string
	WarnLevel    float64
	CritLevel    float64
	WarnOperator string
	CritOperator string
	Frequency    int64
	Title        string
	Description  string
	QueryRange   int
	Aggregator   string
}
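A short sketch of the completeness check, assuming the alerting package lives at github.com/grafana/grafana/pkg/services/alerting:

package main

import (
	"fmt"

	"github.com/grafana/grafana/pkg/services/alerting"
	"github.com/grafana/grafana/pkg/services/alerting/alertstates"
)

func main() {
	// A result left in the Pending state is considered incomplete and
	// gets requeued by the scheduler (up to maxRetries, see below).
	result := &alerting.AlertResult{Id: 1, State: alertstates.Pending}
	fmt.Println(result.IsResultIncomplete()) // true
}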
@ -0,0 +1,129 @@
package alerting

import (
	"fmt"
	"time"

	"github.com/Unknwon/log"
	"github.com/grafana/grafana/pkg/services/alerting/alertstates"
)

type Scheduler struct {
	jobs          map[int64]*AlertJob
	runQueue      chan *AlertJob
	responseQueue chan *AlertResult
}

func NewScheduler() *Scheduler {
	return &Scheduler{
		jobs:          make(map[int64]*AlertJob),
		runQueue:      make(chan *AlertJob, 1000),
		responseQueue: make(chan *AlertResult, 1000),
	}
}

func (scheduler *Scheduler) dispatch(reader RuleReader) {
	reschedule := time.NewTicker(time.Second * 10)
	secondTicker := time.NewTicker(time.Second)

	scheduler.updateJobs(reader.Fetch)

	for {
		select {
		case <-secondTicker.C:
			scheduler.queueJobs()
		case <-reschedule.C:
			scheduler.updateJobs(reader.Fetch)
		}
	}
}

func (scheduler *Scheduler) updateJobs(alertRuleFn func() []AlertRule) {
	log.Debug("Scheduler: UpdateJobs()")

	jobs := make(map[int64]*AlertJob)
	rules := alertRuleFn()

	for i, rule := range rules {
		var job *AlertJob
		if scheduler.jobs[rule.Id] != nil {
			job = scheduler.jobs[rule.Id]
		} else {
			job = &AlertJob{
				Running:    false,
				RetryCount: 0,
			}
		}

		job.Rule = rule
		job.Offset = int64(i)

		jobs[rule.Id] = job
	}

	log.Debug("Scheduler: Selected %d jobs", len(jobs))
	scheduler.jobs = jobs
}

func (scheduler *Scheduler) queueJobs() {
	now := time.Now().Unix()
	for _, job := range scheduler.jobs {
		if now%job.Rule.Frequency == 0 && !job.Running {
			log.Info("Scheduler: Putting job on the run queue: %s", job.Rule.Title)
			scheduler.runQueue <- job
		}
	}
}

func (scheduler *Scheduler) executor(executor Executor) {
	for job := range scheduler.runQueue {
		//log.Info("Executor: queue length %d", len(scheduler.runQueue))
		log.Info("Executor: executing %s", job.Rule.Title)
		job.Running = true
		scheduler.measureAndExecute(executor, job)
	}
}

func (scheduler *Scheduler) handleResponses() {
	for response := range scheduler.responseQueue {
		log.Info("Response: alert(%d) status(%s) actual(%v) retry(%d)", response.Id, response.State, response.ActualValue, response.AlertJob.RetryCount)
		response.AlertJob.Running = false

		if response.IsResultIncomplete() {
			response.AlertJob.RetryCount++
			if response.AlertJob.RetryCount < maxRetries {
				scheduler.runQueue <- response.AlertJob
			} else {
				saveState(&AlertResult{
					Id:          response.Id,
					State:       alertstates.Critical,
					Description: fmt.Sprintf("Failed to run check after %d retries", maxRetries),
				})
			}
		} else {
			response.AlertJob.RetryCount = 0
			saveState(response)
		}
	}
}

func (scheduler *Scheduler) measureAndExecute(exec Executor, job *AlertJob) {
	now := time.Now()

	responseChan := make(chan *AlertResult, 1)
	go exec.Execute(job, responseChan)

	select {
	case <-time.After(time.Second * 5):
		scheduler.responseQueue <- &AlertResult{
			Id:       job.Rule.Id,
			State:    alertstates.Pending,
			Duration: float64(time.Since(now).Nanoseconds()) / float64(1000000),
			AlertJob: job,
		}
	case result := <-responseChan:
		result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000)
		log.Info("Scheduler: execution took %vms", result.Duration)
		scheduler.responseQueue <- result
	}
}
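The scheduler's methods are unexported, so wiring happens inside package alerting. A sketch of how the goroutines are meant to run together (startScheduler is hypothetical; the RuleReader and Executor implementations come from elsewhere in the package):

package alerting

// startScheduler wires the three scheduler loops together. reader and
// executor stand for concrete RuleReader / Executor implementations.
func startScheduler(reader RuleReader, executor Executor) {
	scheduler := NewScheduler()

	// Refresh the rule set every 10s and enqueue due jobs every second.
	go scheduler.dispatch(reader)

	// Drain the run queue, executing each job with a 5s deadline.
	go scheduler.executor(executor)

	// Persist results and requeue incomplete (pending) jobs.
	go scheduler.handleResponses()
}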
@ -0,0 +1,90 @@
package tsdb

import "errors"

type Batch struct {
	DataSourceId int64
	Queries      QuerySlice
	Depends      map[string]bool
	Done         bool
	Started      bool
}

type BatchSlice []*Batch

func newBatch(dsId int64, queries QuerySlice) *Batch {
	return &Batch{
		DataSourceId: dsId,
		Queries:      queries,
		Depends:      make(map[string]bool),
	}
}

func (bg *Batch) process(context *QueryContext) {
	executor := getExecutorFor(bg.Queries[0].DataSource)

	if executor == nil {
		bg.Done = true
		result := &BatchResult{
			Error:        errors.New("Could not find executor for data source type " + bg.Queries[0].DataSource.Type),
			QueryResults: make(map[string]*QueryResult),
		}
		for _, query := range bg.Queries {
			result.QueryResults[query.RefId] = &QueryResult{Error: result.Error}
		}
		context.ResultsChan <- result
		return
	}

	res := executor.Execute(bg.Queries, context)
	bg.Done = true
	context.ResultsChan <- res
}

func (bg *Batch) addQuery(query *Query) {
	bg.Queries = append(bg.Queries, query)
}

func (bg *Batch) allDependenciesAreIn(context *QueryContext) bool {
	for key := range bg.Depends {
		if _, exists := context.Results[key]; !exists {
			return false
		}
	}

	return true
}

func getBatches(req *Request) (BatchSlice, error) {
	batches := make(BatchSlice, 0)

	for _, query := range req.Queries {
		if foundBatch := findMatchingBatchGroup(query, batches); foundBatch != nil {
			foundBatch.addQuery(query)
		} else {
			newBatch := newBatch(query.DataSource.Id, QuerySlice{query})
			batches = append(batches, newBatch)

			for _, refId := range query.Depends {
				for _, batch := range batches {
					for _, batchQuery := range batch.Queries {
						if batchQuery.RefId == refId {
							newBatch.Depends[refId] = true
						}
					}
				}
			}
		}
	}

	return batches, nil
}

func findMatchingBatchGroup(query *Query, batches BatchSlice) *Batch {
	for _, batch := range batches {
		if batch.DataSourceId == query.DataSource.Id {
			return batch
		}
	}
	return nil
}
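A sketch of what getBatches produces for two queries on different datasources where one depends on the other (in-package, since getBatches is unexported; the IDs and the exampleBatching helper are illustrative):

package tsdb

import "fmt"

// exampleBatching shows the grouping: queries A and B hit different
// datasources, and B depends on A, so the second batch records that
// dependency and only starts once A's results are in the context.
func exampleBatching() {
	queryA := &Query{RefId: "A", DataSource: &DataSourceInfo{Id: 1, Type: "graphite"}}
	queryB := &Query{RefId: "B", DataSource: &DataSourceInfo{Id: 2, Type: "graphite"}, Depends: []string{"A"}}

	batches, _ := getBatches(&Request{Queries: QuerySlice{queryA, queryB}})

	fmt.Println(len(batches))            // 2: one batch per datasource
	fmt.Println(batches[1].Depends["A"]) // true: batch 2 waits on query A
}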
@ -0,0 +1,24 @@
package tsdb

type Executor interface {
	Execute(queries QuerySlice, context *QueryContext) *BatchResult
}

var registry map[string]GetExecutorFn

type GetExecutorFn func(dsInfo *DataSourceInfo) Executor

func init() {
	registry = make(map[string]GetExecutorFn)
}

func getExecutorFor(dsInfo *DataSourceInfo) Executor {
	if fn, exists := registry[dsInfo.Type]; exists {
		return fn(dsInfo)
	}
	return nil
}

func RegisterExecutor(dsType string, fn GetExecutorFn) {
	registry[dsType] = fn
}
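RegisterExecutor is the registry's extension point. A minimal sketch of plugging in a hypothetical executor from its own package (the fake package, its import path, and FakeExecutor are illustrative, not part of this commit):

package fake

import "github.com/grafana/grafana/pkg/tsdb" // assumed import path

// FakeExecutor returns an empty result for every query; a real executor
// would query the datasource described by the DataSourceInfo it was
// constructed with.
type FakeExecutor struct {
	dsInfo *tsdb.DataSourceInfo
}

func (e *FakeExecutor) Execute(queries tsdb.QuerySlice, context *tsdb.QueryContext) *tsdb.BatchResult {
	result := &tsdb.BatchResult{QueryResults: make(map[string]*tsdb.QueryResult)}
	for _, q := range queries {
		result.QueryResults[q.RefId] = &tsdb.QueryResult{RefId: q.RefId}
	}
	return result
}

// init runs after package tsdb is fully initialized, so the registry
// map is guaranteed to exist by the time we register.
func init() {
	tsdb.RegisterExecutor("fake", func(dsInfo *tsdb.DataSourceInfo) tsdb.Executor {
		return &FakeExecutor{dsInfo: dsInfo}
	})
}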
@ -0,0 +1,62 @@
package tsdb

import "time"

type TimeRange struct {
	From time.Time
	To   time.Time
}

type Request struct {
	TimeRange     TimeRange
	MaxDataPoints int
	Queries       QuerySlice
}

type Response struct {
	BatchTimings []*BatchTiming
	Results      map[string]*QueryResult
}

type DataSourceInfo struct {
	Id                int64
	Name              string
	Type              string
	Url               string
	Password          string
	User              string
	Database          string
	BasicAuth         bool
	BasicAuthUser     string
	BasicAuthPassword string
}

type BatchTiming struct {
	TimeElapsed int64
}

type BatchResult struct {
	Error        error
	QueryResults map[string]*QueryResult
	Timings      *BatchTiming
}

type QueryResult struct {
	Error  error
	RefId  string
	Series TimeSeriesSlice
}

type TimeSeries struct {
	Name   string
	Points [][2]float64
}

type TimeSeriesSlice []*TimeSeries

func NewTimeSeries(name string, points [][2]float64) *TimeSeries {
	return &TimeSeries{
		Name:   name,
		Points: points,
	}
}
@ -0,0 +1,12 @@
package tsdb

type Query struct {
	RefId      string
	Query      string
	Depends    []string
	DataSource *DataSourceInfo
	Results    []*TimeSeries
	Exclude    bool
}

type QuerySlice []*Query
@ -0,0 +1,21 @@
package tsdb

import "sync"

type QueryContext struct {
	TimeRange   TimeRange
	Queries     QuerySlice
	Results     map[string]*QueryResult
	ResultsChan chan *BatchResult
	Lock        sync.RWMutex
	BatchWaits  sync.WaitGroup
}

func NewQueryContext(queries QuerySlice, timeRange TimeRange) *QueryContext {
	return &QueryContext{
		TimeRange:   timeRange,
		Queries:     queries,
		ResultsChan: make(chan *BatchResult),
		Results:     make(map[string]*QueryResult),
	}
}
@ -0,0 +1,51 @@
package tsdb

func HandleRequest(req *Request) (*Response, error) {
	context := NewQueryContext(req.Queries, req.TimeRange)

	batches, err := getBatches(req)
	if err != nil {
		return nil, err
	}

	currentlyExecuting := 0

	for _, batch := range batches {
		if len(batch.Depends) == 0 {
			currentlyExecuting++
			batch.Started = true
			go batch.process(context)
		}
	}

	response := &Response{}

	for currentlyExecuting != 0 {
		select {
		case batchResult := <-context.ResultsChan:
			currentlyExecuting--

			response.BatchTimings = append(response.BatchTimings, batchResult.Timings)

			for refId, result := range batchResult.QueryResults {
				context.Results[refId] = result
			}

			for _, batch := range batches {
				// not interested in started batches
				if batch.Started {
					continue
				}

				if batch.allDependenciesAreIn(context) {
					currentlyExecuting++
					batch.Started = true
					go batch.process(context)
				}
			}
		}
	}

	response.Results = context.Results
	return response, nil
}
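An end-to-end sketch of driving the dispatcher, assuming pkg/tsdb as the import path and reusing the hypothetical fake executor from the earlier sketch:

package main

import (
	"fmt"
	"time"

	"github.com/grafana/grafana/pkg/tsdb"        // assumed import path
	_ "github.com/grafana/grafana/pkg/tsdb/fake" // hypothetical package registering the "fake" executor
)

func main() {
	req := &tsdb.Request{
		TimeRange: tsdb.TimeRange{
			From: time.Now().Add(-time.Hour),
			To:   time.Now(),
		},
		Queries: tsdb.QuerySlice{
			{RefId: "A", Query: "apps.*.counters", DataSource: &tsdb.DataSourceInfo{Id: 1, Type: "fake"}},
		},
	}

	// HandleRequest blocks until every batch has reported on ResultsChan.
	resp, err := tsdb.HandleRequest(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}

	fmt.Println(len(resp.Results)) // 1: the result for query "A"
}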