mirror of https://github.com/grafana/grafana
parent
464f3f738f
commit
f6b8d3a1c2
@ -0,0 +1 @@ |
||||
grafana/provisioning/dashboards/alerts/alert-* |
@ -0,0 +1,137 @@ |
||||
# Grafana High Availability (HA) test setup |
||||
|
||||
A set of Docker Compose services which together create a Grafana HA test setup, with the capability of easily
scaling the number of Grafana instances up or down.
||||
|
||||
Included services |
||||
|
||||
* Grafana |
||||
* MySQL - Grafana configuration database and session storage
||||
* Prometheus - Monitoring of Grafana and used as datasource of provisioned alert rules |
||||
* Nginx - Reverse proxy for Grafana and Prometheus. Enables browsing Grafana/Prometheus UI using a hostname |
||||
|
||||
## Prerequisites |
||||
|
||||
### Build grafana docker container |
||||
|
||||
Build a Grafana Docker image from the current branch and commit, and tag it as `grafana/grafana:dev`.
||||
|
||||
```bash |
||||
$ cd <grafana repo> |
||||
$ make build-docker-full |
||||
``` |
||||
|
||||
### Virtual host names |
||||
|
||||
#### Alternative 1 - Use dnsmasq |
||||
|
||||
```bash |
||||
$ sudo apt-get install dnsmasq |
||||
$ echo 'address=/loc/127.0.0.1' | sudo tee /etc/dnsmasq.d/dnsmasq-loc.conf > /dev/null |
||||
$ sudo /etc/init.d/dnsmasq restart |
||||
$ ping whatever.loc |
||||
PING whatever.loc (127.0.0.1) 56(84) bytes of data. |
||||
64 bytes from localhost (127.0.0.1): icmp_seq=1 ttl=64 time=0.076 ms |
||||
--- whatever.loc ping statistics --- |
||||
1 packet transmitted, 1 received, 0% packet loss, time 1998ms |
||||
``` |
||||
|
||||
#### Alternative 2 - Manually update /etc/hosts |
||||
|
||||
Update your `/etc/hosts` to be able to access Grafana and/or Prometheus UI using a hostname. |
||||
|
||||
```bash |
||||
$ cat /etc/hosts |
||||
127.0.0.1 grafana.loc |
||||
127.0.0.1 prometheus.loc |
||||
``` |
||||
|
||||
## Start services |
||||
|
||||
```bash |
||||
$ docker-compose up -d |
||||
``` |
||||
|
||||
Browse |
||||
* http://grafana.loc/ |
||||
* http://prometheus.loc/ |
||||
|
||||
Check for any errors |
||||
|
||||
```bash |
||||
$ docker-compose logs | grep error |
||||
``` |
||||
|
||||
### Scale Grafana instances up/down |
||||
|
||||
Scale number of Grafana instances to `<instances>` |
||||
|
||||
```bash |
||||
$ docker-compose up --scale grafana=<instances> -d |
||||
# for example 3 instances |
||||
$ docker-compose up --scale grafana=3 -d |
||||
``` |
||||
|
||||
## Test alerting |
||||
|
||||
### Create notification channels |
||||
|
||||
Creates the default notification channels if they do not already exist.
||||
|
||||
```bash |
||||
$ ./alerts.sh setup |
||||
``` |
||||
|
||||
### Slack notifications |
||||
|
||||
Disable |
||||
|
||||
```bash |
||||
$ ./alerts.sh slack -d |
||||
``` |
||||
|
||||
Enable and configure url |
||||
|
||||
```bash |
||||
$ ./alerts.sh slack -u https://hooks.slack.com/services/... |
||||
``` |
||||
|
||||
Enable, configure url and enable reminders |
||||
|
||||
```bash |
||||
$ ./alerts.sh slack -u https://hooks.slack.com/services/... -r -e 10m |
||||
``` |
||||
|
||||
### Provision alert dashboards with alert rules |
||||
|
||||
Provision 1 dashboard/alert rule (default) |
||||
|
||||
```bash |
||||
$ ./alerts.sh provision |
||||
``` |
||||
|
||||
Provision 10 dashboards/alert rules |
||||
|
||||
```bash |
||||
$ ./alerts.sh provision -a 10 |
||||
``` |
||||
|
||||
Provision 10 dashboards/alert rules and change the condition to `gt 100` (value greater than 100)
||||
|
||||
```bash |
||||
$ ./alerts.sh provision -a 10 -c 100 |
||||
``` |
||||
|
||||
### Pause/unpause all alert rules |
||||
|
||||
Pause |
||||
|
||||
```bash |
||||
$ ./alerts.sh pause |
||||
``` |
||||
|
||||
Unpause |
||||
|
||||
```bash |
||||
$ ./alerts.sh unpause |
||||
``` |
@ -0,0 +1,156 @@ |
||||
#!/bin/bash |
||||
|
||||
#######################################
# Abort the script unless a `jsonnet` command is available.
# Arguments: none
# Outputs:   an installation hint on stderr when jsonnet cannot be found
# Returns:   0 when jsonnet is found; otherwise exits the shell with status 1
#######################################
requiresJsonnet() {
  # `command -v` is the portable lookup; silence it so only our hint is shown.
  if ! command -v jsonnet > /dev/null 2>&1; then
    echo "you need to install jsonnet to run this script" >&2
    echo "follow the instructions on https://github.com/google/jsonnet" >&2
    exit 1
  fi
}
||||
|
||||
#######################################
# Create one alert notification channel unless the given id already exists.
# Arguments: $1 - channel id to probe
#            $2 - channel name used in the "already exists" message
#            $3 - JSON payload POSTed when the channel is missing
# Outputs:   skip message, or whatever curl prints for the POST
#######################################
_ensureNotificationChannel() {
  local id=$1
  local name=$2
  local payload=$3
  local api='http://admin:admin@grafana.loc/api/alert-notifications'
  local status

  # Only the HTTP status code is of interest; the response body is discarded.
  status=$(curl -s -o /dev/null -w '%{http_code}' "${api}/${id}")
  if [[ "$status" == "200" ]]; then
    echo "${name} already exists, skipping..."
  else
    curl -H "Content-Type: application/json" \
      -d "$payload" \
      "$api"
  fi
}

#######################################
# Create the default "Email" and "Slack" notification channels.
# Existence is probed by id (1 for Email, 2 for Slack).
# NOTE(review): presumably a fresh database hands out ids 1 and 2 in
# creation order - confirm before running against a non-empty instance.
# Arguments: none
# Outputs:   skip messages and/or the API responses printed by curl
#######################################
setup() {
  _ensureNotificationChannel 1 "Email" '{
  "name": "Email",
  "type": "email",
  "isDefault": false,
  "sendReminder": false,
  "uploadImage": true,
  "settings": {
    "addresses": "user@test.com"
  }
}'

  _ensureNotificationChannel 2 "Slack" '{
  "name": "Slack",
  "type": "slack",
  "isDefault": false,
  "sendReminder": false,
  "uploadImage": true
}'
}
||||
|
||||
#######################################
# Reconfigure the Slack notification channel (id 2) through the Grafana API.
# Options:
#   -d           send "isDefault": false (the default is true)
#   -u <url>     Slack webhook url placed in settings.url (default empty)
#   -r           send "sendReminder": true (the default is false)
#   -e <period>  value for "frequency" (default 10m)
# Outputs:   the API response printed by curl; option errors on stderr
# Returns:   1 on an unknown option or a missing option argument,
#            otherwise the exit status of curl
#######################################
slack() {
  local enabled=true
  local url=''
  local remind=false
  local remindEvery='10m'
  # Keep getopts state local so the function can be called more than once.
  local opt OPTIND=1 OPTARG

  while getopts ":e:u:dr" opt; do
    case "${opt}" in
      e) remindEvery=${OPTARG} ;;
      u) url=${OPTARG} ;;
      d) enabled=false ;;
      r) remind=true ;;
      :)
        echo "slack: option -${OPTARG} requires an argument" >&2
        return 1
        ;;
      \?)
        echo "slack: unknown option -${OPTARG}" >&2
        return 1
        ;;
    esac
  done
  shift $((OPTIND - 1))

  # Build the body as one string so values containing spaces or glob
  # characters reach curl as a single -d argument.
  # The values are not JSON-escaped: a double quote or backslash in the url
  # or reminder period would yield an invalid payload.
  local payload
  printf -v payload '{
  "id": 2,
  "name": "Slack",
  "type": "slack",
  "isDefault": %s,
  "sendReminder": %s,
  "frequency": "%s",
  "uploadImage": true,
  "settings": {
    "url": "%s"
  }
}' "$enabled" "$remind" "$remindEvery" "$url"

  curl -X PUT \
    -H "Content-Type: application/json" \
    -d "$payload" \
    http://admin:admin@grafana.loc/api/alert-notifications/2
}
||||
|
||||
#######################################
# Regenerate the provisioned alert dashboards with jsonnet.
# Options:
#   -a <count>  number of dashboards/alert rules to generate (default 1)
#   -c <value>  threshold passed to the template as `condition` (default 65)
# Outputs:   whatever jsonnet prints; validation errors on stderr
# Returns:   1 on invalid options or values, otherwise jsonnet's exit status
#######################################
provision() {
  local alerts=1
  local condition=65
  # Keep getopts state local so the function can be called more than once.
  local opt OPTIND=1 OPTARG

  while getopts ":a:c:" opt; do
    case "${opt}" in
      a) alerts=${OPTARG} ;;
      c) condition=${OPTARG} ;;
      :)
        echo "provision: option -${OPTARG} requires an argument" >&2
        return 1
        ;;
      \?)
        echo "provision: unknown option -${OPTARG}" >&2
        return 1
        ;;
    esac
  done
  shift $((OPTIND - 1))

  # --ext-code evaluates its value as Jsonnet source, so only plain numbers
  # are accepted here.
  if ! [[ "$alerts" =~ ^[0-9]+$ ]]; then
    echo "provision: alert rule count must be a non-negative integer, got '${alerts}'" >&2
    return 1
  fi
  if ! [[ "$condition" =~ ^-?[0-9]+(\.[0-9]+)?$ ]]; then
    echo "provision: condition value must be a number, got '${condition}'" >&2
    return 1
  fi

  requiresJsonnet

  # Drop previously generated dashboards so a lower count leaves no stale files.
  rm -f -- grafana/provisioning/dashboards/alerts/alert-*.json
  jsonnet -m grafana/provisioning/dashboards/alerts grafana/provisioning/alerts.jsonnet \
    --ext-code "alerts=${alerts}" \
    --ext-code "condition=${condition}"
}
||||
|
||||
#######################################
# Pause every alert rule by POSTing {"paused":true} to the
# admin pause-all-alerts endpoint.
# Arguments: none
# Outputs:   the API response printed by curl
# Returns:   the exit status of curl
#######################################
pause() {
  curl -H "Content-Type: application/json" \
    -d '{"paused":true}' \
    http://admin:admin@grafana.loc/api/admin/pause-all-alerts
}
||||
|
||||
#######################################
# Resume every alert rule by POSTing {"paused":false} to the
# admin pause-all-alerts endpoint.
# Arguments: none
# Outputs:   the API response printed by curl
# Returns:   the exit status of curl
#######################################
unpause() {
  curl -H "Content-Type: application/json" \
    -d '{"paused":false}' \
    http://admin:admin@grafana.loc/api/admin/pause-all-alerts
}
||||
|
||||
#######################################
# Print the command overview for alerts.sh.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  # printf interprets the \t and \n escapes itself, unlike plain echo.
  printf 'Usage: ./alerts.sh COMMAND [OPTIONS]\n\n'
  printf 'Commands\n'
  printf '  setup\t\t creates default alert notification channels\n'
  printf '  slack\t\t configure slack notification channel\n'
  printf '    [-d]\t\t\t disable notifier, default enabled\n'
  printf '    [-u <url>]\t\t\t url\n'
  printf '    [-r]\t\t\t send reminders\n'
  printf '    [-e <remind every>]\t\t default 10m\n\n'
  printf '  provision\t provision alerts\n'
  printf '    [-a <alert rule count>]\t default 1\n'
  printf '    [-c <condition value>]\t default 65\n\n'
  printf '  pause\t\t pause all alerts\n'
  printf '  unpause\t unpause all alerts\n'
}
||||
|
||||
#######################################
# Dispatch the first argument to the matching sub-command.
# Arguments: $1   - command: setup | slack | provision | pause | unpause
#            $2.. - options forwarded to slack / provision
# Outputs:   usage text when no command is given; an error plus the usage
#            text on stderr for an unknown command
# Returns:   the sub-command's status, or 1 for an unknown command
#######################################
main() {
  local cmd=${1:-}

  case "$cmd" in
    setup) setup ;;
    slack) slack "${@:2}" ;;
    provision) provision "${@:2}" ;;
    pause) pause ;;
    unpause) unpause ;;
    '') usage ;;
    *)
      # A mistyped command is reported instead of exiting silently with 0.
      echo "unknown command: ${cmd}" >&2
      usage >&2
      return 1
      ;;
  esac
}
||||
|
||||
# Script entry point: hand every command-line argument to main.
main "$@"
@ -0,0 +1,57 @@ |
||||
version: "2.1" |
||||
|
||||
services: |
||||
nginx-proxy: |
||||
image: jwilder/nginx-proxy |
||||
ports: |
||||
- "80:80" |
||||
volumes: |
||||
- /var/run/docker.sock:/tmp/docker.sock:ro |
||||
|
||||
mysql: |
||||
image: mysql |
||||
environment: |
||||
MYSQL_ROOT_PASSWORD: rootpass |
||||
MYSQL_DATABASE: grafana |
||||
MYSQL_USER: grafana |
||||
MYSQL_PASSWORD: password |
||||
healthcheck: |
||||
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] |
||||
timeout: 10s |
||||
retries: 10 |
||||
|
||||
grafana: |
||||
image: grafana/grafana:dev |
||||
volumes: |
||||
- ./grafana/provisioning/:/etc/grafana/provisioning/ |
||||
environment: |
||||
- VIRTUAL_HOST=grafana.loc |
||||
- GF_SERVER_ROOT_URL=http://grafana.loc |
||||
- GF_DATABASE_TYPE=mysql |
||||
- GF_DATABASE_HOST=mysql:3306 |
||||
- GF_DATABASE_NAME=grafana |
||||
- GF_DATABASE_USER=grafana |
||||
- GF_DATABASE_PASSWORD=password |
||||
- GF_SESSION_PROVIDER=mysql |
||||
- GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(mysql:3306)/grafana?allowNativePasswords=true |
||||
ports: |
||||
- 3000 |
||||
depends_on: |
||||
mysql: |
||||
condition: service_healthy |
||||
|
||||
prometheus: |
||||
image: prom/prometheus:v2.4.2 |
||||
volumes: |
||||
- ./prometheus/:/etc/prometheus/ |
||||
environment: |
||||
- VIRTUAL_HOST=prometheus.loc |
||||
ports: |
||||
- 9090 |
||||
|
||||
# mysqld-exporter: |
||||
# image: prom/mysqld-exporter |
||||
# environment: |
||||
# - DATA_SOURCE_NAME=grafana:password@(mysql:3306)/ |
||||
# ports: |
||||
# - 9104 |
@ -0,0 +1,202 @@ |
||||
local numAlerts = std.extVar('alerts'); |
||||
local condition = std.extVar('condition'); |
||||
local arr = std.range(1, numAlerts); |
||||
|
||||
local alertDashboardTemplate = { |
||||
"editable": true, |
||||
"gnetId": null, |
||||
"graphTooltip": 0, |
||||
"id": null, |
||||
"links": [], |
||||
"panels": [ |
||||
{ |
||||
"alert": { |
||||
"conditions": [ |
||||
{ |
||||
"evaluator": { |
||||
"params": [ |
||||
65 |
||||
], |
||||
"type": "gt" |
||||
}, |
||||
"operator": { |
||||
"type": "and" |
||||
}, |
||||
"query": { |
||||
"params": [ |
||||
"A", |
||||
"5m", |
||||
"now" |
||||
] |
||||
}, |
||||
"reducer": { |
||||
"params": [], |
||||
"type": "avg" |
||||
}, |
||||
"type": "query" |
||||
} |
||||
], |
||||
"executionErrorState": "alerting", |
||||
"frequency": "10s", |
||||
"handler": 1, |
||||
"name": "bulk alerting", |
||||
"noDataState": "no_data", |
||||
"notifications": [ |
||||
{ |
||||
"id": 2 |
||||
} |
||||
] |
||||
}, |
||||
"aliasColors": {}, |
||||
"bars": false, |
||||
"dashLength": 10, |
||||
"dashes": false, |
||||
"datasource": "Prometheus", |
||||
"fill": 1, |
||||
"gridPos": { |
||||
"h": 9, |
||||
"w": 12, |
||||
"x": 0, |
||||
"y": 0 |
||||
}, |
||||
"id": 2, |
||||
"legend": { |
||||
"avg": false, |
||||
"current": false, |
||||
"max": false, |
||||
"min": false, |
||||
"show": true, |
||||
"total": false, |
||||
"values": false |
||||
}, |
||||
"lines": true, |
||||
"linewidth": 1, |
||||
"nullPointMode": "null", |
||||
"percentage": false, |
||||
"pointradius": 5, |
||||
"points": false, |
||||
"renderer": "flot", |
||||
"seriesOverrides": [], |
||||
"spaceLength": 10, |
||||
"stack": false, |
||||
"steppedLine": false, |
||||
"targets": [ |
||||
{ |
||||
"$$hashKey": "object:117", |
||||
"expr": "go_goroutines", |
||||
"format": "time_series", |
||||
"intervalFactor": 1, |
||||
"refId": "A" |
||||
} |
||||
], |
||||
"thresholds": [ |
||||
{ |
||||
"colorMode": "critical", |
||||
"fill": true, |
||||
"line": true, |
||||
"op": "gt", |
||||
"value": 50 |
||||
} |
||||
], |
||||
"timeFrom": null, |
||||
"timeShift": null, |
||||
"title": "Panel Title", |
||||
"tooltip": { |
||||
"shared": true, |
||||
"sort": 0, |
||||
"value_type": "individual" |
||||
}, |
||||
"type": "graph", |
||||
"xaxis": { |
||||
"buckets": null, |
||||
"mode": "time", |
||||
"name": null, |
||||
"show": true, |
||||
"values": [] |
||||
}, |
||||
"yaxes": [ |
||||
{ |
||||
"format": "short", |
||||
"label": null, |
||||
"logBase": 1, |
||||
"max": null, |
||||
"min": null, |
||||
"show": true |
||||
}, |
||||
{ |
||||
"format": "short", |
||||
"label": null, |
||||
"logBase": 1, |
||||
"max": null, |
||||
"min": null, |
||||
"show": true |
||||
} |
||||
] |
||||
} |
||||
], |
||||
"schemaVersion": 16, |
||||
"style": "dark", |
||||
"tags": [], |
||||
"templating": { |
||||
"list": [] |
||||
}, |
||||
"time": { |
||||
"from": "now-6h", |
||||
"to": "now" |
||||
}, |
||||
"timepicker": { |
||||
"refresh_intervals": [ |
||||
"5s", |
||||
"10s", |
||||
"30s", |
||||
"1m", |
||||
"5m", |
||||
"15m", |
||||
"30m", |
||||
"1h", |
||||
"2h", |
||||
"1d" |
||||
], |
||||
"time_options": [ |
||||
"5m", |
||||
"15m", |
||||
"1h", |
||||
"6h", |
||||
"12h", |
||||
"24h", |
||||
"2d", |
||||
"7d", |
||||
"30d" |
||||
] |
||||
}, |
||||
"timezone": "", |
||||
"title": "New dashboard", |
||||
"uid": null, |
||||
"version": 0 |
||||
}; |
||||
|
||||
|
||||
// Top-level output: one dashboard object per entry of `arr`, keyed by the
// file name it should be written to ('alert-<x>.json'). Each dashboard is the
// template with a per-dashboard uid/title and an alert whose name and
// evaluator threshold are overridden.
{
  ['alert-' + std.toString(x) + '.json']:
    alertDashboardTemplate + {
      panels: [
        // The template has a single panel; keep everything but the alert
        // name and its one condition.
        alertDashboardTemplate.panels[0] +
        {
          alert+: {
            name: 'Alert rule ' + std.toString(x),
            conditions: [
              alertDashboardTemplate.panels[0].alert.conditions[0] +
              {
                // Replace the template threshold with the external `condition`.
                evaluator+: {
                  params: [condition],
                },
              },
            ],
          },
        },
      ],
      uid: 'alert-' + std.toString(x),
      title: 'Alert ' + std.toString(x),
    },
  for x in arr
}
@ -0,0 +1,8 @@ |
||||
apiVersion: 1 |
||||
|
||||
providers: |
||||
- name: 'Alerts' |
||||
folder: 'Alerts' |
||||
type: file |
||||
options: |
||||
path: /etc/grafana/provisioning/dashboards/alerts |
@ -0,0 +1,172 @@ |
||||
{ |
||||
"annotations": { |
||||
"list": [ |
||||
{ |
||||
"builtIn": 1, |
||||
"datasource": "-- Grafana --", |
||||
"enable": true, |
||||
"hide": true, |
||||
"iconColor": "rgba(0, 211, 255, 1)", |
||||
"name": "Annotations & Alerts", |
||||
"type": "dashboard" |
||||
} |
||||
] |
||||
}, |
||||
"editable": true, |
||||
"gnetId": null, |
||||
"graphTooltip": 0, |
||||
"links": [], |
||||
"panels": [ |
||||
{ |
||||
"aliasColors": { |
||||
"Active alerts": "#bf1b00" |
||||
}, |
||||
"bars": false, |
||||
"dashLength": 10, |
||||
"dashes": false, |
||||
"datasource": "Prometheus", |
||||
"fill": 1, |
||||
"gridPos": { |
||||
"h": 12, |
||||
"w": 24, |
||||
"x": 0, |
||||
"y": 0 |
||||
}, |
||||
"id": 2, |
||||
"interval": "", |
||||
"legend": { |
||||
"alignAsTable": true, |
||||
"avg": false, |
||||
"current": true, |
||||
"max": false, |
||||
"min": false, |
||||
"rightSide": true, |
||||
"show": true, |
||||
"total": false, |
||||
"values": true |
||||
}, |
||||
"lines": true, |
||||
"linewidth": 2, |
||||
"links": [], |
||||
"nullPointMode": "null", |
||||
"percentage": false, |
||||
"pointradius": 5, |
||||
"points": false, |
||||
"renderer": "flot", |
||||
"seriesOverrides": [ |
||||
{ |
||||
"alias": "Active grafana instances", |
||||
"dashes": true, |
||||
"fill": 0 |
||||
} |
||||
], |
||||
"spaceLength": 10, |
||||
"stack": false, |
||||
"steppedLine": false, |
||||
"targets": [ |
||||
{ |
||||
"expr": "sum(increase(grafana_alerting_notification_sent_total[1m])) by(job)", |
||||
"format": "time_series", |
||||
"instant": false, |
||||
"interval": "1m", |
||||
"intervalFactor": 1, |
||||
"legendFormat": "Notifications sent", |
||||
"refId": "A" |
||||
}, |
||||
{ |
||||
"expr": "min(grafana_alerting_active_alerts) without(instance)", |
||||
"format": "time_series", |
||||
"interval": "1m", |
||||
"intervalFactor": 1, |
||||
"legendFormat": "Active alerts", |
||||
"refId": "B" |
||||
}, |
||||
{ |
||||
"expr": "count(up{job=\"grafana\"})", |
||||
"format": "time_series", |
||||
"intervalFactor": 1, |
||||
"legendFormat": "Active grafana instances", |
||||
"refId": "C" |
||||
} |
||||
], |
||||
"thresholds": [], |
||||
"timeFrom": null, |
||||
"timeShift": null, |
||||
"title": "Notifications sent vs active alerts", |
||||
"tooltip": { |
||||
"shared": true, |
||||
"sort": 0, |
||||
"value_type": "individual" |
||||
}, |
||||
"type": "graph", |
||||
"xaxis": { |
||||
"buckets": null, |
||||
"mode": "time", |
||||
"name": null, |
||||
"show": true, |
||||
"values": [] |
||||
}, |
||||
"yaxes": [ |
||||
{ |
||||
"format": "short", |
||||
"label": null, |
||||
"logBase": 1, |
||||
"max": null, |
||||
"min": "0", |
||||
"show": true |
||||
}, |
||||
{ |
||||
"format": "short", |
||||
"label": null, |
||||
"logBase": 1, |
||||
"max": null, |
||||
"min": null, |
||||
"show": true |
||||
} |
||||
], |
||||
"yaxis": { |
||||
"align": false, |
||||
"alignLevel": 3 |
||||
} |
||||
} |
||||
], |
||||
"schemaVersion": 16, |
||||
"style": "dark", |
||||
"tags": [], |
||||
"templating": { |
||||
"list": [] |
||||
}, |
||||
"time": { |
||||
"from": "now-1h", |
||||
"to": "now" |
||||
}, |
||||
"timepicker": { |
||||
"refresh_intervals": [ |
||||
"5s", |
||||
"10s", |
||||
"30s", |
||||
"1m", |
||||
"5m", |
||||
"15m", |
||||
"30m", |
||||
"1h", |
||||
"2h", |
||||
"1d" |
||||
], |
||||
"time_options": [ |
||||
"5m", |
||||
"15m", |
||||
"1h", |
||||
"6h", |
||||
"12h", |
||||
"24h", |
||||
"2d", |
||||
"7d", |
||||
"30d" |
||||
] |
||||
}, |
||||
"timezone": "", |
||||
"title": "Overview", |
||||
"uid": "xHy7-hAik", |
||||
"version": 6 |
||||
} |
@ -0,0 +1,11 @@ |
||||
apiVersion: 1 |
||||
|
||||
datasources: |
||||
- name: Prometheus |
||||
type: prometheus |
||||
access: proxy |
||||
url: http://prometheus:9090 |
||||
jsonData: |
||||
timeInterval: 10s |
||||
queryTimeout: 30s |
||||
httpMethod: POST |
@ -0,0 +1,39 @@ |
||||
# my global config |
||||
global: |
||||
  scrape_interval: 10s # Scrape targets every 10 seconds.
  evaluation_interval: 10s # Evaluate rules every 10 seconds.
||||
# scrape_timeout is set to the global default (10s). |
||||
|
||||
# Load and evaluate rules in this file every 'evaluation_interval' seconds. |
||||
#rule_files: |
||||
# - "alert.rules" |
||||
# - "first.rules" |
||||
# - "second.rules" |
||||
|
||||
# alerting: |
||||
# alertmanagers: |
||||
# - scheme: http |
||||
# static_configs: |
||||
# - targets: |
||||
# - "127.0.0.1:9093" |
||||
|
||||
scrape_configs: |
||||
- job_name: 'prometheus' |
||||
static_configs: |
||||
- targets: ['localhost:9090'] |
||||
|
||||
- job_name: 'grafana' |
||||
dns_sd_configs: |
||||
- names: |
||||
- 'grafana' |
||||
type: 'A' |
||||
port: 3000 |
||||
refresh_interval: 10s |
||||
|
||||
# - job_name: 'mysql' |
||||
# dns_sd_configs: |
||||
# - names: |
||||
# - 'mysqld-exporter' |
||||
# type: 'A' |
||||
# port: 9104 |
||||
# refresh_interval: 10s |
Loading…
Reference in new issue