diff --git a/devenv/docker/ha_test/.gitignore b/devenv/docker/ha_test/.gitignore new file mode 100644 index 00000000000..0f4e139e204 --- /dev/null +++ b/devenv/docker/ha_test/.gitignore @@ -0,0 +1 @@ +grafana/provisioning/dashboards/alerts/alert-* \ No newline at end of file diff --git a/devenv/docker/ha_test/README.md b/devenv/docker/ha_test/README.md new file mode 100644 index 00000000000..bc93727ceae --- /dev/null +++ b/devenv/docker/ha_test/README.md @@ -0,0 +1,137 @@ +# Grafana High Availability (HA) test setup + +A set of docker compose services which together create a Grafana HA test setup with the capability of easily +scaling the number of Grafana instances up or down. + +Included services + +* Grafana +* MySQL - Grafana configuration database and session storage +* Prometheus - Monitoring of Grafana and used as datasource of provisioned alert rules +* Nginx - Reverse proxy for Grafana and Prometheus. Enables browsing Grafana/Prometheus UI using a hostname + +## Prerequisites + +### Build grafana docker container + +Build a Grafana docker container from the current branch and commit, and tag it as grafana/grafana:dev. + +```bash +$ cd <grafana repo> +$ make build-docker-full +``` + +### Virtual host names + +#### Alternative 1 - Use dnsmasq + +```bash +$ sudo apt-get install dnsmasq +$ echo 'address=/loc/127.0.0.1' | sudo tee /etc/dnsmasq.d/dnsmasq-loc.conf > /dev/null +$ sudo /etc/init.d/dnsmasq restart +$ ping whatever.loc +PING whatever.loc (127.0.0.1) 56(84) bytes of data. +64 bytes from localhost (127.0.0.1): icmp_seq=1 ttl=64 time=0.076 ms +--- whatever.loc ping statistics --- +1 packet transmitted, 1 received, 0% packet loss, time 1998ms +``` + +#### Alternative 2 - Manually update /etc/hosts + +Update your `/etc/hosts` to be able to access Grafana and/or Prometheus UI using a hostname. 
+ +```bash +$ cat /etc/hosts +127.0.0.1 grafana.loc +127.0.0.1 prometheus.loc +``` + +## Start services + +```bash +$ docker-compose up -d +``` + +Browse +* http://grafana.loc/ +* http://prometheus.loc/ + +Check for any errors + +```bash +$ docker-compose logs | grep error +``` + +### Scale Grafana instances up/down + +Scale number of Grafana instances to `` + +```bash +$ docker-compose up --scale grafana= -d +# for example 3 instances +$ docker-compose up --scale grafana=3 -d +``` + +## Test alerting + +### Create notification channels + +Creates default notification channels, if not already exists + +```bash +$ ./alerts.sh setup +``` + +### Slack notifications + +Disable + +```bash +$ ./alerts.sh slack -d +``` + +Enable and configure url + +```bash +$ ./alerts.sh slack -u https://hooks.slack.com/services/... +``` + +Enable, configure url and enable reminders + +```bash +$ ./alerts.sh slack -u https://hooks.slack.com/services/... -r -e 10m +``` + +### Provision alert dashboards with alert rules + +Provision 1 dashboard/alert rule (default) + +```bash +$ ./alerts.sh provision +``` + +Provision 10 dashboards/alert rules + +```bash +$ ./alerts.sh provision -a 10 +``` + +Provision 10 dashboards/alert rules and change condition to `gt > 100` + +```bash +$ ./alerts.sh provision -a 10 -c 100 +``` + +### Pause/unpause all alert rules + +Pause + +```bash +$ ./alerts.sh pause +``` + +Unpause + +```bash +$ ./alerts.sh unpause +``` diff --git a/devenv/docker/ha_test/alerts.sh b/devenv/docker/ha_test/alerts.sh new file mode 100755 index 00000000000..a05a4581739 --- /dev/null +++ b/devenv/docker/ha_test/alerts.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +requiresJsonnet() { + if ! 
type "jsonnet" > /dev/null; then + echo "you need you install jsonnet to run this script" + echo "follow the instructions on https://github.com/google/jsonnet" + exit 1 + fi +} + +setup() { + STATUS=$(curl -s -o /dev/null -w '%{http_code}' http://admin:admin@grafana.loc/api/alert-notifications/1) + if [ $STATUS -eq 200 ]; then + echo "Email already exists, skipping..." + else + curl -H "Content-Type: application/json" \ + -d '{ + "name": "Email", + "type": "email", + "isDefault": false, + "sendReminder": false, + "uploadImage": true, + "settings": { + "addresses": "user@test.com" + } + }' \ + http://admin:admin@grafana.loc/api/alert-notifications + fi + + STATUS=$(curl -s -o /dev/null -w '%{http_code}' http://admin:admin@grafana.loc/api/alert-notifications/2) + if [ $STATUS -eq 200 ]; then + echo "Slack already exists, skipping..." + else + curl -H "Content-Type: application/json" \ + -d '{ + "name": "Slack", + "type": "slack", + "isDefault": false, + "sendReminder": false, + "uploadImage": true + }' \ + http://admin:admin@grafana.loc/api/alert-notifications + fi +} + +slack() { + enabled=true + url='' + remind=false + remindEvery='10m' + + while getopts ":e:u:dr" o; do + case "${o}" in + e) + remindEvery=${OPTARG} + ;; + u) + url=${OPTARG} + ;; + d) + enabled=false + ;; + r) + remind=true + ;; + esac + done + shift $((OPTIND-1)) + + curl -X PUT \ + -H "Content-Type: application/json" \ + -d '{ + "id": 2, + "name": "Slack", + "type": "slack", + "isDefault": '$enabled', + "sendReminder": '$remind', + "frequency": "'$remindEvery'", + "uploadImage": true, + "settings": { + "url": "'$url'" + } + }' \ + http://admin:admin@grafana.loc/api/alert-notifications/2 +} + +provision() { + alerts=1 + condition=65 + while getopts ":a:c:" o; do + case "${o}" in + a) + alerts=${OPTARG} + ;; + c) + condition=${OPTARG} + ;; + esac + done + shift $((OPTIND-1)) + + requiresJsonnet + + rm -rf grafana/provisioning/dashboards/alerts/alert-*.json + jsonnet -m 
grafana/provisioning/dashboards/alerts grafana/provisioning/alerts.jsonnet --ext-code alerts=$alerts --ext-code condition=$condition +} + +pause() { + curl -H "Content-Type: application/json" \ + -d '{"paused":true}' \ + http://admin:admin@grafana.loc/api/admin/pause-all-alerts +} + +unpause() { + curl -H "Content-Type: application/json" \ + -d '{"paused":false}' \ + http://admin:admin@grafana.loc/api/admin/pause-all-alerts +} + +usage() { + echo -e "Usage: ./alerts.sh COMMAND [OPTIONS]\n" + echo -e "Commands" + echo -e " setup\t\t creates default alert notification channels" + echo -e " slack\t\t configure slack notification channel" + echo -e " [-d]\t\t\t disable notifier, default enabled" + echo -e " [-u]\t\t\t url" + echo -e " [-r]\t\t\t send reminders" + echo -e " [-e ]\t\t default 10m\n" + echo -e " provision\t provision alerts" + echo -e " [-a ]\t default 1" + echo -e " [-c ]\t default 65\n" + echo -e " pause\t\t pause all alerts" + echo -e " unpause\t unpause all alerts" +} + +main() { + local cmd=$1 + + if [[ $cmd == "setup" ]]; then + setup + elif [[ $cmd == "slack" ]]; then + slack "${@:2}" + elif [[ $cmd == "provision" ]]; then + provision "${@:2}" + elif [[ $cmd == "pause" ]]; then + pause + elif [[ $cmd == "unpause" ]]; then + unpause + fi + + if [[ -z "$cmd" ]]; then + usage + fi +} + +main "$@" diff --git a/devenv/docker/ha_test/docker-compose.yaml b/devenv/docker/ha_test/docker-compose.yaml new file mode 100644 index 00000000000..78f98ab8dc5 --- /dev/null +++ b/devenv/docker/ha_test/docker-compose.yaml @@ -0,0 +1,57 @@ +version: "2.1" + +services: + nginx-proxy: + image: jwilder/nginx-proxy + ports: + - "80:80" + volumes: + - /var/run/docker.sock:/tmp/docker.sock:ro + + mysql: + image: mysql + environment: + MYSQL_ROOT_PASSWORD: rootpass + MYSQL_DATABASE: grafana + MYSQL_USER: grafana + MYSQL_PASSWORD: password + healthcheck: + test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] + timeout: 10s + retries: 10 + + grafana: + image: 
grafana/grafana:dev + volumes: + - ./grafana/provisioning/:/etc/grafana/provisioning/ + environment: + - VIRTUAL_HOST=grafana.loc + - GF_SERVER_ROOT_URL=http://grafana.loc + - GF_DATABASE_TYPE=mysql + - GF_DATABASE_HOST=mysql:3306 + - GF_DATABASE_NAME=grafana + - GF_DATABASE_USER=grafana + - GF_DATABASE_PASSWORD=password + - GF_SESSION_PROVIDER=mysql + - GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(mysql:3306)/grafana?allowNativePasswords=true + ports: + - 3000 + depends_on: + mysql: + condition: service_healthy + + prometheus: + image: prom/prometheus:v2.4.2 + volumes: + - ./prometheus/:/etc/prometheus/ + environment: + - VIRTUAL_HOST=prometheus.loc + ports: + - 9090 + + # mysqld-exporter: + # image: prom/mysqld-exporter + # environment: + # - DATA_SOURCE_NAME=grafana:password@(mysql:3306)/ + # ports: + # - 9104 \ No newline at end of file diff --git a/devenv/docker/ha_test/grafana/provisioning/alerts.jsonnet b/devenv/docker/ha_test/grafana/provisioning/alerts.jsonnet new file mode 100644 index 00000000000..86ded7e79d6 --- /dev/null +++ b/devenv/docker/ha_test/grafana/provisioning/alerts.jsonnet @@ -0,0 +1,202 @@ +local numAlerts = std.extVar('alerts'); +local condition = std.extVar('condition'); +local arr = std.range(1, numAlerts); + +local alertDashboardTemplate = { + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 65 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "10s", + "handler": 1, + "name": "bulk alerting", + "noDataState": "no_data", + "notifications": [ + { + "id": 2 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 9, + 
"w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "$$hashKey": "object:117", + "expr": "go_goroutines", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 50 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Panel Title", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "New dashboard", + "uid": null, + "version": 0 +}; + + +{ + ['alert-' + std.toString(x) + '.json']: + alertDashboardTemplate + { + panels: [ + alertDashboardTemplate.panels[0] + + { + alert+: { + name: 'Alert rule ' + x, + conditions: [ + alertDashboardTemplate.panels[0].alert.conditions[0] + + { + evaluator+: { + params: [condition] + } + }, + ], + }, + }, + ], + uid: 'alert-' + x, + title: 
'Alert ' + x + }, + for x in arr +} \ No newline at end of file diff --git a/devenv/docker/ha_test/grafana/provisioning/dashboards/alerts.yaml b/devenv/docker/ha_test/grafana/provisioning/dashboards/alerts.yaml new file mode 100644 index 00000000000..60b6cd4bb04 --- /dev/null +++ b/devenv/docker/ha_test/grafana/provisioning/dashboards/alerts.yaml @@ -0,0 +1,8 @@ +apiVersion: 1 + +providers: + - name: 'Alerts' + folder: 'Alerts' + type: file + options: + path: /etc/grafana/provisioning/dashboards/alerts diff --git a/devenv/docker/ha_test/grafana/provisioning/dashboards/alerts/overview.json b/devenv/docker/ha_test/grafana/provisioning/dashboards/alerts/overview.json new file mode 100644 index 00000000000..53e33c37b1f --- /dev/null +++ b/devenv/docker/ha_test/grafana/provisioning/dashboards/alerts/overview.json @@ -0,0 +1,172 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "aliasColors": { + "Active alerts": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": "", + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Active grafana instances", + "dashes": true, + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"sum(increase(grafana_alerting_notification_sent_total[1m])) by(job)", + "format": "time_series", + "instant": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Notifications sent", + "refId": "A" + }, + { + "expr": "min(grafana_alerting_active_alerts) without(instance)", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Active alerts", + "refId": "B" + }, + { + "expr": "count(up{job=\"grafana\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Active grafana instances", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Notifications sent vs active alerts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": 3 + } + } + ], + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Overview", + "uid": "xHy7-hAik", + "version": 6 +} \ No newline at end of file diff --git a/devenv/docker/ha_test/grafana/provisioning/datasources/datasources.yaml b/devenv/docker/ha_test/grafana/provisioning/datasources/datasources.yaml new file mode 100644 index 00000000000..8d59793be16 --- /dev/null +++ b/devenv/docker/ha_test/grafana/provisioning/datasources/datasources.yaml @@ -0,0 +1,11 @@ 
+apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + jsonData: + timeInterval: 10s + queryTimeout: 30s + httpMethod: POST \ No newline at end of file diff --git a/devenv/docker/ha_test/prometheus/prometheus.yml b/devenv/docker/ha_test/prometheus/prometheus.yml new file mode 100644 index 00000000000..ea97ba8ba05 --- /dev/null +++ b/devenv/docker/ha_test/prometheus/prometheus.yml @@ -0,0 +1,39 @@ +# my global config +global: + scrape_interval: 10s # Scrape targets every 10 seconds (the Prometheus default is 1m). + evaluation_interval: 10s # Evaluate rules every 10 seconds (the Prometheus default is 1m). + # scrape_timeout is set to the global default (10s). + +# Load and evaluate rules in this file every 'evaluation_interval' seconds. +#rule_files: +# - "alert.rules" +# - "first.rules" +# - "second.rules" + +# alerting: +# alertmanagers: +# - scheme: http +# static_configs: +# - targets: +# - "127.0.0.1:9093" + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'grafana' + dns_sd_configs: + - names: + - 'grafana' + type: 'A' + port: 3000 + refresh_interval: 10s + + # - job_name: 'mysql' + # dns_sd_configs: + # - names: + # - 'mysqld-exporter' + # type: 'A' + # port: 9104 + # refresh_interval: 10s \ No newline at end of file