Alerting: Support Unified Alerting with Grafana HA (#37920)

* Alerting: Support Unified Alerting in Grafana's HA mode.
gotjosh 4 years ago committed by GitHub
parent 92209f1011
commit 7db97097c9
  1. conf/defaults.ini (34 lines changed)
  2. conf/sample.ini (33 lines changed)
  3. devenv/docker/ha-test-unified-alerting/.gitignore (1 line changed)
  4. devenv/docker/ha-test-unified-alerting/README.md (66 lines changed)
  5. devenv/docker/ha-test-unified-alerting/docker-compose.yaml (90 lines changed)
  6. devenv/docker/ha-test-unified-alerting/grafana/provisioning/alerts.jsonnet (203 lines changed)
  7. devenv/docker/ha-test-unified-alerting/grafana/provisioning/dashboards/alerts/overview.json (172 lines changed)
  8. devenv/docker/ha-test-unified-alerting/grafana/provisioning/dashboards/dashboards.yaml (14 lines changed)
  9. devenv/docker/ha-test-unified-alerting/grafana/provisioning/dashboards/mysql/overview.json (5397 lines changed)
  10. devenv/docker/ha-test-unified-alerting/grafana/provisioning/datasources/datasources.yaml (16 lines changed)
  11. devenv/docker/ha-test-unified-alerting/prometheus/prometheus.yml (47 lines changed)
  12. docs/sources/administration/configuration.md (46 lines changed)
  13. pkg/services/ngalert/metrics/ngalert.go (2 lines changed)
  14. pkg/services/ngalert/ngalert.go (8 lines changed)
  15. pkg/services/ngalert/notifier/alertmanager.go (44 lines changed)
  16. pkg/services/ngalert/notifier/alertmanager_test.go (2 lines changed)
  17. pkg/services/ngalert/notifier/multiorg_alertmanager.go (78 lines changed)
  18. pkg/services/ngalert/notifier/multiorg_alertmanager_test.go (22 lines changed)
  19. pkg/services/ngalert/schedule/schedule_unit_test.go (4 lines changed)
  20. pkg/setting/setting.go (28 lines changed)
  21. pkg/setting/setting_unified_alerting.go (57 lines changed)
  22. pkg/setting/setting_unified_alerting_test.go (39 lines changed)
  23. pkg/tests/api/alerting/api_admin_configuration_test.go (6 lines changed)
  24. pkg/tests/api/alerting/api_alertmanager_configuration_test.go (14 lines changed)
  25. pkg/tests/testinfra/testinfra.go (33 lines changed)

@ -211,7 +211,7 @@ rudderstack_data_plane_url =
# Application Insights connection string. Specify a URL string to enable this feature.
application_insights_connection_string =
# Optional. Specifies an Application Insights endpoint URL where the endpoint string is wrapped in backticks ``.
application_insights_endpoint_url =
#################################### Security ############################
@ -732,7 +732,37 @@ global_alert_rule = -1
#################################### Unified Alerting ####################
[unified_alerting]
# Specify the frequency of polling for admin config changes.
admin_config_poll_interval_seconds = 60
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
admin_config_poll_interval = 60s
# Specify the frequency of polling for Alertmanager config changes.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
alertmanager_config_poll_interval = 60s
# Listen address/hostname and port to receive unified alerting messages for other Grafana instances. The port is used for both TCP and UDP. It is assumed other Grafana instances are also running on the same port.
ha_listen_address = "0.0.0.0:9094"
# Explicit address/hostname and port to advertise to other Grafana instances. The port is used for both TCP and UDP.
ha_advertise_address = ""
# Comma-separated list of initial instances (in a format of host:port) that will form the HA cluster. Configuring this setting will enable High Availability mode for alerting.
ha_peers = ""
# Time to wait for an instance to send a notification via the Alertmanager. In HA, each Grafana instance will
# be assigned a position (e.g. 0, 1). We then multiply this position with the timeout to indicate how long
# each instance should wait before sending the notification, to account for replication lag.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
ha_peer_timeout = 15s
# The interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated
# across the cluster more quickly at the expense of increased bandwidth usage.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
ha_gossip_interval = 200ms
# The interval between gossip full state syncs. Setting this interval lower (more frequent) will increase convergence speeds
# across larger clusters at the expense of increased bandwidth usage.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
ha_push_pull_interval = 60s
#################################### Alerting ############################
[alerting]

@ -709,7 +709,38 @@
#################################### Unified Alerting ####################
[unified_alerting]
# Specify the frequency of polling for admin config changes.
;admin_config_poll_interval_seconds = 60
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;admin_config_poll_interval = 60s
# Specify the frequency of polling for Alertmanager config changes.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;alertmanager_config_poll_interval = 60s
# Listen address/hostname and port to receive unified alerting messages for other Grafana instances. The port is used for both TCP and UDP. It is assumed other Grafana instances are also running on the same port. The default value is `0.0.0.0:9094`.
;ha_listen_address = "0.0.0.0:9094"
# Explicit address/hostname and port to advertise to other Grafana instances. The port is used for both TCP and UDP.
;ha_advertise_address = ""
# Comma-separated list of initial instances (in a format of host:port) that will form the HA cluster. Configuring this setting will enable High Availability mode for alerting.
;ha_peers = ""
# Time to wait for an instance to send a notification via the Alertmanager. In HA, each Grafana instance will
# be assigned a position (e.g. 0, 1). We then multiply this position with the timeout to indicate how long
# each instance should wait before sending the notification, to account for replication lag.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;ha_peer_timeout = "15s"
# The interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated
# across the cluster more quickly at the expense of increased bandwidth usage.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;ha_gossip_interval = "200ms"
# The interval between gossip full state syncs. Setting this interval lower (more frequent) will increase convergence speeds
# across larger clusters at the expense of increased bandwidth usage.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;ha_push_pull_interval = "60s"
#################################### Alerting ############################
[alerting]

@ -0,0 +1 @@
grafana/provisioning/dashboards/alerts/alert-*

@ -0,0 +1,66 @@
# Grafana Unified Alerting High Availability (HA) test setup
A set of Docker Compose services that together create a Grafana HA test setup for unified alerting.
Included services:
- Grafana
- MySQL - Grafana configuration database and session storage, plus an exporter for metrics
- Prometheus - Monitors Grafana and is used as a data source
- Nginx - Reverse proxy for Grafana and Prometheus, enabling access to the Grafana/Prometheus UI via a hostname
## Prerequisites
### Build Grafana Docker container
Build a Grafana Docker container from the current branch and commit, and tag it as grafana/grafana:dev.
```bash
$ cd <grafana repo>
$ make build-docker-full
```
### Virtual host names
#### Alternative 1 - Use dnsmasq
```bash
$ sudo apt-get install dnsmasq
$ echo 'address=/loc/127.0.0.1' | sudo tee /etc/dnsmasq.d/dnsmasq-loc.conf > /dev/null
$ sudo /etc/init.d/dnsmasq restart
$ ping whatever.loc
PING whatever.loc (127.0.0.1) 56(84) bytes of data.
64 bytes from localhost (127.0.0.1): icmp_seq=1 ttl=64 time=0.076 ms
--- whatever.loc ping statistics ---
1 packet transmitted, 1 received, 0% packet loss, time 1998ms
```
#### Alternative 2 - Manually update /etc/hosts
Update your `/etc/hosts` to be able to access Grafana and/or Prometheus UI using a hostname.
```bash
$ cat /etc/hosts
127.0.0.1 grafana.loc
127.0.0.1 prometheus.loc
```
## Start services
```bash
$ docker-compose up -d
```
Browse
- http://grafana.loc/
- http://prometheus.loc/
## Test alerting
### Create contact points
TBD
### Create alerts
TBD
### Create silences
TBD

@ -0,0 +1,90 @@
version: "2.1"
services:
db:
image: mysql:5.6
platform: linux/x86_64
environment:
MYSQL_ROOT_PASSWORD: rootpass
MYSQL_DATABASE: grafana
MYSQL_USER: grafana
MYSQL_PASSWORD: password
command: [mysqld, --character-set-server=utf8mb4, --collation-server=utf8mb4_unicode_ci, --innodb_monitor_enable=all, --max-connections=1001]
ports:
- 3306
healthcheck:
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
timeout: 10s
retries: 10
mysqld-exporter:
image: prom/mysqld-exporter
environment:
- DATA_SOURCE_NAME=root:rootpass@(db:3306)/
ports:
- 9104
depends_on:
db:
condition: service_healthy
prometheus:
image: prom/prometheus:v2.4.2
volumes:
- ./prometheus/:/etc/prometheus/
environment:
- VIRTUAL_HOST=prometheus.loc
ports:
- 909
nginx-proxy:
image: jwilder/nginx-proxy
ports:
- "80:80"
volumes:
- /var/run/docker.sock:/tmp/docker.sock:ro
depends_on:
db:
condition: service_healthy
grafana1:
image: grafana/grafana:dev
volumes:
- ./grafana/provisioning/:/etc/grafana/provisioning/
environment:
- VIRTUAL_HOST=grafana.loc
- GF_FEATURE_TOGGLES_ENABLE=ngalert
- GF_UNIFIED_ALERTING_HA_PEERS=ha-test-unified-alerting_grafana2_1:9094,ha-test-unified-alerting_grafana1_1:9094
- GF_SERVER_ROOT_URL=http://grafana.loc
- GF_DATABASE_NAME=grafana
- GF_DATABASE_USER=grafana
- GF_DATABASE_PASSWORD=password
- GF_DATABASE_TYPE=mysql
- GF_DATABASE_HOST=db:3306
- GF_DATABASE_MAX_OPEN_CONN=300
- GF_SESSION_PROVIDER=mysql
- GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(db:3306)/grafana?allowNativePasswords=true
ports:
- 3010:3000
depends_on:
db:
condition: service_healthy
grafana2:
image: grafana/grafana:dev
volumes:
- ./grafana/provisioning/:/etc/grafana/provisioning/
environment:
- VIRTUAL_HOST=grafana.loc
- GF_FEATURE_TOGGLES_ENABLE=ngalert
- GF_UNIFIED_ALERTING_HA_PEERS=ha-test-unified-alerting_grafana2_1:9094,ha-test-unified-alerting_grafana1_1:9094
- GF_SERVER_ROOT_URL=http://grafana.loc
- GF_DATABASE_NAME=grafana
- GF_DATABASE_USER=grafana
- GF_DATABASE_PASSWORD=password
- GF_DATABASE_TYPE=mysql
- GF_DATABASE_HOST=db:3306
- GF_DATABASE_MAX_OPEN_CONN=300
- GF_SESSION_PROVIDER=mysql
- GF_SESSION_PROVIDER_CONFIG=grafana:password@tcp(db:3306)/grafana?allowNativePasswords=true
ports:
- 3020:3000
depends_on:
db:
condition: service_healthy
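Both grafana services above configure HA purely through environment variables; Grafana maps `GF_<SECTION>_<KEY>` variables onto ini options, so `GF_UNIFIED_ALERTING_HA_PEERS` corresponds to `ha_peers` under `[unified_alerting]`. A minimal sketch of that naming convention (the `envKey` helper below is hypothetical, for illustration only):

```go
package main

import (
	"fmt"
	"strings"
)

// envKey shows the GF_<SECTION>_<KEY> convention Grafana uses to override
// ini options via the environment: section and key are upper-cased and
// prefixed with GF_.
func envKey(section, key string) string {
	return "GF_" + strings.ToUpper(section) + "_" + strings.ToUpper(key)
}

func main() {
	// Prints GF_UNIFIED_ALERTING_HA_PEERS, matching the compose file above.
	fmt.Println(envKey("unified_alerting", "ha_peers"))
}
```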

@ -0,0 +1,203 @@
local numAlerts = std.extVar('alerts');
local condition = std.extVar('condition');
local arr = std.range(1, numAlerts);
local alertDashboardTemplate = {
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"alert": {
"conditions": [
{
"evaluator": {
"params": [
65
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"frequency": "10s",
"handler": 1,
"for": "1m",
"name": "bulk alerting",
"noDataState": "no_data",
"notifications": [
{
"id": 2
}
]
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"$$hashKey": "object:117",
"expr": "go_goroutines",
"format": "time_series",
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 50
}
],
"timeFrom": null,
"timeShift": null,
"title": "Panel Title",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"schemaVersion": 16,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "New dashboard",
"uid": null,
"version": 0
};
{
['alert-' + std.toString(x) + '.json']:
alertDashboardTemplate + {
panels: [
alertDashboardTemplate.panels[0] +
{
alert+: {
name: 'Alert rule ' + x,
conditions: [
alertDashboardTemplate.panels[0].alert.conditions[0] +
{
evaluator+: {
params: [condition]
}
},
],
},
},
],
uid: 'alert-' + x,
title: 'Alert ' + x
},
for x in arr
}

@ -0,0 +1,172 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"links": [],
"panels": [
{
"aliasColors": {
"Active alerts": "#bf1b00"
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"interval": "",
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "Active grafana instances",
"dashes": true,
"fill": 0
}
],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(increase(grafana_alerting_notification_sent_total[1m])) by(job)",
"format": "time_series",
"instant": false,
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "Notifications sent",
"refId": "A"
},
{
"expr": "min(grafana_alerting_active_alerts) without(instance)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "Active alerts",
"refId": "B"
},
{
"expr": "count(up{job=\"grafana\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Active grafana instances",
"refId": "C"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Notifications sent vs active alerts",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": 3
}
}
],
"schemaVersion": 16,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "Overview",
"uid": "xHy7-hAik",
"version": 6
}

@ -0,0 +1,14 @@
apiVersion: 1

providers:
  - name: 'Alerts'
    folder: 'Alerts'
    type: file
    options:
      path: /etc/grafana/provisioning/dashboards/alerts
  - name: 'MySQL'
    folder: 'MySQL'
    type: file
    options:
      path: /etc/grafana/provisioning/dashboards/mysql

@ -0,0 +1,16 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    jsonData:
      timeInterval: 10s
      queryTimeout: 30s
      httpMethod: POST
  - name: Loki
    type: loki
    access: proxy
    url: http://loki:3100

@ -0,0 +1,47 @@
# my global config
global:
  scrape_interval: 10s     # Scrape targets every 10 seconds.
  evaluation_interval: 10s # Evaluate rules every 10 seconds.
  # scrape_timeout is set to the global default (10s).

# Load and evaluate rules in this file every 'evaluation_interval' seconds.
#rule_files:
#  - "alert.rules"
#  - "first.rules"
#  - "second.rules"

# alerting:
#   alertmanagers:
#     - scheme: http
#       static_configs:
#         - targets:
#             - "127.0.0.1:9093"

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  - job_name: 'grafana'
    dns_sd_configs:
      - names:
          - 'grafana'
        type: 'A'
        port: 3000
        refresh_interval: 10s

  - job_name: 'mysql'
    dns_sd_configs:
      - names:
          - 'mysqld-exporter'
        type: 'A'
        port: 9104
        refresh_interval: 10s

  - job_name: 'loki'
    dns_sd_configs:
      - names:
          - 'loki'
        type: 'A'
        port: 3100
        refresh_interval: 10s

@ -1119,9 +1119,51 @@ Sets a global limit on number of alert rules that can be created. Default is -1
For more information about the Grafana 8 alerts, refer to [Unified Alerting]({{< relref "../alerting/unified-alerting/_index.md" >}}).
### admin_config_poll_interval_seconds
### admin_config_poll_interval
Specify the frequency of polling for admin config changes. The default value is `60`.
Specify the frequency of polling for admin config changes. The default value is `60s`.
The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
### alertmanager_config_poll_interval
Specify the frequency of polling for Alertmanager config changes. The default value is `60s`.
The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
### ha_listen_address
Listen address/hostname and port to receive unified alerting messages for other Grafana instances. The port is used for both TCP and UDP. It is assumed other Grafana instances are also running on the same port. The default value is `0.0.0.0:9094`.
### ha_advertise_address
Explicit address/hostname and port to advertise to other Grafana instances. The port is used for both TCP and UDP.
### ha_peers
Comma-separated list of initial instances (in a format of host:port) that will form the HA cluster. Configuring this setting will enable High Availability mode for alerting.
### ha_peer_timeout
Time to wait for an instance to send a notification via the Alertmanager. In HA, each Grafana instance will
be assigned a position (e.g. 0, 1). We then multiply this position with the timeout to indicate how long
each instance should wait before sending the notification, to account for replication lag. The default value is `15s`.
The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
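For example, with the default `ha_peer_timeout` of `15s`, the instance at position 0 notifies immediately, position 1 waits 15s, and position 2 waits 30s before flushing. A minimal sketch of that calculation, mirroring the `waitFunc` added in this change (the helper name below is illustrative only):

```go
package main

import (
	"fmt"
	"time"
)

// notificationWait scales the wait by cluster position: position 0 sends
// right away, and each later peer waits one extra ha_peer_timeout so the
// notification log has time to replicate.
func notificationWait(position int, peerTimeout time.Duration) time.Duration {
	return time.Duration(position) * peerTimeout
}

func main() {
	peerTimeout := 15 * time.Second // ha_peer_timeout default
	for pos := 0; pos < 3; pos++ {
		fmt.Printf("position %d waits %s before flushing notifications\n", pos, notificationWait(pos, peerTimeout))
	}
}
```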
### ha_gossip_interval
The interval between sending gossip messages. By lowering this value (more frequent) gossip messages are propagated
across the cluster more quickly at the expense of increased bandwidth usage. The default value is `200ms`.
The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
### ha_push_pull_interval
The interval between gossip full state syncs. Setting this interval lower (more frequent) will increase convergence speeds
across larger clusters at the expense of increased bandwidth usage. The default value is `60s`.
The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
<hr>

@ -52,6 +52,7 @@ type Scheduler struct {
}
type MultiOrgAlertmanager struct {
Registerer prometheus.Registerer
ActiveConfigurations prometheus.Gauge
DiscoveredConfigurations prometheus.Gauge
registries *OrgRegistries
@ -178,6 +179,7 @@ func newStateMetrics(r prometheus.Registerer) *State {
func newMultiOrgAlertmanagerMetrics(r prometheus.Registerer) *MultiOrgAlertmanager {
return &MultiOrgAlertmanager{
Registerer: r,
registries: NewOrgRegistries(),
DiscoveredConfigurations: promauto.With(r).NewGauge(prometheus.GaugeOpts{
Namespace: Namespace,

@ -84,6 +84,8 @@ type AlertNG struct {
}
func (ng *AlertNG) init() error {
var err error
baseInterval := ng.Cfg.AlertingBaseInterval
if baseInterval <= 0 {
baseInterval = defaultBaseIntervalSeconds
@ -97,7 +99,11 @@ func (ng *AlertNG) init() error {
Logger: ng.Log,
}
ng.MultiOrgAlertmanager = notifier.NewMultiOrgAlertmanager(ng.Cfg, store, store, ng.KVStore, ng.Metrics.GetMultiOrgAlertmanagerMetrics())
multiOrgMetrics := ng.Metrics.GetMultiOrgAlertmanagerMetrics()
ng.MultiOrgAlertmanager, err = notifier.NewMultiOrgAlertmanager(ng.Cfg, store, store, ng.KVStore, multiOrgMetrics, log.New("ngalert.multiorg.alertmanager"))
if err != nil {
return err
}
// Let's make sure we're able to complete an initial sync of Alertmanagers before we start the alerting components.
if err := ng.MultiOrgAlertmanager.LoadAndSyncAlertmanagersForOrgs(context.Background()); err != nil {

@ -15,6 +15,7 @@ import (
gokit_log "github.com/go-kit/kit/log"
amv2 "github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/alertmanager/cluster"
"github.com/prometheus/alertmanager/dispatch"
"github.com/prometheus/alertmanager/inhibit"
"github.com/prometheus/alertmanager/nflog"
@ -24,6 +25,7 @@ import (
"github.com/prometheus/alertmanager/silence"
"github.com/prometheus/alertmanager/template"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/components/securejsondata"
@ -77,9 +79,16 @@ const (
`
)
type ClusterPeer interface {
AddState(string, cluster.State, prometheus.Registerer) cluster.ClusterChannel
Position() int
WaitReady(context.Context) error
}
type Alertmanager struct {
logger log.Logger
gokitLogger gokit_log.Logger
OrgID int64
Settings *setting.Cfg
Store store.AlertingStore
@ -90,6 +99,8 @@ type Alertmanager struct {
marker types.Marker
alerts *mem.Alerts
route *dispatch.Route
peer ClusterPeer
peerTimeout time.Duration
dispatcher *dispatch.Dispatcher
inhibitor *inhibit.Inhibitor
@ -111,7 +122,7 @@ type Alertmanager struct {
orgID int64
}
func newAlertmanager(orgID int64, cfg *setting.Cfg, store store.AlertingStore, kvStore kvstore.KVStore, m *metrics.Alertmanager) (*Alertmanager, error) {
func newAlertmanager(orgID int64, cfg *setting.Cfg, store store.AlertingStore, kvStore kvstore.KVStore, peer ClusterPeer, m *metrics.Alertmanager) (*Alertmanager, error) {
am := &Alertmanager{
Settings: cfg,
stopc: make(chan struct{}),
@ -120,6 +131,8 @@ func newAlertmanager(orgID int64, cfg *setting.Cfg, store store.AlertingStore, k
stageMetrics: notify.NewMetrics(m.Registerer),
dispatcherMetrics: dispatch.NewDispatcherMetrics(false, m.Registerer),
Store: store,
peer: peer,
peerTimeout: cfg.HAPeerTimeout,
Metrics: m,
orgID: orgID,
}
@ -148,6 +161,9 @@ func newAlertmanager(orgID int64, cfg *setting.Cfg, store store.AlertingStore, k
if err != nil {
return nil, fmt.Errorf("unable to initialize the notification log component of alerting: %w", err)
}
c := am.peer.AddState(fmt.Sprintf("notificationlog:%d", am.OrgID), am.notificationLog, m.Registerer)
am.notificationLog.SetBroadcast(c.Broadcast)
// Initialize silences
am.silences, err = silence.New(silence.Options{
Metrics: m.Registerer,
@ -158,6 +174,9 @@ func newAlertmanager(orgID int64, cfg *setting.Cfg, store store.AlertingStore, k
return nil, fmt.Errorf("unable to initialize the silencing component of alerting: %w", err)
}
c = am.peer.AddState(fmt.Sprintf("silences:%d", am.OrgID), am.silences, m.Registerer)
am.silences.SetBroadcast(c.Broadcast)
am.wg.Add(1)
go func() {
am.silences.Maintenance(15*time.Minute, silencesFilePath, am.stopc, func() (int64, error) {
@ -392,15 +411,16 @@ func (am *Alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig
am.inhibitor = inhibit.NewInhibitor(am.alerts, cfg.AlertmanagerConfig.InhibitRules, am.marker, am.gokitLogger)
am.silencer = silence.NewSilencer(am.silences, am.marker, am.gokitLogger)
meshStage := notify.NewGossipSettleStage(am.peer)
inhibitionStage := notify.NewMuteStage(am.inhibitor)
silencingStage := notify.NewMuteStage(am.silencer)
for name := range integrationsMap {
stage := am.createReceiverStage(name, integrationsMap[name], waitFunc, am.notificationLog)
routingStage[name] = notify.MultiStage{silencingStage, inhibitionStage, stage}
stage := am.createReceiverStage(name, integrationsMap[name], am.waitFunc, am.notificationLog)
routingStage[name] = notify.MultiStage{meshStage, silencingStage, inhibitionStage, stage}
}
am.route = dispatch.NewRoute(cfg.AlertmanagerConfig.Route, nil)
am.dispatcher = dispatch.NewDispatcher(am.alerts, am.route, routingStage, am.marker, timeoutFunc, &nilLimits{}, am.gokitLogger, am.dispatcherMetrics)
am.dispatcher = dispatch.NewDispatcher(am.alerts, am.route, routingStage, am.marker, am.timeoutFunc, &nilLimits{}, am.gokitLogger, am.dispatcherMetrics)
am.wg.Add(1)
go func() {
@ -701,21 +721,17 @@ func (am *Alertmanager) createReceiverStage(name string, integrations []notify.I
return fs
}
func waitFunc() time.Duration {
// When it's a single instance, we don't need additional wait. The routing policies will have their own group wait.
// We need >0 wait here in case we have peers to sync the notification state with. 0 wait in that case can result
// in duplicate notifications being sent.
// TODO: we have setting.AlertingNotificationTimeout in legacy settings. Either use that or separate set of config
// for clustering with intuitive name, like "PeerTimeout".
return 0
func (am *Alertmanager) waitFunc() time.Duration {
return time.Duration(am.peer.Position()) * am.peerTimeout
}
func timeoutFunc(d time.Duration) time.Duration {
// TODO: What does MinTimeout mean here?
func (am *Alertmanager) timeoutFunc(d time.Duration) time.Duration {
// time.Duration d relates to the receiver's group_interval. Even with a group interval of 1s,
// we need to make sure (non-position-0) peers in the cluster wait before flushing the notifications.
if d < notify.MinTimeout {
d = notify.MinTimeout
}
return d + waitFunc()
return d + am.waitFunc()
}
type nilLimits struct{}

@ -48,7 +48,7 @@ func setupAMTest(t *testing.T) *Alertmanager {
}
kvStore := newFakeKVStore(t)
am, err := newAlertmanager(1, cfg, s, kvStore, m)
am, err := newAlertmanager(1, cfg, s, kvStore, &NilPeer{}, m)
require.NoError(t, err)
return am
}

@ -6,6 +6,12 @@ import (
"sync"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/logging"
gokit_log "github.com/go-kit/kit/log"
"github.com/prometheus/alertmanager/cluster"
"github.com/prometheus/client_golang/prometheus"
"github.com/grafana/grafana/pkg/infra/kvstore"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
@ -14,7 +20,6 @@ import (
)
var (
SyncOrgsPollInterval = 1 * time.Minute
ErrNoAlertmanagerForOrg = fmt.Errorf("Alertmanager does not exist for this organization")
ErrAlertmanagerNotReady = fmt.Errorf("Alertmanager is not ready yet")
)
@ -26,6 +31,10 @@ type MultiOrgAlertmanager struct {
settings *setting.Cfg
logger log.Logger
// clusterPeer represents the clustering peers of Alertmanagers between Grafana instances.
peer ClusterPeer
settleCancel context.CancelFunc
configStore store.AlertingStore
orgStore store.OrgStore
kvStore kvstore.KVStore
@ -33,16 +42,52 @@ type MultiOrgAlertmanager struct {
metrics *metrics.MultiOrgAlertmanager
}
func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore store.AlertingStore, orgStore store.OrgStore, kvStore kvstore.KVStore, m *metrics.MultiOrgAlertmanager) *MultiOrgAlertmanager {
return &MultiOrgAlertmanager{
func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore store.AlertingStore, orgStore store.OrgStore, kvStore kvstore.KVStore, m *metrics.MultiOrgAlertmanager, l log.Logger) (*MultiOrgAlertmanager, error) {
moa := &MultiOrgAlertmanager{
logger: l,
settings: cfg,
logger: log.New("multiorg.alertmanager"),
alertmanagers: map[int64]*Alertmanager{},
configStore: configStore,
orgStore: orgStore,
kvStore: kvStore,
metrics: m,
}
clusterLogger := gokit_log.With(gokit_log.NewLogfmtLogger(logging.NewWrapper(l)), "component", "cluster")
moa.peer = &NilPeer{}
if len(cfg.HAPeers) > 0 {
peer, err := cluster.Create(
clusterLogger,
m.Registerer,
cfg.HAListenAddr,
cfg.HAAdvertiseAddr,
cfg.HAPeers, // peers
true,
cfg.HAPushPullInterval,
cfg.HAGossipInterval,
cluster.DefaultTcpTimeout,
cluster.DefaultProbeTimeout,
cluster.DefaultProbeInterval,
nil,
)
if err != nil {
return nil, fmt.Errorf("unable to initialize gossip mesh: %w", err)
}
err = peer.Join(cluster.DefaultReconnectInterval, cluster.DefaultReconnectTimeout)
if err != nil {
l.Error("msg", "unable to join gossip mesh while initializing cluster for high availability mode", "err", err)
}
// Attempt to verify the number of peers for 30s, every 2s. The risk here is that we send a notification "too soon",
// which should _never_ happen given we share the notification log via the database, so the risk of double notification is very low.
var ctx context.Context
ctx, moa.settleCancel = context.WithTimeout(context.Background(), 30*time.Second)
go peer.Settle(ctx, cluster.DefaultGossipInterval*10)
moa.peer = peer
}
return moa, nil
}
func (moa *MultiOrgAlertmanager) Run(ctx context.Context) error {
@ -53,7 +98,7 @@ func (moa *MultiOrgAlertmanager) Run(ctx context.Context) error {
case <-ctx.Done():
moa.StopAndWait()
return nil
case <-time.After(SyncOrgsPollInterval):
case <-time.After(moa.settings.AlertmanagerConfigPollInterval):
if err := moa.LoadAndSyncAlertmanagersForOrgs(ctx); err != nil {
moa.logger.Error("error while synchronizing Alertmanager orgs", "err", err)
}
@ -90,7 +135,7 @@ func (moa *MultiOrgAlertmanager) SyncAlertmanagersForOrgs(orgIDs []int64) {
// To export them, we need to translate the metrics from each individual registry and,
// then aggregate them on the main registry.
m := metrics.NewAlertmanagerMetrics(moa.metrics.GetOrCreateOrgRegistry(orgID))
am, err := newAlertmanager(orgID, moa.settings, moa.configStore, moa.kvStore, m)
am, err := newAlertmanager(orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, m)
if err != nil {
moa.logger.Error("unable to create Alertmanager for org", "org", orgID, "err", err)
}
@ -130,6 +175,14 @@ func (moa *MultiOrgAlertmanager) StopAndWait() {
for _, am := range moa.alertmanagers {
am.StopAndWait()
}
p, ok := moa.peer.(*cluster.Peer)
if ok {
moa.settleCancel()
if err := p.Leave(10 * time.Second); err != nil {
moa.logger.Warn("unable to leave the gossip mesh", "err", err)
}
}
}
// AlertmanagerFor returns the Alertmanager instance for the organization provided.
@ -150,3 +203,16 @@ func (moa *MultiOrgAlertmanager) AlertmanagerFor(orgID int64) (*Alertmanager, er
return orgAM, nil
}
// NilPeer and NilChannel implement the Alertmanager clustering interface.
type NilPeer struct{}
func (p *NilPeer) Position() int { return 0 }
func (p *NilPeer) WaitReady(context.Context) error { return nil }
func (p *NilPeer) AddState(string, cluster.State, prometheus.Registerer) cluster.ClusterChannel {
return &NilChannel{}
}
type NilChannel struct{}
func (c *NilChannel) Broadcast([]byte) {}

@ -8,6 +8,7 @@ import (
"testing"
"time"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/setting"
@ -18,7 +19,6 @@ import (
)
func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs(t *testing.T) {
t.Skipf("Skipping multiorg alertmanager tests for now")
configStore := &FakeConfigStore{
configs: map[int64]*models.AlertConfiguration{},
}
@ -28,12 +28,15 @@ func TestMultiOrgAlertmanager_SyncAlertmanagersForOrgs(t *testing.T) {
tmpDir, err := ioutil.TempDir("", "test")
require.NoError(t, err)
SyncOrgsPollInterval = 10 * time.Minute // Don't poll in unit tests.
kvStore := newFakeKVStore(t)
reg := prometheus.NewPedanticRegistry()
m := metrics.NewNGAlert(reg)
mam := NewMultiOrgAlertmanager(&setting.Cfg{DataPath: tmpDir}, configStore, orgStore, kvStore, m.GetMultiOrgAlertmanagerMetrics())
cfg := &setting.Cfg{
DataPath: tmpDir,
AlertmanagerConfigPollInterval: 3 * time.Minute, // do not poll in tests
}
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, m.GetMultiOrgAlertmanagerMetrics(), log.New("testlogger"))
require.NoError(t, err)
ctx := context.Background()
t.Cleanup(cleanOrgDirectories(tmpDir, t))
@ -82,22 +85,23 @@ grafana_alerting_discovered_configurations 4
}
func TestMultiOrgAlertmanager_AlertmanagerFor(t *testing.T) {
t.Skipf("Skipping multiorg alertmanager tests for now")
configStore := &FakeConfigStore{
configs: map[int64]*models.AlertConfiguration{},
}
orgStore := &FakeOrgStore{
orgs: []int64{1, 2, 3},
}
tmpDir, err := ioutil.TempDir("", "test")
require.NoError(t, err)
SyncOrgsPollInterval = 10 * time.Minute // Don't poll in unit tests.
cfg := &setting.Cfg{
DataPath: tmpDir,
AlertmanagerConfigPollInterval: 3 * time.Minute, // do not poll in tests
}
kvStore := newFakeKVStore(t)
reg := prometheus.NewPedanticRegistry()
m := metrics.NewNGAlert(reg)
mam := NewMultiOrgAlertmanager(&setting.Cfg{DataPath: tmpDir}, configStore, orgStore, kvStore, m.GetMultiOrgAlertmanagerMetrics())
mam, err := NewMultiOrgAlertmanager(cfg, configStore, orgStore, kvStore, m.GetMultiOrgAlertmanagerMetrics(), log.New("testlogger"))
require.NoError(t, err)
ctx := context.Background()
t.Cleanup(cleanOrgDirectories(tmpDir, t))

@ -231,6 +231,8 @@ func setupScheduler(t *testing.T, rs store.RuleStore, is store.InstanceStore, ac
mockedClock := clock.NewMock()
logger := log.New("ngalert schedule test")
m := metrics.NewNGAlert(prometheus.NewPedanticRegistry())
moa, err := notifier.NewMultiOrgAlertmanager(&setting.Cfg{}, &notifier.FakeConfigStore{}, &notifier.FakeOrgStore{}, &notifier.FakeKVStore{}, nil, log.New("testlogger"))
require.NoError(t, err)
schedCfg := SchedulerCfg{
C: mockedClock,
BaseInterval: time.Second,
@ -239,7 +241,7 @@ func setupScheduler(t *testing.T, rs store.RuleStore, is store.InstanceStore, ac
RuleStore: rs,
InstanceStore: is,
AdminConfigStore: acs,
MultiOrgNotifier: notifier.NewMultiOrgAlertmanager(&setting.Cfg{}, &notifier.FakeConfigStore{}, &notifier.FakeOrgStore{}, &notifier.FakeKVStore{}, nil),
MultiOrgNotifier: moa,
Logger: logger,
Metrics: m.GetSchedulerMetrics(),
AdminConfigPollInterval: 10 * time.Minute, // do not poll in unit tests.

@ -18,15 +18,14 @@ import (
"strings"
"time"
"github.com/gobwas/glob"
"github.com/prometheus/common/model"
"gopkg.in/ini.v1"
"github.com/grafana/grafana-aws-sdk/pkg/awsds"
"github.com/grafana/grafana/pkg/components/gtime"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/util"
"github.com/gobwas/glob"
"github.com/prometheus/common/model"
"gopkg.in/ini.v1"
)
type Scheme string
@ -420,7 +419,14 @@ type Cfg struct {
GeomapEnableCustomBaseLayers bool
// Unified Alerting
AdminConfigPollInterval time.Duration
AdminConfigPollInterval time.Duration
AlertmanagerConfigPollInterval time.Duration
HAListenAddr string
HAAdvertiseAddr string
HAPeers []string
HAPeerTimeout time.Duration
HAGossipInterval time.Duration
HAPushPullInterval time.Duration
}
// IsLiveConfigEnabled returns true if live should be able to save configs to SQL tables
@ -916,8 +922,7 @@ func (cfg *Cfg) Load(args CommandLineArgs) error {
if err := readAlertingSettings(iniFile); err != nil {
return err
}
if err := cfg.readUnifiedAlertingSettings(iniFile); err != nil {
if err := cfg.ReadUnifiedAlertingSettings(iniFile); err != nil {
return err
}
@ -1374,13 +1379,6 @@ func (cfg *Cfg) readRenderingSettings(iniFile *ini.File) error {
return nil
}
func (cfg *Cfg) readUnifiedAlertingSettings(iniFile *ini.File) error {
ua := iniFile.Section("unified_alerting")
s := ua.Key("admin_config_poll_interval_seconds").MustInt(60)
cfg.AdminConfigPollInterval = time.Second * time.Duration(s)
return nil
}
func readAlertingSettings(iniFile *ini.File) error {
alerting := iniFile.Section("alerting")
AlertingEnabled = alerting.Key("enabled").MustBool(true)

@ -0,0 +1,57 @@
package setting
import (
"strings"
"time"
"github.com/grafana/grafana/pkg/components/gtime"
"github.com/prometheus/alertmanager/cluster"
"gopkg.in/ini.v1"
)
const (
AlertmanagerDefaultClusterAddr = "0.0.0.0:9094"
AlertmanagerDefaultPeerTimeout = 15 * time.Second
AlertmanagerDefaultGossipInterval = cluster.DefaultGossipInterval
AlertmanagerDefaultPushPullInterval = cluster.DefaultPushPullInterval
SchedulerDefaultAdminConfigPollInterval = 60 * time.Second
AlertmanagerDefaultConfigPollInterval = 60 * time.Second
)
func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
ua := iniFile.Section("unified_alerting")
var err error
cfg.AdminConfigPollInterval, err = gtime.ParseDuration(valueAsString(ua, "admin_config_poll_interval", (SchedulerDefaultAdminConfigPollInterval).String()))
if err != nil {
return err
}
cfg.AlertmanagerConfigPollInterval, err = gtime.ParseDuration(valueAsString(ua, "alertmanager_config_poll_interval", (AlertmanagerDefaultConfigPollInterval).String()))
if err != nil {
return err
}
cfg.HAPeerTimeout, err = gtime.ParseDuration(valueAsString(ua, "ha_peer_timeout", (AlertmanagerDefaultPeerTimeout).String()))
if err != nil {
return err
}
cfg.HAGossipInterval, err = gtime.ParseDuration(valueAsString(ua, "ha_gossip_interval", (AlertmanagerDefaultGossipInterval).String()))
if err != nil {
return err
}
cfg.HAPushPullInterval, err = gtime.ParseDuration(valueAsString(ua, "ha_push_pull_interval", (AlertmanagerDefaultPushPullInterval).String()))
if err != nil {
return err
}
cfg.HAListenAddr = ua.Key("ha_listen_address").MustString(AlertmanagerDefaultClusterAddr)
cfg.HAAdvertiseAddr = ua.Key("ha_advertise_address").MustString("")
peers := ua.Key("ha_peers").MustString("")
cfg.HAPeers = make([]string, 0)
if peers != "" {
for _, peer := range strings.Split(peers, ",") {
peer = strings.TrimSpace(peer)
cfg.HAPeers = append(cfg.HAPeers, peer)
}
}
return nil
}
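All of the new interval options go through `gtime.ParseDuration`, so they accept the duration strings described in the ini comments (ms, s, m, h, d). A small sketch of that parsing from a caller's perspective, assuming a Grafana checkout so the `gtime` package used above is importable:

```go
package main

import (
	"fmt"

	"github.com/grafana/grafana/pkg/components/gtime"
)

func main() {
	// The same kind of values the [unified_alerting] section accepts,
	// e.g. ha_gossip_interval = 200ms or ha_push_pull_interval = 60s.
	for _, raw := range []string{"200ms", "60s", "1m", "1d"} {
		d, err := gtime.ParseDuration(raw)
		if err != nil {
			fmt.Printf("invalid interval %q: %v\n", raw, err)
			continue
		}
		fmt.Printf("%s parses to %s\n", raw, d)
	}
}
```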

@ -0,0 +1,39 @@
package setting
import (
"testing"
"time"
"github.com/stretchr/testify/require"
)
func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
cfg := NewCfg()
err := cfg.Load(CommandLineArgs{HomePath: "../../", Config: "../../conf/defaults.ini"})
require.NoError(t, err)
// It sets the correct defaults.
{
require.Equal(t, 60*time.Second, cfg.AdminConfigPollInterval)
require.Equal(t, 60*time.Second, cfg.AlertmanagerConfigPollInterval)
require.Equal(t, 15*time.Second, cfg.HAPeerTimeout)
require.Equal(t, "0.0.0.0:9094", cfg.HAListenAddr)
require.Equal(t, "", cfg.HAAdvertiseAddr)
require.Len(t, cfg.HAPeers, 0)
require.Equal(t, 200*time.Millisecond, cfg.HAGossipInterval)
require.Equal(t, 60*time.Second, cfg.HAPushPullInterval)
}
// With peers set, it correctly parses them.
{
require.Len(t, cfg.HAPeers, 0)
s, err := cfg.Raw.NewSection("unified_alerting")
require.NoError(t, err)
_, err = s.NewKey("ha_peers", "hostname1:9090,hostname2:9090,hostname3:9090")
require.NoError(t, err)
require.NoError(t, cfg.ReadUnifiedAlertingSettings(cfg.Raw))
require.Len(t, cfg.HAPeers, 3)
require.ElementsMatch(t, []string{"hostname1:9090", "hostname2:9090", "hostname3:9090"}, cfg.HAPeers)
}
}

@ -21,9 +21,9 @@ import (
func TestAdminConfiguration_SendingToExternalAlertmanagers(t *testing.T) {
dir, path := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{
EnableFeatureToggles: []string{"ngalert"},
DisableAnonymous: true,
NGAlertAdminConfigIntervalSeconds: 2,
EnableFeatureToggles: []string{"ngalert"},
DisableAnonymous: true,
NGAlertAdminConfigPollInterval: 2 * time.Second,
})
grafanaListedAddr, s := testinfra.StartGrafana(t, dir, path)

@ -8,8 +8,6 @@ import (
"testing"
"time"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/bus"
"github.com/grafana/grafana/pkg/models"
"github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
@ -19,16 +17,10 @@ import (
)
func TestAlertmanagerConfigurationIsTransactional(t *testing.T) {
// TODO: We need a reliable way to ensure Alertmanagers have synced correctly.
// For now, make them sync quicker.
p := notifier.SyncOrgsPollInterval
notifier.SyncOrgsPollInterval = 2 * time.Second
t.Cleanup(func() {
notifier.SyncOrgsPollInterval = p
})
dir, path := testinfra.CreateGrafDir(t, testinfra.GrafanaOpts{
EnableFeatureToggles: []string{"ngalert"},
DisableAnonymous: true,
EnableFeatureToggles: []string{"ngalert"},
NGAlertAlertmanagerConfigPollInterval: 2 * time.Second,
DisableAnonymous: true,
})
grafanaListedAddr, store := testinfra.StartGrafana(t, dir, path)

@ -10,6 +10,7 @@ import (
"path/filepath"
"strings"
"testing"
"time"
"github.com/grafana/grafana/pkg/api"
"github.com/grafana/grafana/pkg/infra/fs"
@ -204,13 +205,18 @@ func CreateGrafDir(t *testing.T, opts ...GrafanaOpts) (string, string) {
_, err = featureSection.NewKey("enable", strings.Join(o.EnableFeatureToggles, " "))
require.NoError(t, err)
}
if o.NGAlertAdminConfigIntervalSeconds != 0 {
ngalertingSection, err := cfg.NewSection("ngalerting")
if o.NGAlertAdminConfigPollInterval != 0 {
ngalertingSection, err := cfg.NewSection("unified_alerting")
require.NoError(t, err)
_, err = ngalertingSection.NewKey("admin_config_poll_interval_seconds", fmt.Sprintf("%d", o.NGAlertAdminConfigIntervalSeconds))
_, err = ngalertingSection.NewKey("admin_config_poll_interval", o.NGAlertAdminConfigPollInterval.String())
require.NoError(t, err)
}
if o.NGAlertAlertmanagerConfigPollInterval != 0 {
ngalertingSection, err := cfg.NewSection("unified_alerting")
require.NoError(t, err)
_, err = ngalertingSection.NewKey("alertmanager_config_poll_interval", o.NGAlertAlertmanagerConfigPollInterval.String())
require.NoError(t, err)
}
if o.AnonymousUserRole != "" {
_, err = anonSect.NewKey("org_role", string(o.AnonymousUserRole))
require.NoError(t, err)
@ -252,13 +258,14 @@ func CreateGrafDir(t *testing.T, opts ...GrafanaOpts) (string, string) {
}
type GrafanaOpts struct {
EnableCSP bool
EnableFeatureToggles []string
NGAlertAdminConfigIntervalSeconds int
AnonymousUserRole models.RoleType
EnableQuota bool
DisableAnonymous bool
CatalogAppEnabled bool
ViewersCanEdit bool
PluginAdminEnabled bool
EnableCSP bool
EnableFeatureToggles []string
NGAlertAdminConfigPollInterval time.Duration
NGAlertAlertmanagerConfigPollInterval time.Duration
AnonymousUserRole models.RoleType
EnableQuota bool
DisableAnonymous bool
CatalogAppEnabled bool
ViewersCanEdit bool
PluginAdminEnabled bool
}
