Alerting: Add options to configure TLS for HA using Redis (#87567)

* Add Alerting HA Redis Client TLS configs

* Add test to ping miniredis with mTLS

* Update .ini files and docs

* Add tests for unified alerting ha redis TLS settings

* Fix malformed go.sum

* Add modowner

* Fix lint error

* Update docs and use dstls config
pull/83024/head^2
Fayzal Ghantiwala 1 year ago committed by GitHub
parent e39658097f
commit 7a2fbad0c8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 26
      conf/defaults.ini
  2. 29
      conf/sample.ini
  3. 7
      docs/sources/alerting/set-up/configure-high-availability/_index.md
  4. 1
      go.mod
  5. 2
      go.sum
  6. 5
      go.work.sum
  7. 16
      pkg/services/ngalert/notifier/multiorg_alertmanager.go
  8. 21
      pkg/services/ngalert/notifier/redis_peer.go
  9. 89
      pkg/services/ngalert/notifier/redis_peer_test.go
  10. 11
      pkg/setting/setting_unified_alerting.go
  11. 47
      pkg/setting/setting_unified_alerting_test.go

@ -1190,6 +1190,32 @@ ha_redis_peer_name =
# The maximum number of simultaneous redis connections.
ha_redis_max_conns = 5
# Enable TLS on the client used to communicate with the redis server. This should be set to true
# if using any of the other ha_redis_tls_* fields.
ha_redis_tls_enabled = false
# Path to the PEM-encoded TLS client certificate file used to authenticate with the redis server.
ha_redis_tls_cert_path =
# Path to the PEM-encoded TLS private key file. Also requires the client certificate to be configured.
ha_redis_tls_key_path =
# Path to the PEM-encoded CA certificates file.
ha_redis_tls_ca_path =
# Overrides the expected name of the redis server certificate.
ha_redis_tls_server_name =
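# Skips validating the redis server certificate. Defaults to false when left empty; only enable it for testing, as it disables protection against man-in-the-middle attacks.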
ha_redis_tls_insecure_skip_verify =
# Overrides the default TLS cipher suite list.
ha_redis_tls_cipher_suites =
# Overrides the default minimum TLS version.
# Allowed values: VersionTLS10, VersionTLS11, VersionTLS12, VersionTLS13
ha_redis_tls_min_version =
# Listen address/hostname and port to receive unified alerting messages for other Grafana instances. The port is used for both TCP and UDP. It is assumed other Grafana instances are also running on the same port.
ha_listen_address = "0.0.0.0:9094"

@ -1101,6 +1101,35 @@
# provided, a random one will be generated.
;ha_redis_peer_name =
# The maximum number of simultaneous redis connections.
# ha_redis_max_conns = 5
# Enable TLS on the client used to communicate with the redis server. This should be set to true
# if using any of the other ha_redis_tls_* fields.
# ha_redis_tls_enabled = false
# Path to the PEM-encoded TLS client certificate file used to authenticate with the redis server.
# ha_redis_tls_cert_path =
# Path to the PEM-encoded TLS private key file. Also requires the client certificate to be configured.
# ha_redis_tls_key_path =
# Path to the PEM-encoded CA certificates file.
# ha_redis_tls_ca_path =
# Overrides the expected name of the redis server certificate.
# ha_redis_tls_server_name =
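# Skips validating the redis server certificate. Defaults to false when left empty; only enable it for testing, as it disables protection against man-in-the-middle attacks.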
# ha_redis_tls_insecure_skip_verify =
# Overrides the default TLS cipher suite list.
# ha_redis_tls_cipher_suites =
# Overrides the default minimum TLS version.
# Allowed values: VersionTLS10, VersionTLS11, VersionTLS12, VersionTLS13
# ha_redis_tls_min_version =
# Listen address/hostname and port to receive unified alerting messages for other Grafana instances. The port is used for both TCP and UDP. It is assumed other Grafana instances are also running on the same port. The default value is `0.0.0.0:9094`.
;ha_listen_address = "0.0.0.0:9094"

@ -61,11 +61,12 @@ Since gossiping of notifications and silences uses both TCP and UDP port `9094`,
As an alternative to Memberlist, you can use Redis for high availability. This is useful if you want to have a central
database for HA and cannot support the meshing of all Grafana servers.
1. Make sure you have a redis server that supports pub/sub. If you use a proxy in front of your Redis cluster, make sure the proxy supports pub/sub.
1. Make sure you have a Redis server that supports pub/sub. If you use a proxy in front of your Redis cluster, make sure the proxy supports pub/sub.
1. In your custom configuration file ($WORKING_DIR/conf/custom.ini), go to the [unified_alerting] section.
1. Set `ha_redis_address` to the Redis server address Grafana should connect to.
1. [Optional] Set the username and password if authentication is enabled on the redis server using `ha_redis_username` and `ha_redis_password`.
1. [Optional] Set `ha_redis_prefix` to something unique if you plan to share the redis server with multiple Grafana instances.
1. [Optional] Set the username and password if authentication is enabled on the Redis server using `ha_redis_username` and `ha_redis_password`.
1. [Optional] Set `ha_redis_prefix` to something unique if you plan to share the Redis server with multiple Grafana instances.
1. [Optional] Set `ha_redis_tls_enabled` to `true` and configure the corresponding `ha_redis_tls_*` fields to secure communications between Grafana and Redis with Transport Layer Security (TLS).
The following metrics can be used for meta monitoring, exposed by the `/metrics` endpoint in Grafana:

@ -127,6 +127,7 @@ require (
github.com/lib/pq v1.10.9 // @grafana/grafana-backend-group
github.com/linkedin/goavro/v2 v2.10.0 // @grafana/grafana-backend-group
github.com/m3db/prometheus_remote_client_golang v0.4.4 // @grafana/grafana-backend-group
github.com/madflojo/testcerts v1.1.1 // @grafana/alerting-squad-backend
github.com/magefile/mage v1.15.0 // @grafana/grafana-release-guild
github.com/matryer/is v1.4.0 // @grafana/grafana-as-code
github.com/mattn/go-isatty v0.0.20 // @grafana/grafana-backend-group

@ -2519,6 +2519,8 @@ github.com/lyft/protoc-gen-star/v2 v2.0.3/go.mod h1:amey7yeodaJhXSbf/TlLvWiqQfLO
github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
github.com/m3db/prometheus_remote_client_golang v0.4.4 h1:DsAIjVKoCp7Ym35tAOFL1OuMLIdIikAEHeNPHY+yyM8=
github.com/m3db/prometheus_remote_client_golang v0.4.4/go.mod h1:wHfVbA3eAK6dQvKjCkHhusWYegCk3bDGkA15zymSHdc=
github.com/madflojo/testcerts v1.1.1 h1:YsSHWV79nMNZK0mJtwXjKoYHjJEbLPFefR8TxmmWupY=
github.com/madflojo/testcerts v1.1.1/go.mod h1:MW8sh39gLnkKh4K0Nc55AyHEDl9l/FBLDUsQhpmkuo0=
github.com/magefile/mage v1.11.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=
github.com/magefile/mage v1.15.0 h1:BvGheCMAsG3bWUDbZ8AyXXpCNwU9u5CB6sM+HNb9HYg=
github.com/magefile/mage v1.15.0/go.mod h1:z5UZb/iS3GoOSn0JgWuiw7dxlurVYTu+/jHXqQg881A=

@ -670,6 +670,7 @@ github.com/hamba/avro/v2 v2.17.2/go.mod h1:Q9YK+qxAhtVrNqOhwlZTATLgLA8qxG2vtvkhK
github.com/hanwen/go-fuse v1.0.0 h1:GxS9Zrn6c35/BnfiVsZVWmsG803xwE7eVRDvcf/BEVc=
github.com/hanwen/go-fuse/v2 v2.1.0 h1:+32ffteETaLYClUj0a3aHjZ1hOPxxaNEHiZiujuDaek=
github.com/hashicorp/consul/sdk v0.15.0 h1:2qK9nDrr4tiJKRoxPGhm6B7xJjLVIQqkjiab2M4aKjU=
github.com/hamba/avro/v2 v2.17.2/go.mod h1:Q9YK+qxAhtVrNqOhwlZTATLgLA8qxG2vtvkhK8fJ7Jo=
github.com/hashicorp/go-hclog v0.16.1 h1:IVQwpTGNRRIHafnTs2dQLIk4ENtneRIEEJWOVDqz99o=
github.com/hashicorp/go-hclog v0.16.1/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ=
github.com/hashicorp/go-syslog v1.0.0 h1:KaodqZuhUoZereWVIYmpUgZysurB1kBLX2j0MwMrUAE=
@ -944,6 +945,9 @@ github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JT
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
github.com/tidwall/gjson v1.14.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
github.com/tinylib/msgp v1.1.8/go.mod h1:qkpG+2ldGg4xRFmx+jfTvZPxfGFhi64BcnL9vkCm/Tw=
github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM=
@ -1101,6 +1105,7 @@ k8s.io/component-base v0.0.0-20240417101527-62c04b35eff6/go.mod h1:l0ukbPS0lwFxO
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01 h1:pWEwq4Asjm4vjW7vcsmijwBhOr1/shsbSYiWXmNGlks=
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 h1:NGrVE502P0s0/1hudf8zjgwki1X/TByhmAoILTarmzo=
k8s.io/gengo v0.0.0-20230829151522-9cce18d56c01/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70/go.mod h1:VH3AT8AaQOqiGjMF9p0/IM1Dj+82ZwjfxUP1IxaHE+8=
k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8=
k8s.io/kms v0.29.0/go.mod h1:mB0f9HLxRXeXUfHfn1A7rpwOlzXI1gIWu86z6buNoYA=

@ -169,13 +169,15 @@ func (moa *MultiOrgAlertmanager) setupClustering(cfg *setting.Cfg) error {
// Redis setup.
if cfg.UnifiedAlerting.HARedisAddr != "" {
redisPeer, err := newRedisPeer(redisConfig{
addr: cfg.UnifiedAlerting.HARedisAddr,
name: cfg.UnifiedAlerting.HARedisPeerName,
prefix: cfg.UnifiedAlerting.HARedisPrefix,
password: cfg.UnifiedAlerting.HARedisPassword,
username: cfg.UnifiedAlerting.HARedisUsername,
db: cfg.UnifiedAlerting.HARedisDB,
maxConns: cfg.UnifiedAlerting.HARedisMaxConns,
addr: cfg.UnifiedAlerting.HARedisAddr,
name: cfg.UnifiedAlerting.HARedisPeerName,
prefix: cfg.UnifiedAlerting.HARedisPrefix,
password: cfg.UnifiedAlerting.HARedisPassword,
username: cfg.UnifiedAlerting.HARedisUsername,
db: cfg.UnifiedAlerting.HARedisDB,
maxConns: cfg.UnifiedAlerting.HARedisMaxConns,
tlsEnabled: cfg.UnifiedAlerting.HARedisTLSEnabled,
tls: cfg.UnifiedAlerting.HARedisTLSConfig,
}, clusterLogger, moa.metrics.Registerer, cfg.UnifiedAlerting.HAPushPullInterval)
if err != nil {
return fmt.Errorf("unable to initialize redis: %w", err)

@ -12,6 +12,7 @@ import (
"github.com/google/uuid"
alertingCluster "github.com/grafana/alerting/cluster"
alertingClusterPB "github.com/grafana/alerting/cluster/clusterpb"
dstls "github.com/grafana/dskit/crypto/tls"
"github.com/prometheus/client_golang/prometheus"
"github.com/redis/go-redis/v9"
@ -27,6 +28,9 @@ type redisConfig struct {
name string
prefix string
maxConns int
tlsEnabled bool
tls dstls.ClientConfig
}
const (
@ -90,13 +94,26 @@ func newRedisPeer(cfg redisConfig, logger log.Logger, reg prometheus.Registerer,
if cfg.maxConns >= 0 {
poolSize = cfg.maxConns
}
rdb := redis.NewClient(&redis.Options{
opts := &redis.Options{
Addr: cfg.addr,
Username: cfg.username,
Password: cfg.password,
DB: cfg.db,
PoolSize: poolSize,
})
}
if cfg.tlsEnabled {
tlsClientConfig, err := cfg.tls.GetTLSConfig()
if err != nil {
logger.Error("Failed to get TLS config", "err", err)
return nil, err
} else {
opts.TLSConfig = tlsClientConfig
}
}
rdb := redis.NewClient(opts)
cmd := rdb.Ping(context.Background())
if cmd.Err() != nil {
logger.Error("Failed to ping redis - redis-based alertmanager clustering may not be available", "err", cmd.Err())

@ -0,0 +1,89 @@
package notifier
import (
"context"
"crypto/tls"
"crypto/x509"
"os"
"testing"
"time"
"github.com/alicebob/miniredis/v2"
dstls "github.com/grafana/dskit/crypto/tls"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/madflojo/testcerts"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
)
// TestNewRedisPeerWithTLS verifies that newRedisPeer can talk to a Redis server
// over mutual TLS: the client validates the server certificate against the test
// CA, and the server requires and verifies the client certificate.
func TestNewRedisPeerWithTLS(t *testing.T) {
	// Write client and server certificates/keys to a temp dir, both issued by the same CA.
	paths := createX509TestDir(t)

	// Set up tls.Config and start miniredis with server-side TLS.
	serverCert, err := tls.LoadX509KeyPair(paths.serverCert, paths.serverKey)
	require.NoError(t, err)

	caPEM, err := os.ReadFile(paths.ca)
	require.NoError(t, err)

	clientCAPool := x509.NewCertPool()
	// AppendCertsFromPEM only signals failure through its boolean result; an
	// empty pool would otherwise surface later as an opaque handshake error.
	require.True(t, clientCAPool.AppendCertsFromPEM(caPEM), "failed to parse CA certificate")

	mr, err := miniredis.RunTLS(&tls.Config{
		Certificates: []tls.Certificate{serverCert},
		// ClientCAs is only consulted when client authentication is requested.
		// Without this the server never asks for a client certificate and the
		// test would pass with plain one-way TLS.
		ClientAuth: tls.RequireAndVerifyClientCert,
		ClientCAs:  clientCAPool,
	})
	require.NoError(t, err)
	defer mr.Close()

	// Create redis peer with client-side TLS. A fresh registry keeps the test
	// from leaking metric registrations into the process-wide default registerer.
	redisPeer, err := newRedisPeer(redisConfig{
		addr:       mr.Addr(),
		tlsEnabled: true,
		tls: dstls.ClientConfig{
			CertPath:   paths.clientCert,
			KeyPath:    paths.clientKey,
			CAPath:     paths.ca,
			ServerName: "localhost",
		}}, log.NewNopLogger(), prometheus.NewRegistry(), time.Second*60)
	require.NoError(t, err)

	// The explicit ping is the connectivity assertion of this test.
	ping := redisPeer.redis.Ping(context.Background())
	require.NoError(t, ping.Err())
}
// certPaths groups the file-system paths of the PEM files used by the TLS
// tests in this file (populated by createX509TestDir).
type certPaths struct {
	// Client certificate and its private key.
	clientCert, clientKey string
	// Server certificate and its private key.
	serverCert, serverKey string
	// CA certificate file.
	ca string
}
// createX509TestDir creates a test CA plus a server key pair (for "localhost")
// and a client key pair signed by it, writes them into the test's temporary
// directory, and returns the paths of the resulting files. Any failure aborts
// the calling test.
func createX509TestDir(t *testing.T) certPaths {
	t.Helper()

	dir := t.TempDir()
	authority := testcerts.NewCA()

	var paths certPaths

	// Only the CA certificate is needed by callers; its key file is ignored.
	caCert, _, err := authority.ToTempFile(dir)
	require.NoError(t, err)
	paths.ca = caCert.Name()

	serverPair, err := authority.NewKeyPair("localhost")
	require.NoError(t, err)
	srvCert, srvKey, err := serverPair.ToTempFile(dir)
	require.NoError(t, err)
	paths.serverCert, paths.serverKey = srvCert.Name(), srvKey.Name()

	// The client key pair is created without any domain names.
	clientPair, err := authority.NewKeyPair()
	require.NoError(t, err)
	cliCert, cliKey, err := clientPair.ToTempFile(dir)
	require.NoError(t, err)
	paths.clientCert, paths.clientKey = cliCert.Name(), cliKey.Name()

	return paths
}

@ -7,6 +7,7 @@ import (
"time"
alertingCluster "github.com/grafana/alerting/cluster"
dstls "github.com/grafana/dskit/crypto/tls"
"github.com/grafana/grafana-plugin-sdk-go/backend/gtime"
"gopkg.in/ini.v1"
@ -79,6 +80,8 @@ type UnifiedAlertingSettings struct {
HARedisPassword string
HARedisDB int
HARedisMaxConns int
HARedisTLSEnabled bool
HARedisTLSConfig dstls.ClientConfig
MaxAttempts int64
MinInterval time.Duration
EvaluationTimeout time.Duration
@ -234,6 +237,14 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
uaCfg.HAPeers = append(uaCfg.HAPeers, peer)
}
}
uaCfg.HARedisTLSEnabled = ua.Key("ha_redis_tls_enabled").MustBool(false)
uaCfg.HARedisTLSConfig.CertPath = ua.Key("ha_redis_tls_cert_path").MustString("")
uaCfg.HARedisTLSConfig.KeyPath = ua.Key("ha_redis_tls_key_path").MustString("")
uaCfg.HARedisTLSConfig.CAPath = ua.Key("ha_redis_tls_ca_path").MustString("")
uaCfg.HARedisTLSConfig.ServerName = ua.Key("ha_redis_tls_server_name").MustString("")
uaCfg.HARedisTLSConfig.InsecureSkipVerify = ua.Key("ha_redis_tls_insecure_skip_verify").MustBool(false)
uaCfg.HARedisTLSConfig.CipherSuites = ua.Key("ha_redis_tls_cipher_suites").MustString("")
uaCfg.HARedisTLSConfig.MinVersion = ua.Key("ha_redis_tls_min_version").MustString("")
// TODO load from ini file
uaCfg.DefaultConfiguration = alertmanagerDefaultConfiguration

@ -298,3 +298,50 @@ func TestMinInterval(t *testing.T) {
})
}
}
// TestHARedisTLSSettings checks that every ha_redis_tls_* key in the
// [unified_alerting] section is read into the matching field of
// cfg.UnifiedAlerting by ReadUnifiedAlertingSettings.
func TestHARedisTLSSettings(t *testing.T) {
	// Values written to the ini file and expected back from the parsed settings.
	const (
		wantEnabled      = true
		wantCertPath     = "path/to/cert"
		wantKeyPath      = "path/to/key"
		wantCAPath       = "path/to/ca"
		wantServerName   = "server_name"
		wantSkipVerify   = true
		wantCipherSuites = "TLS_AES_128_GCM_SHA256"
		wantMinVersion   = "VersionTLS13"
	)

	// Initialize .ini file with new HA Redis TLS Settings
	iniFile := ini.Empty()
	sec, err := iniFile.NewSection("unified_alerting")
	require.NoError(t, err)

	entries := []struct{ key, value string }{
		{"ha_redis_tls_enabled", strconv.FormatBool(wantEnabled)},
		{"ha_redis_tls_cert_path", wantCertPath},
		{"ha_redis_tls_key_path", wantKeyPath},
		{"ha_redis_tls_ca_path", wantCAPath},
		{"ha_redis_tls_server_name", wantServerName},
		{"ha_redis_tls_insecure_skip_verify", strconv.FormatBool(wantSkipVerify)},
		{"ha_redis_tls_cipher_suites", wantCipherSuites},
		{"ha_redis_tls_min_version", wantMinVersion},
	}
	for _, e := range entries {
		_, err = sec.NewKey(e.key, e.value)
		require.NoError(t, err)
	}

	cfg := NewCfg()
	err = cfg.ReadUnifiedAlertingSettings(iniFile)
	require.Nil(t, err)

	require.Equal(t, wantEnabled, cfg.UnifiedAlerting.HARedisTLSEnabled)

	got := cfg.UnifiedAlerting.HARedisTLSConfig
	require.Equal(t, wantCertPath, got.CertPath)
	require.Equal(t, wantKeyPath, got.KeyPath)
	require.Equal(t, wantCAPath, got.CAPath)
	require.Equal(t, wantServerName, got.ServerName)
	require.Equal(t, wantSkipVerify, got.InsecureSkipVerify)
	require.Equal(t, wantCipherSuites, got.CipherSuites)
	require.Equal(t, wantMinVersion, got.MinVersion)
}

Loading…
Cancel
Save