Alerting: Use the forked Alertmanager for remote secondary mode (#79646)

* (WIP) Alerting: Use the forked Alertmanager for remote secondary mode

* fall back to using internal AM in case of error

* remove TODOs, clean up .ini file, add orgId as part of remote AM config struct

* log warnings and errors, fall back to remoteSecondary, fall back to internal AM only

* extract logic to decide remote Alertmanager mode to a separate function, switch on mode

* tests

* make linter happy

* remove func to decide remote Alertmanager mode

* refactor factory function and options

* add default case to switch statement

* remove ineffectual assignment
pull/79828/head
Santiago 2 years ago committed by GitHub
parent 12e473892f
commit a77ba40ed4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      conf/defaults.ini
  2. 78
      pkg/services/ngalert/ngalert.go
  3. 2
      pkg/services/ngalert/notifier/alertmanager.go
  4. 2
      pkg/services/ngalert/notifier/alertmanager_test.go
  5. 10
      pkg/services/ngalert/notifier/multiorg_alertmanager.go
  6. 2
      pkg/services/ngalert/remote/alertmanager_test.go
  7. 14
      pkg/setting/setting_unified_alerting.go
  8. 8
      public/app/features/dashboard-scene/panel-edit/testfiles/testDashboard.json

@ -1276,6 +1276,8 @@ tenant =
# If not present, the tenant ID will be set in the X-Scope-OrgID header.
password =
sync_interval = 5m
#################################### Alerting ############################
[alerting]
# Enable the legacy alerting sub-system and interface. If Unified Alerting is already enabled and you try to go back to legacy alerting, all data that is part of Unified Alerting will be deleted. When this configuration section and flag are not defined, the state is defined at runtime. See the documentation for more details.

@ -162,32 +162,70 @@ type AlertNG struct {
}
func (ng *AlertNG) init() error {
var err error
// AlertNG should be initialized before the cancellation deadline of initCtx
initCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
defer cancelFunc()
ng.store.Logger = ng.Log
decryptFn := ng.SecretsService.GetDecryptedValue
multiOrgMetrics := ng.Metrics.GetMultiOrgAlertmanagerMetrics()
// If enabled, configure the remote Alertmanager.
// - If several toggles are enabled, the order of precedence is RemoteOnly, RemotePrimary, RemoteSecondary
// - If no toggles are enabled, we default to using only the internal Alertmanager
// We currently support only remote secondary mode, so in case other toggles are enabled we fall back to remote secondary.
var overrides []notifier.Option
moaLogger := log.New("ngalert.multiorg.alertmanager")
remoteOnly := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemoteOnly)
remotePrimary := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemotePrimary)
remoteSecondary := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemoteSecondary)
if ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Enable {
override := notifier.WithAlertmanagerOverride(func(ctx context.Context, orgID int64) (notifier.Alertmanager, error) {
externalAMCfg := remote.AlertmanagerConfig{}
// We won't be handling files on disk, we can pass an empty string as workingDirPath.
stateStore := notifier.NewFileStore(orgID, ng.KVStore, "")
return remote.NewAlertmanager(externalAMCfg, stateStore)
})
overrides = append(overrides, override)
switch {
case remoteOnly, remotePrimary:
ng.Log.Warn("Only remote secondary mode is supported at the moment, falling back to remote secondary")
fallthrough
case remoteSecondary:
ng.Log.Debug("Starting Grafana with remote secondary mode enabled")
// This function will be used by the MOA to create new Alertmanagers.
override := notifier.WithAlertmanagerOverride(func(factoryFn notifier.OrgAlertmanagerFactory) notifier.OrgAlertmanagerFactory {
return func(ctx context.Context, orgID int64) (notifier.Alertmanager, error) {
// Create internal Alertmanager.
internalAM, err := factoryFn(ctx, orgID)
if err != nil {
return nil, err
}
// Create remote Alertmanager.
remoteAM, err := createRemoteAlertmanager(orgID, ng.Cfg.UnifiedAlerting.RemoteAlertmanager, ng.KVStore)
if err != nil {
moaLogger.Error("Failed to create remote Alertmanager, falling back to using only the internal one", "err", err)
return internalAM, nil
}
// Use both Alertmanager implementations in the forked Alertmanager.
cfg := remote.RemoteSecondaryConfig{
Logger: log.New("ngalert.forked-alertmanager.remote-secondary"),
OrgID: orgID,
Store: ng.store,
SyncInterval: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.SyncInterval,
}
return remote.NewRemoteSecondaryForkedAlertmanager(cfg, internalAM, remoteAM)
}
})
overrides = append(overrides, override)
default:
ng.Log.Error("A mode should be selected when enabling the remote Alertmanager, falling back to using only the internal Alertmanager")
}
}
ng.MultiOrgAlertmanager, err = notifier.NewMultiOrgAlertmanager(ng.Cfg, ng.store, ng.store, ng.KVStore, ng.store, decryptFn, multiOrgMetrics, ng.NotificationService, log.New("ngalert.multiorg.alertmanager"), ng.SecretsService, overrides...)
decryptFn := ng.SecretsService.GetDecryptedValue
multiOrgMetrics := ng.Metrics.GetMultiOrgAlertmanagerMetrics()
moa, err := notifier.NewMultiOrgAlertmanager(ng.Cfg, ng.store, ng.store, ng.KVStore, ng.store, decryptFn, multiOrgMetrics, ng.NotificationService, moaLogger, ng.SecretsService, overrides...)
if err != nil {
return err
}
ng.MultiOrgAlertmanager = moa
imageService, err := image.NewScreenshotImageServiceFromCfg(ng.Cfg, ng.store, ng.dashboardService, ng.renderService, ng.Metrics.Registerer)
if err != nil {
@ -490,3 +528,15 @@ func ApplyStateHistoryFeatureToggles(cfg *setting.UnifiedAlertingStateHistorySet
return
}
}
// createRemoteAlertmanager builds a remote Alertmanager for the given
// organization. The connection details (URL, tenant ID and basic-auth
// password) are copied from amCfg, and the state store is backed by the
// supplied key-value store.
func createRemoteAlertmanager(orgID int64, amCfg setting.RemoteAlertmanagerSettings, kvstore kvstore.KVStore) (*remote.Alertmanager, error) {
	// No files are handled on disk, so an empty working directory path is enough.
	fileStore := notifier.NewFileStore(orgID, kvstore, "")

	return remote.NewAlertmanager(remote.AlertmanagerConfig{
		OrgID:             orgID,
		URL:               amCfg.URL,
		TenantID:          amCfg.TenantID,
		BasicAuthPassword: amCfg.Password,
	}, fileStore)
}

@ -83,7 +83,7 @@ func (m maintenanceOptions) MaintenanceFunc(state alertingNotify.State) (int64,
return m.maintenanceFunc(state)
}
func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, kvStore kvstore.KVStore,
func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, kvStore kvstore.KVStore,
peer alertingNotify.ClusterPeer, decryptFn alertingNotify.GetDecryptedValueFn, ns notifications.Service,
m *metrics.Alertmanager) (*alertmanager, error) {
workingPath := filepath.Join(cfg.DataPath, workingDir, strconv.Itoa(int(orgID)))

@ -41,7 +41,7 @@ func setupAMTest(t *testing.T) *alertmanager {
kvStore := fakes.NewFakeKVStore(t)
secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(sqlStore))
decryptFn := secretsService.GetDecryptedValue
am, err := newAlertmanager(context.Background(), 1, cfg, s, kvStore, &NilPeer{}, decryptFn, nil, m)
am, err := NewAlertmanager(context.Background(), 1, cfg, s, kvStore, &NilPeer{}, decryptFn, nil, m)
require.NoError(t, err)
return am
}

@ -77,7 +77,7 @@ type MultiOrgAlertmanager struct {
configStore AlertingStore
orgStore store.OrgStore
kvStore kvstore.KVStore
factory orgAlertmanagerFactory
factory OrgAlertmanagerFactory
decryptFn alertingNotify.GetDecryptedValueFn
@ -85,13 +85,13 @@ type MultiOrgAlertmanager struct {
ns notifications.Service
}
type orgAlertmanagerFactory func(ctx context.Context, orgID int64) (Alertmanager, error)
type OrgAlertmanagerFactory func(ctx context.Context, orgID int64) (Alertmanager, error)
type Option func(*MultiOrgAlertmanager)
func WithAlertmanagerOverride(f orgAlertmanagerFactory) Option {
func WithAlertmanagerOverride(f func(OrgAlertmanagerFactory) OrgAlertmanagerFactory) Option {
return func(moa *MultiOrgAlertmanager) {
moa.factory = f
moa.factory = f(moa.factory)
}
}
@ -122,7 +122,7 @@ func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore AlertingStore, orgSto
// Set up the default per tenant Alertmanager factory.
moa.factory = func(ctx context.Context, orgID int64) (Alertmanager, error) {
m := metrics.NewAlertmanagerMetrics(moa.metrics.GetOrCreateOrgRegistry(orgID))
return newAlertmanager(ctx, orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, moa.decryptFn, moa.ns, m)
return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, moa.decryptFn, moa.ns, m)
}
for _, opt := range opts {

@ -159,7 +159,7 @@ func TestIntegrationRemoteAlertmanagerApplyConfigOnlyUploadsOnce(t *testing.T) {
silences := []byte("test-silences")
nflog := []byte("test-notifications")
store := fakes.NewFakeKVStore(t)
fstore := notifier.NewFileStore(1, store, "")
fstore := notifier.NewFileStore(cfg.OrgID, store, "")
ctx := context.Background()
require.NoError(t, store.Set(ctx, cfg.OrgID, "alertmanager", notifier.SilencesFilename, base64.StdEncoding.EncodeToString(silences)))

@ -104,10 +104,11 @@ type UnifiedAlertingSettings struct {
// RemoteAlertmanagerSettings contains the configuration needed
// to disable the internal Alertmanager and use an external one instead.
type RemoteAlertmanagerSettings struct {
Enable bool
URL string
TenantID string
Password string
Enable bool
URL string
TenantID string
Password string
SyncInterval time.Duration
}
type UnifiedAlertingScreenshotSettings struct {
@ -352,6 +353,11 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
TenantID: remoteAlertmanager.Key("tenant").MustString(""),
Password: remoteAlertmanager.Key("password").MustString(""),
}
uaCfgRemoteAM.SyncInterval, err = gtime.ParseDuration(valueAsString(remoteAlertmanager, "sync_interval", (schedulerDefaultAdminConfigPollInterval).String()))
if err != nil {
return err
}
uaCfg.RemoteAlertmanager = uaCfgRemoteAM
screenshots := iniFile.Section("unified_alerting.screenshots")

@ -160,9 +160,7 @@
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"reducer": ["sum"],
"show": false
},
"showHeader": true
@ -325,9 +323,7 @@
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"reducer": ["sum"],
"show": false
},
"showHeader": true

Loading…
Cancel
Save