Alerting: Use the forked Alertmanager for remote secondary mode (#79646)

* (WIP) Alerting: Use the forked Alertmanager for remote secondary mode

* fall back to using internal AM in case of error

* remove TODOs, clean up .ini file, add orgId as part of remote AM config struct

* log warnings and errors, fall back to remoteSecondary, fall back to internal AM only

* extract logic to decide remote Alertmanager mode to a separate function, switch on mode

* tests

* make linter happy

* remove func to decide remote Alertmanager mode

* refactor factory function and options

* add default case to switch statement

* remove ineffectual assignment
pull/79828/head
Santiago 2 years ago committed by GitHub
parent 12e473892f
commit a77ba40ed4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      conf/defaults.ini
  2. 78
      pkg/services/ngalert/ngalert.go
  3. 2
      pkg/services/ngalert/notifier/alertmanager.go
  4. 2
      pkg/services/ngalert/notifier/alertmanager_test.go
  5. 10
      pkg/services/ngalert/notifier/multiorg_alertmanager.go
  6. 2
      pkg/services/ngalert/remote/alertmanager_test.go
  7. 14
      pkg/setting/setting_unified_alerting.go
  8. 8
      public/app/features/dashboard-scene/panel-edit/testfiles/testDashboard.json

@ -1276,6 +1276,8 @@ tenant =
# If not present, the tenant ID will be set in the X-Scope-OrgID header. # If not present, the tenant ID will be set in the X-Scope-OrgID header.
password = password =
sync_interval = 5m
#################################### Alerting ############################ #################################### Alerting ############################
[alerting] [alerting]
# Enable the legacy alerting sub-system and interface. If Unified Alerting is already enabled and you try to go back to legacy alerting, all data that is part of Unified Alerting will be deleted. When this configuration section and flag are not defined, the state is defined at runtime. See the documentation for more details. # Enable the legacy alerting sub-system and interface. If Unified Alerting is already enabled and you try to go back to legacy alerting, all data that is part of Unified Alerting will be deleted. When this configuration section and flag are not defined, the state is defined at runtime. See the documentation for more details.

@ -162,32 +162,70 @@ type AlertNG struct {
} }
func (ng *AlertNG) init() error { func (ng *AlertNG) init() error {
var err error
// AlertNG should be initialized before the cancellation deadline of initCtx // AlertNG should be initialized before the cancellation deadline of initCtx
initCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second) initCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
defer cancelFunc() defer cancelFunc()
ng.store.Logger = ng.Log ng.store.Logger = ng.Log
decryptFn := ng.SecretsService.GetDecryptedValue // If enabled, configure the remote Alertmanager.
multiOrgMetrics := ng.Metrics.GetMultiOrgAlertmanagerMetrics() // - If several toggles are enabled, the order of precedence is RemoteOnly, RemotePrimary, RemoteSecondary
// - If no toggles are enabled, we default to using only the internal Alertmanager
// We currently support only remote secondary mode, so in case other toggles are enabled we fall back to remote secondary.
var overrides []notifier.Option var overrides []notifier.Option
moaLogger := log.New("ngalert.multiorg.alertmanager")
remoteOnly := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemoteOnly)
remotePrimary := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemotePrimary)
remoteSecondary := ng.FeatureToggles.IsEnabled(initCtx, featuremgmt.FlagAlertmanagerRemoteSecondary)
if ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Enable { if ng.Cfg.UnifiedAlerting.RemoteAlertmanager.Enable {
override := notifier.WithAlertmanagerOverride(func(ctx context.Context, orgID int64) (notifier.Alertmanager, error) { switch {
externalAMCfg := remote.AlertmanagerConfig{} case remoteOnly, remotePrimary:
// We won't be handling files on disk, we can pass an empty string as workingDirPath. ng.Log.Warn("Only remote secondary mode is supported at the moment, falling back to remote secondary")
stateStore := notifier.NewFileStore(orgID, ng.KVStore, "") fallthrough
return remote.NewAlertmanager(externalAMCfg, stateStore)
}) case remoteSecondary:
ng.Log.Debug("Starting Grafana with remote secondary mode enabled")
overrides = append(overrides, override) // This function will be used by the MOA to create new Alertmanagers.
override := notifier.WithAlertmanagerOverride(func(factoryFn notifier.OrgAlertmanagerFactory) notifier.OrgAlertmanagerFactory {
return func(ctx context.Context, orgID int64) (notifier.Alertmanager, error) {
// Create internal Alertmanager.
internalAM, err := factoryFn(ctx, orgID)
if err != nil {
return nil, err
}
// Create remote Alertmanager.
remoteAM, err := createRemoteAlertmanager(orgID, ng.Cfg.UnifiedAlerting.RemoteAlertmanager, ng.KVStore)
if err != nil {
moaLogger.Error("Failed to create remote Alertmanager, falling back to using only the internal one", "err", err)
return internalAM, nil
}
// Use both Alertmanager implementations in the forked Alertmanager.
cfg := remote.RemoteSecondaryConfig{
Logger: log.New("ngalert.forked-alertmanager.remote-secondary"),
OrgID: orgID,
Store: ng.store,
SyncInterval: ng.Cfg.UnifiedAlerting.RemoteAlertmanager.SyncInterval,
}
return remote.NewRemoteSecondaryForkedAlertmanager(cfg, internalAM, remoteAM)
}
})
overrides = append(overrides, override)
default:
ng.Log.Error("A mode should be selected when enabling the remote Alertmanager, falling back to using only the internal Alertmanager")
}
} }
ng.MultiOrgAlertmanager, err = notifier.NewMultiOrgAlertmanager(ng.Cfg, ng.store, ng.store, ng.KVStore, ng.store, decryptFn, multiOrgMetrics, ng.NotificationService, log.New("ngalert.multiorg.alertmanager"), ng.SecretsService, overrides...)
decryptFn := ng.SecretsService.GetDecryptedValue
multiOrgMetrics := ng.Metrics.GetMultiOrgAlertmanagerMetrics()
moa, err := notifier.NewMultiOrgAlertmanager(ng.Cfg, ng.store, ng.store, ng.KVStore, ng.store, decryptFn, multiOrgMetrics, ng.NotificationService, moaLogger, ng.SecretsService, overrides...)
if err != nil { if err != nil {
return err return err
} }
ng.MultiOrgAlertmanager = moa
imageService, err := image.NewScreenshotImageServiceFromCfg(ng.Cfg, ng.store, ng.dashboardService, ng.renderService, ng.Metrics.Registerer) imageService, err := image.NewScreenshotImageServiceFromCfg(ng.Cfg, ng.store, ng.dashboardService, ng.renderService, ng.Metrics.Registerer)
if err != nil { if err != nil {
@ -490,3 +528,15 @@ func ApplyStateHistoryFeatureToggles(cfg *setting.UnifiedAlertingStateHistorySet
return return
} }
} }
// createRemoteAlertmanager builds the remote Alertmanager for the given
// organization. The connection details (URL, tenant ID and basic auth
// password) are copied from amCfg, and the Alertmanager state is backed by a
// file store created on top of the supplied key-value store.
func createRemoteAlertmanager(orgID int64, amCfg setting.RemoteAlertmanagerSettings, kvstore kvstore.KVStore) (*remote.Alertmanager, error) {
	// No files are handled on disk here, so the working directory path
	// given to the file store is left empty.
	stateStore := notifier.NewFileStore(orgID, kvstore, "")

	return remote.NewAlertmanager(
		remote.AlertmanagerConfig{
			OrgID:             orgID,
			URL:               amCfg.URL,
			TenantID:          amCfg.TenantID,
			BasicAuthPassword: amCfg.Password,
		},
		stateStore,
	)
}

@ -83,7 +83,7 @@ func (m maintenanceOptions) MaintenanceFunc(state alertingNotify.State) (int64,
return m.maintenanceFunc(state) return m.maintenanceFunc(state)
} }
func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, kvStore kvstore.KVStore, func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, kvStore kvstore.KVStore,
peer alertingNotify.ClusterPeer, decryptFn alertingNotify.GetDecryptedValueFn, ns notifications.Service, peer alertingNotify.ClusterPeer, decryptFn alertingNotify.GetDecryptedValueFn, ns notifications.Service,
m *metrics.Alertmanager) (*alertmanager, error) { m *metrics.Alertmanager) (*alertmanager, error) {
workingPath := filepath.Join(cfg.DataPath, workingDir, strconv.Itoa(int(orgID))) workingPath := filepath.Join(cfg.DataPath, workingDir, strconv.Itoa(int(orgID)))

@ -41,7 +41,7 @@ func setupAMTest(t *testing.T) *alertmanager {
kvStore := fakes.NewFakeKVStore(t) kvStore := fakes.NewFakeKVStore(t)
secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(sqlStore)) secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(sqlStore))
decryptFn := secretsService.GetDecryptedValue decryptFn := secretsService.GetDecryptedValue
am, err := newAlertmanager(context.Background(), 1, cfg, s, kvStore, &NilPeer{}, decryptFn, nil, m) am, err := NewAlertmanager(context.Background(), 1, cfg, s, kvStore, &NilPeer{}, decryptFn, nil, m)
require.NoError(t, err) require.NoError(t, err)
return am return am
} }

@ -77,7 +77,7 @@ type MultiOrgAlertmanager struct {
configStore AlertingStore configStore AlertingStore
orgStore store.OrgStore orgStore store.OrgStore
kvStore kvstore.KVStore kvStore kvstore.KVStore
factory orgAlertmanagerFactory factory OrgAlertmanagerFactory
decryptFn alertingNotify.GetDecryptedValueFn decryptFn alertingNotify.GetDecryptedValueFn
@ -85,13 +85,13 @@ type MultiOrgAlertmanager struct {
ns notifications.Service ns notifications.Service
} }
type orgAlertmanagerFactory func(ctx context.Context, orgID int64) (Alertmanager, error) type OrgAlertmanagerFactory func(ctx context.Context, orgID int64) (Alertmanager, error)
type Option func(*MultiOrgAlertmanager) type Option func(*MultiOrgAlertmanager)
func WithAlertmanagerOverride(f orgAlertmanagerFactory) Option { func WithAlertmanagerOverride(f func(OrgAlertmanagerFactory) OrgAlertmanagerFactory) Option {
return func(moa *MultiOrgAlertmanager) { return func(moa *MultiOrgAlertmanager) {
moa.factory = f moa.factory = f(moa.factory)
} }
} }
@ -122,7 +122,7 @@ func NewMultiOrgAlertmanager(cfg *setting.Cfg, configStore AlertingStore, orgSto
// Set up the default per tenant Alertmanager factory. // Set up the default per tenant Alertmanager factory.
moa.factory = func(ctx context.Context, orgID int64) (Alertmanager, error) { moa.factory = func(ctx context.Context, orgID int64) (Alertmanager, error) {
m := metrics.NewAlertmanagerMetrics(moa.metrics.GetOrCreateOrgRegistry(orgID)) m := metrics.NewAlertmanagerMetrics(moa.metrics.GetOrCreateOrgRegistry(orgID))
return newAlertmanager(ctx, orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, moa.decryptFn, moa.ns, m) return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, moa.kvStore, moa.peer, moa.decryptFn, moa.ns, m)
} }
for _, opt := range opts { for _, opt := range opts {

@ -159,7 +159,7 @@ func TestIntegrationRemoteAlertmanagerApplyConfigOnlyUploadsOnce(t *testing.T) {
silences := []byte("test-silences") silences := []byte("test-silences")
nflog := []byte("test-notifications") nflog := []byte("test-notifications")
store := fakes.NewFakeKVStore(t) store := fakes.NewFakeKVStore(t)
fstore := notifier.NewFileStore(1, store, "") fstore := notifier.NewFileStore(cfg.OrgID, store, "")
ctx := context.Background() ctx := context.Background()
require.NoError(t, store.Set(ctx, cfg.OrgID, "alertmanager", notifier.SilencesFilename, base64.StdEncoding.EncodeToString(silences))) require.NoError(t, store.Set(ctx, cfg.OrgID, "alertmanager", notifier.SilencesFilename, base64.StdEncoding.EncodeToString(silences)))

@ -104,10 +104,11 @@ type UnifiedAlertingSettings struct {
// RemoteAlertmanagerSettings contains the configuration needed // RemoteAlertmanagerSettings contains the configuration needed
// to disable the internal Alertmanager and use an external one instead. // to disable the internal Alertmanager and use an external one instead.
type RemoteAlertmanagerSettings struct { type RemoteAlertmanagerSettings struct {
Enable bool Enable bool
URL string URL string
TenantID string TenantID string
Password string Password string
SyncInterval time.Duration
} }
type UnifiedAlertingScreenshotSettings struct { type UnifiedAlertingScreenshotSettings struct {
@ -352,6 +353,11 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
TenantID: remoteAlertmanager.Key("tenant").MustString(""), TenantID: remoteAlertmanager.Key("tenant").MustString(""),
Password: remoteAlertmanager.Key("password").MustString(""), Password: remoteAlertmanager.Key("password").MustString(""),
} }
uaCfgRemoteAM.SyncInterval, err = gtime.ParseDuration(valueAsString(remoteAlertmanager, "sync_interval", (schedulerDefaultAdminConfigPollInterval).String()))
if err != nil {
return err
}
uaCfg.RemoteAlertmanager = uaCfgRemoteAM uaCfg.RemoteAlertmanager = uaCfgRemoteAM
screenshots := iniFile.Section("unified_alerting.screenshots") screenshots := iniFile.Section("unified_alerting.screenshots")

@ -160,9 +160,7 @@
"footer": { "footer": {
"countRows": false, "countRows": false,
"fields": "", "fields": "",
"reducer": [ "reducer": ["sum"],
"sum"
],
"show": false "show": false
}, },
"showHeader": true "showHeader": true
@ -325,9 +323,7 @@
"footer": { "footer": {
"countRows": false, "countRows": false,
"fields": "", "fields": "",
"reducer": [ "reducer": ["sum"],
"sum"
],
"show": false "show": false
}, },
"showHeader": true "showHeader": true

Loading…
Cancel
Save