[v10.4.x] Alerting: Make context deadline on AlertNG service startup configurable (#96058)

* Alerting: Make context deadline on AlertNG service startup configurable (#96053)

* Make alerting context deadline configurable

* Remove debug logs

* Change default timeout

* Update tests

(cherry picked from commit 1fdc48faba)

* remove file

* Remove go.work.sum change

* Fix backport

* No cyclo flag
pull/96109/head
Fayzal Ghantiwala 8 months ago committed by GitHub
parent b29276dc4a
commit 880e33216b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      conf/defaults.ini
  2. 3
      conf/sample.ini
  3. 2
      pkg/services/ngalert/ngalert.go
  4. 3
      pkg/services/ngalert/tests/util.go
  5. 1
      pkg/services/quota/quotaimpl/quota_test.go
  6. 8
      pkg/setting/setting_unified_alerting.go
  7. 4
      pkg/setting/setting_unified_alerting_test.go

@ -1139,6 +1139,9 @@ enabled =
# Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled. # Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled.
disabled_orgs = disabled_orgs =
# Specify how long to wait for the alerting service to initialize, as a duration string (e.g. 30s or 1m).
initialization_timeout = 30s
# Specify the frequency of polling for admin config changes. # Specify the frequency of polling for admin config changes.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m. # The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
admin_config_poll_interval = 60s admin_config_poll_interval = 60s

@ -1063,6 +1063,9 @@
# Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled. # Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled.
;disabled_orgs = ;disabled_orgs =
# Specify how long to wait for the alerting service to initialize, as a duration string (e.g. 30s or 1m).
;initialization_timeout = 30s
# Specify the frequency of polling for admin config changes. # Specify the frequency of polling for admin config changes.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m. # The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;admin_config_poll_interval = 60s ;admin_config_poll_interval = 60s

@ -165,7 +165,7 @@ type AlertNG struct {
func (ng *AlertNG) init() error { func (ng *AlertNG) init() error {
// AlertNG should be initialized before the cancellation deadline of initCtx // AlertNG should be initialized before the cancellation deadline of initCtx
initCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second) initCtx, cancelFunc := context.WithTimeout(context.Background(), ng.Cfg.UnifiedAlerting.InitializationTimeout)
defer cancelFunc() defer cancelFunc()
ng.store.Logger = ng.Log ng.store.Logger = ng.Log

@ -45,7 +45,8 @@ func SetupTestEnv(tb testing.TB, baseInterval time.Duration) (*ngalert.AlertNG,
cfg := setting.NewCfg() cfg := setting.NewCfg()
cfg.UnifiedAlerting = setting.UnifiedAlertingSettings{ cfg.UnifiedAlerting = setting.UnifiedAlertingSettings{
BaseInterval: setting.SchedulerBaseInterval, BaseInterval: setting.SchedulerBaseInterval,
InitializationTimeout: 30 * time.Second,
} }
// AlertNG database migrations run and the relative database tables are created only when it's enabled // AlertNG database migrations run and the relative database tables are created only when it's enabled
cfg.UnifiedAlerting.Enabled = new(bool) cfg.UnifiedAlerting.Enabled = new(bool)

@ -485,6 +485,7 @@ func setupEnv(t *testing.T, sqlStore *sqlstore.SQLStore, b bus.Bus, quotaService
ac := acimpl.ProvideAccessControl(sqlStore.Cfg) ac := acimpl.ProvideAccessControl(sqlStore.Cfg)
ruleStore, err := ngstore.ProvideDBStore(sqlStore.Cfg, featuremgmt.WithFeatures(), sqlStore, &foldertest.FakeService{}, &dashboards.FakeDashboardService{}, ac) ruleStore, err := ngstore.ProvideDBStore(sqlStore.Cfg, featuremgmt.WithFeatures(), sqlStore, &foldertest.FakeService{}, &dashboards.FakeDashboardService{}, ac)
require.NoError(t, err) require.NoError(t, err)
sqlStore.Cfg.UnifiedAlerting.InitializationTimeout = 30 * time.Second
_, err = ngalert.ProvideService( _, err = ngalert.ProvideService(
sqlStore.Cfg, featuremgmt.WithFeatures(), nil, nil, routing.NewRouteRegister(), sqlStore, nil, nil, nil, quotaService, sqlStore.Cfg, featuremgmt.WithFeatures(), nil, nil, routing.NewRouteRegister(), sqlStore, nil, nil, nil, quotaService,
secretsService, nil, m, &foldertest.FakeService{}, &acmock.Mock{}, &dashboards.FakeDashboardService{}, nil, b, &acmock.Mock{}, secretsService, nil, m, &foldertest.FakeService{}, &acmock.Mock{}, &dashboards.FakeDashboardService{}, nil, b, &acmock.Mock{},

@ -44,6 +44,7 @@ const (
} }
} }
` `
alertingDefaultInitializationTimeout = 30 * time.Second
evaluatorDefaultEvaluationTimeout = 30 * time.Second evaluatorDefaultEvaluationTimeout = 30 * time.Second
schedulerDefaultAdminConfigPollInterval = time.Minute schedulerDefaultAdminConfigPollInterval = time.Minute
schedulereDefaultExecuteAlerts = true schedulereDefaultExecuteAlerts = true
@ -81,6 +82,7 @@ type UnifiedAlertingSettings struct {
HARedisPassword string HARedisPassword string
HARedisDB int HARedisDB int
HARedisMaxConns int HARedisMaxConns int
InitializationTimeout time.Duration
MaxAttempts int64 MaxAttempts int64
MinInterval time.Duration MinInterval time.Duration
EvaluationTimeout time.Duration EvaluationTimeout time.Duration
@ -216,6 +218,7 @@ func (cfg *Cfg) readUnifiedAlertingEnabledSetting(section *ini.Section) (*bool,
// ReadUnifiedAlertingSettings reads both the `unified_alerting` and `alerting` sections of the configuration while preferring configuration from the `alerting` section. // ReadUnifiedAlertingSettings reads both the `unified_alerting` and `alerting` sections of the configuration while preferring configuration from the `alerting` section.
// It first reads the `unified_alerting` section, then looks for non-defaults on the `alerting` section and prefers those. // It first reads the `unified_alerting` section, then looks for non-defaults on the `alerting` section and prefers those.
// nolint:gocyclo
func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error { func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
var err error var err error
uaCfg := UnifiedAlertingSettings{} uaCfg := UnifiedAlertingSettings{}
@ -235,6 +238,11 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
uaCfg.DisabledOrgs[orgID] = struct{}{} uaCfg.DisabledOrgs[orgID] = struct{}{}
} }
uaCfg.InitializationTimeout, err = gtime.ParseDuration(valueAsString(ua, "initialization_timeout", (alertingDefaultInitializationTimeout).String()))
if err != nil {
return err
}
uaCfg.AdminConfigPollInterval, err = gtime.ParseDuration(valueAsString(ua, "admin_config_poll_interval", (schedulerDefaultAdminConfigPollInterval).String())) uaCfg.AdminConfigPollInterval, err = gtime.ParseDuration(valueAsString(ua, "admin_config_poll_interval", (schedulerDefaultAdminConfigPollInterval).String()))
if err != nil { if err != nil {
return err return err

@ -25,6 +25,7 @@ func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
require.Len(t, cfg.UnifiedAlerting.HAPeers, 0) require.Len(t, cfg.UnifiedAlerting.HAPeers, 0)
require.Equal(t, 200*time.Millisecond, cfg.UnifiedAlerting.HAGossipInterval) require.Equal(t, 200*time.Millisecond, cfg.UnifiedAlerting.HAGossipInterval)
require.Equal(t, time.Minute, cfg.UnifiedAlerting.HAPushPullInterval) require.Equal(t, time.Minute, cfg.UnifiedAlerting.HAPushPullInterval)
require.Equal(t, alertingDefaultInitializationTimeout, cfg.UnifiedAlerting.InitializationTimeout)
} }
// With peers set, it correctly parses them. // With peers set, it correctly parses them.
@ -34,10 +35,13 @@ func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
_, err = s.NewKey("ha_peers", "hostname1:9090,hostname2:9090,hostname3:9090") _, err = s.NewKey("ha_peers", "hostname1:9090,hostname2:9090,hostname3:9090")
require.NoError(t, err) require.NoError(t, err)
_, err = s.NewKey("initialization_timeout", "123s")
require.NoError(t, err)
require.NoError(t, cfg.ReadUnifiedAlertingSettings(cfg.Raw)) require.NoError(t, cfg.ReadUnifiedAlertingSettings(cfg.Raw))
require.Len(t, cfg.UnifiedAlerting.HAPeers, 3) require.Len(t, cfg.UnifiedAlerting.HAPeers, 3)
require.ElementsMatch(t, []string{"hostname1:9090", "hostname2:9090", "hostname3:9090"}, cfg.UnifiedAlerting.HAPeers) require.ElementsMatch(t, []string{"hostname1:9090", "hostname2:9090", "hostname3:9090"}, cfg.UnifiedAlerting.HAPeers)
require.Equal(t, 123*time.Second, cfg.UnifiedAlerting.InitializationTimeout)
} }
t.Run("should read 'scheduler_tick_interval'", func(t *testing.T) { t.Run("should read 'scheduler_tick_interval'", func(t *testing.T) {

Loading…
Cancel
Save