[v10.4.x] Alerting: Make context deadline on AlertNG service startup configurable (#96058)

* Alerting: Make context deadline on AlertNG service startup configurable (#96053)

* Make alerting context deadline configurable

* Remove debug logs

* Change default timeout

* Update tests

(cherry picked from commit 1fdc48faba)

* remove file

* Remove go.work.sum change

* Fix backport

* No cyclo flag
pull/96109/head
Fayzal Ghantiwala 8 months ago committed by GitHub
parent b29276dc4a
commit 880e33216b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      conf/defaults.ini
  2. 3
      conf/sample.ini
  3. 2
      pkg/services/ngalert/ngalert.go
  4. 3
      pkg/services/ngalert/tests/util.go
  5. 1
      pkg/services/quota/quotaimpl/quota_test.go
  6. 8
      pkg/setting/setting_unified_alerting.go
  7. 4
      pkg/setting/setting_unified_alerting_test.go

@ -1139,6 +1139,9 @@ enabled =
# Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled.
disabled_orgs =
# Specify how long to wait for the alerting service to initialize
initialization_timeout = 30s
# Specify the frequency of polling for admin config changes.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
admin_config_poll_interval = 60s

@ -1063,6 +1063,9 @@
# Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled.
;disabled_orgs =
# Specify how long to wait for the alerting service to initialize
;initialization_timeout = 30s
# Specify the frequency of polling for admin config changes.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;admin_config_poll_interval = 60s

@ -165,7 +165,7 @@ type AlertNG struct {
func (ng *AlertNG) init() error {
// AlertNG should be initialized before the cancellation deadline of initCtx
initCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
initCtx, cancelFunc := context.WithTimeout(context.Background(), ng.Cfg.UnifiedAlerting.InitializationTimeout)
defer cancelFunc()
ng.store.Logger = ng.Log

@ -45,7 +45,8 @@ func SetupTestEnv(tb testing.TB, baseInterval time.Duration) (*ngalert.AlertNG,
cfg := setting.NewCfg()
cfg.UnifiedAlerting = setting.UnifiedAlertingSettings{
BaseInterval: setting.SchedulerBaseInterval,
BaseInterval: setting.SchedulerBaseInterval,
InitializationTimeout: 30 * time.Second,
}
// AlertNG database migrations run, and the related database tables are created, only when it's enabled
cfg.UnifiedAlerting.Enabled = new(bool)

@ -485,6 +485,7 @@ func setupEnv(t *testing.T, sqlStore *sqlstore.SQLStore, b bus.Bus, quotaService
ac := acimpl.ProvideAccessControl(sqlStore.Cfg)
ruleStore, err := ngstore.ProvideDBStore(sqlStore.Cfg, featuremgmt.WithFeatures(), sqlStore, &foldertest.FakeService{}, &dashboards.FakeDashboardService{}, ac)
require.NoError(t, err)
sqlStore.Cfg.UnifiedAlerting.InitializationTimeout = 30 * time.Second
_, err = ngalert.ProvideService(
sqlStore.Cfg, featuremgmt.WithFeatures(), nil, nil, routing.NewRouteRegister(), sqlStore, nil, nil, nil, quotaService,
secretsService, nil, m, &foldertest.FakeService{}, &acmock.Mock{}, &dashboards.FakeDashboardService{}, nil, b, &acmock.Mock{},

@ -44,6 +44,7 @@ const (
}
}
`
alertingDefaultInitializationTimeout = 30 * time.Second
evaluatorDefaultEvaluationTimeout = 30 * time.Second
schedulerDefaultAdminConfigPollInterval = time.Minute
schedulereDefaultExecuteAlerts = true
@ -81,6 +82,7 @@ type UnifiedAlertingSettings struct {
HARedisPassword string
HARedisDB int
HARedisMaxConns int
InitializationTimeout time.Duration
MaxAttempts int64
MinInterval time.Duration
EvaluationTimeout time.Duration
@ -216,6 +218,7 @@ func (cfg *Cfg) readUnifiedAlertingEnabledSetting(section *ini.Section) (*bool,
// ReadUnifiedAlertingSettings reads both the `unified_alerting` and `alerting` sections of the configuration while preferring configuration from the `alerting` section.
// It first reads the `unified_alerting` section, then looks for non-defaults in the `alerting` section and prefers those.
// nolint:gocyclo
func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
var err error
uaCfg := UnifiedAlertingSettings{}
@ -235,6 +238,11 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
uaCfg.DisabledOrgs[orgID] = struct{}{}
}
uaCfg.InitializationTimeout, err = gtime.ParseDuration(valueAsString(ua, "initialization_timeout", (alertingDefaultInitializationTimeout).String()))
if err != nil {
return err
}
uaCfg.AdminConfigPollInterval, err = gtime.ParseDuration(valueAsString(ua, "admin_config_poll_interval", (schedulerDefaultAdminConfigPollInterval).String()))
if err != nil {
return err

@ -25,6 +25,7 @@ func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
require.Len(t, cfg.UnifiedAlerting.HAPeers, 0)
require.Equal(t, 200*time.Millisecond, cfg.UnifiedAlerting.HAGossipInterval)
require.Equal(t, time.Minute, cfg.UnifiedAlerting.HAPushPullInterval)
require.Equal(t, alertingDefaultInitializationTimeout, cfg.UnifiedAlerting.InitializationTimeout)
}
// With peers set, it correctly parses them.
@ -34,10 +35,13 @@ func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
require.NoError(t, err)
_, err = s.NewKey("ha_peers", "hostname1:9090,hostname2:9090,hostname3:9090")
require.NoError(t, err)
_, err = s.NewKey("initialization_timeout", "123s")
require.NoError(t, err)
require.NoError(t, cfg.ReadUnifiedAlertingSettings(cfg.Raw))
require.Len(t, cfg.UnifiedAlerting.HAPeers, 3)
require.ElementsMatch(t, []string{"hostname1:9090", "hostname2:9090", "hostname3:9090"}, cfg.UnifiedAlerting.HAPeers)
require.Equal(t, 123*time.Second, cfg.UnifiedAlerting.InitializationTimeout)
}
t.Run("should read 'scheduler_tick_interval'", func(t *testing.T) {

Loading…
Cancel
Save