[v11.3.x] Alerting: Make context deadline on AlertNG service startup configurable (#96135)

Alerting: Make context deadline on AlertNG service startup configurable (#96053)

* Make alerting context deadline configurable

* Remove debug logs

* Change default timeout

* Update tests

(cherry picked from commit 1fdc48faba)
pull/96207/head
Fayzal Ghantiwala 8 months ago committed by GitHub
parent 8f10dbe12d
commit 8bc0014845
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      conf/defaults.ini
  2. 3
      conf/sample.ini
  3. 1
      pkg/services/cloudmigration/cloudmigrationimpl/cloudmigration_test.go
  4. 2
      pkg/services/ngalert/ngalert.go
  5. 3
      pkg/services/ngalert/tests/util.go
  6. 1
      pkg/services/quota/quotaimpl/quota_test.go
  7. 7
      pkg/setting/setting_unified_alerting.go
  8. 4
      pkg/setting/setting_unified_alerting_test.go

@ -1209,6 +1209,9 @@ enabled =
# Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled.
disabled_orgs =
# Specify how long to wait for the alerting service to initialize
initialization_timeout = 30s
# Specify the frequency of polling for admin config changes.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
admin_config_poll_interval = 60s

@ -1197,6 +1197,9 @@
# Comma-separated list of organization IDs for which to disable unified alerting. Only supported if unified alerting is enabled.
;disabled_orgs =
# Specify how long to wait for the alerting service to initialize
;initialization_timeout = 30s
# Specify the frequency of polling for admin config changes.
# The interval string is a possibly signed sequence of decimal numbers, followed by a unit suffix (ms, s, m, h, d), e.g. 30s or 1m.
;admin_config_poll_interval = 60s

@ -782,6 +782,7 @@ func setUpServiceTest(t *testing.T, withDashboardMock bool) cloudmigration.Servi
fakeAccessControlService := actest.FakeService{}
alertMetrics := metrics.NewNGAlert(prometheus.NewRegistry())
cfg.UnifiedAlerting.InitializationTimeout = 30 * time.Second
ruleStore, err := ngalertstore.ProvideDBStore(cfg, featureToggles, sqlStore, mockFolder, dashboardService, fakeAccessControl)
require.NoError(t, err)

@ -161,7 +161,7 @@ type AlertNG struct {
func (ng *AlertNG) init() error {
// AlertNG should be initialized before the cancellation deadline of initCtx
initCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second)
initCtx, cancelFunc := context.WithTimeout(context.Background(), ng.Cfg.UnifiedAlerting.InitializationTimeout)
defer cancelFunc()
ng.store.Logger = ng.Log

@ -47,7 +47,8 @@ func SetupTestEnv(tb testing.TB, baseInterval time.Duration) (*ngalert.AlertNG,
cfg := setting.NewCfg()
cfg.UnifiedAlerting = setting.UnifiedAlertingSettings{
BaseInterval: setting.SchedulerBaseInterval,
BaseInterval: setting.SchedulerBaseInterval,
InitializationTimeout: 30 * time.Second,
}
// AlertNG database migrations run and the relative database tables are created only when it's enabled
cfg.UnifiedAlerting.Enabled = new(bool)

@ -498,6 +498,7 @@ func setupEnv(t *testing.T, sqlStore db.DB, cfg *setting.Cfg, b bus.Bus, quotaSe
ac := acimpl.ProvideAccessControl(featuremgmt.WithFeatures(), zanzana.NewNoopClient())
ruleStore, err := ngstore.ProvideDBStore(cfg, featuremgmt.WithFeatures(), sqlStore, &foldertest.FakeService{}, &dashboards.FakeDashboardService{}, ac)
require.NoError(t, err)
cfg.UnifiedAlerting.InitializationTimeout = 30 * time.Second
_, err = ngalert.ProvideService(
cfg, featuremgmt.WithFeatures(), nil, nil, routing.NewRouteRegister(), sqlStore, ngalertfakes.NewFakeKVStore(t), nil, nil, quotaService,
secretsService, nil, m, &foldertest.FakeService{}, &acmock.Mock{}, &dashboards.FakeDashboardService{}, nil, b, &acmock.Mock{},

@ -45,6 +45,7 @@ const (
}
}
`
alertingDefaultInitializationTimeout = 30 * time.Second
evaluatorDefaultEvaluationTimeout = 30 * time.Second
schedulerDefaultAdminConfigPollInterval = time.Minute
schedulerDefaultExecuteAlerts = true
@ -90,6 +91,7 @@ type UnifiedAlertingSettings struct {
HARedisMaxConns int
HARedisTLSEnabled bool
HARedisTLSConfig dstls.ClientConfig
InitializationTimeout time.Duration
MaxAttempts int64
MinInterval time.Duration
EvaluationTimeout time.Duration
@ -229,6 +231,11 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
uaCfg.DisabledOrgs[orgID] = struct{}{}
}
uaCfg.InitializationTimeout, err = gtime.ParseDuration(valueAsString(ua, "initialization_timeout", (alertingDefaultInitializationTimeout).String()))
if err != nil {
return err
}
uaCfg.AdminConfigPollInterval, err = gtime.ParseDuration(valueAsString(ua, "admin_config_poll_interval", (schedulerDefaultAdminConfigPollInterval).String()))
if err != nil {
return err

@ -26,6 +26,7 @@ func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
require.Equal(t, 200*time.Millisecond, cfg.UnifiedAlerting.HAGossipInterval)
require.Equal(t, time.Minute, cfg.UnifiedAlerting.HAPushPullInterval)
require.Equal(t, 6*time.Hour, cfg.UnifiedAlerting.HAReconnectTimeout)
require.Equal(t, alertingDefaultInitializationTimeout, cfg.UnifiedAlerting.InitializationTimeout)
}
// With peers set, it correctly parses them.
@ -35,10 +36,13 @@ func TestCfg_ReadUnifiedAlertingSettings(t *testing.T) {
require.NoError(t, err)
_, err = s.NewKey("ha_peers", "hostname1:9090,hostname2:9090,hostname3:9090")
require.NoError(t, err)
_, err = s.NewKey("initialization_timeout", "123s")
require.NoError(t, err)
require.NoError(t, cfg.ReadUnifiedAlertingSettings(cfg.Raw))
require.Len(t, cfg.UnifiedAlerting.HAPeers, 3)
require.ElementsMatch(t, []string{"hostname1:9090", "hostname2:9090", "hostname3:9090"}, cfg.UnifiedAlerting.HAPeers)
require.Equal(t, 123*time.Second, cfg.UnifiedAlerting.InitializationTimeout)
}
t.Run("should read 'scheduler_tick_interval'", func(t *testing.T) {

Loading…
Cancel
Save