diff --git a/pkg/services/ngalert/metrics/scheduler.go b/pkg/services/ngalert/metrics/scheduler.go index 172fda5902b..35b56b95732 100644 --- a/pkg/services/ngalert/metrics/scheduler.go +++ b/pkg/services/ngalert/metrics/scheduler.go @@ -20,6 +20,7 @@ type Scheduler struct { EvalDuration *prometheus.HistogramVec ProcessDuration *prometheus.HistogramVec SendDuration *prometheus.HistogramVec + SimpleNotificationRules *prometheus.GaugeVec GroupRules *prometheus.GaugeVec Groups *prometheus.GaugeVec SchedulePeriodicDuration prometheus.Histogram @@ -91,6 +92,15 @@ func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler { }, []string{"org"}, ), + SimpleNotificationRules: promauto.With(r).NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: Subsystem, + Name: "simple_routing_rules", + Help: "The number of alert rules using simplified routing.", + }, + []string{"org"}, + ), // TODO: partition on rule group as well as tenant, similar to loki|cortex. GroupRules: promauto.With(r).NewGaugeVec( prometheus.GaugeOpts{ diff --git a/pkg/services/ngalert/schedule/metrics.go b/pkg/services/ngalert/schedule/metrics.go index 72a7f455691..62e9e590794 100644 --- a/pkg/services/ngalert/schedule/metrics.go +++ b/pkg/services/ngalert/schedule/metrics.go @@ -35,6 +35,7 @@ func sortedUIDs(alertRules []*models.AlertRule) []string { func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) { rulesPerOrg := make(map[int64]int64) // orgID -> count orgsPaused := make(map[int64]int64) // orgID -> count + orgsNfSettings := make(map[int64]int64) // orgID -> count groupsPerOrg := make(map[int64]map[string]struct{}) // orgID -> set of groups for _, rule := range alertRules { rulesPerOrg[rule.OrgID]++ @@ -43,6 +44,10 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) { orgsPaused[rule.OrgID]++ } + if len(rule.NotificationSettings) > 0 { + orgsNfSettings[rule.OrgID]++ + } + orgGroups, ok := groupsPerOrg[rule.OrgID] if !ok { orgGroups = make(map[string]struct{}) @@ -53,8 +58,10 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) { for orgID, numRules := range rulesPerOrg { numRulesPaused := orgsPaused[orgID] + numRulesNfSettings := orgsNfSettings[orgID] sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRuleActiveLabelValue).Set(float64(numRules - numRulesPaused)) sch.metrics.GroupRules.WithLabelValues(fmt.Sprint(orgID), metrics.AlertRulePausedLabelValue).Set(float64(numRulesPaused)) + sch.metrics.SimpleNotificationRules.WithLabelValues(fmt.Sprint(orgID)).Set(float64(numRulesNfSettings)) } for orgID, groups := range groupsPerOrg {