From dd04757fc9e6e6cfdcd2772ac78f8a6bc2e83013 Mon Sep 17 00:00:00 2001 From: Alexander Weaver Date: Tue, 28 Mar 2023 08:49:51 -0500 Subject: [PATCH] Alerting: Add "backend" label to state history writes metrics (#65395) * Add backend label to state history writes metrics * Update test expectations --- pkg/services/ngalert/metrics/historian.go | 4 ++-- pkg/services/ngalert/state/historian/annotation.go | 4 ++-- pkg/services/ngalert/state/historian/annotation_test.go | 4 ++-- pkg/services/ngalert/state/historian/loki.go | 4 ++-- pkg/services/ngalert/state/historian/loki_test.go | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/services/ngalert/metrics/historian.go b/pkg/services/ngalert/metrics/historian.go index df481a7a31a..fa5d55965fd 100644 --- a/pkg/services/ngalert/metrics/historian.go +++ b/pkg/services/ngalert/metrics/historian.go @@ -41,13 +41,13 @@ func NewHistorianMetrics(r prometheus.Registerer) *Historian { Subsystem: Subsystem, Name: "state_history_writes_total", Help: "The total number of state history batches that were attempted to be written.", - }, []string{"org"}), + }, []string{"org", "backend"}), WritesFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Namespace: Namespace, Subsystem: Subsystem, Name: "state_history_writes_failed_total", Help: "The total number of failed writes of state history batches.", - }, []string{"org"}), + }, []string{"org", "backend"}), WriteDuration: instrument.NewHistogramCollector(promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ Namespace: Namespace, Subsystem: Subsystem, diff --git a/pkg/services/ngalert/state/historian/annotation.go b/pkg/services/ngalert/state/historian/annotation.go index da76cb16a83..d4c45ca7418 100644 --- a/pkg/services/ngalert/state/historian/annotation.go +++ b/pkg/services/ngalert/state/historian/annotation.go @@ -199,11 +199,11 @@ func (h *AnnotationBackend) recordAnnotations(ctx context.Context, panel *panelK } org := fmt.Sprint(orgID) - h.metrics.WritesTotal.WithLabelValues(org).Inc() + h.metrics.WritesTotal.WithLabelValues(org, "annotations").Inc() h.metrics.TransitionsTotal.WithLabelValues(org).Add(float64(len(annotations))) if err := h.annotations.SaveMany(ctx, annotations); err != nil { logger.Error("Error saving alert annotation batch", "error", err) - h.metrics.WritesFailed.WithLabelValues(org).Inc() + h.metrics.WritesFailed.WithLabelValues(org, "annotations").Inc() h.metrics.TransitionsFailed.WithLabelValues(org).Add(float64(len(annotations))) return fmt.Errorf("error saving alert annotation batch: %w", err) } diff --git a/pkg/services/ngalert/state/historian/annotation_test.go b/pkg/services/ngalert/state/historian/annotation_test.go index c4431b188ed..73820c1070a 100644 --- a/pkg/services/ngalert/state/historian/annotation_test.go +++ b/pkg/services/ngalert/state/historian/annotation_test.go @@ -83,10 +83,10 @@ grafana_alerting_state_history_transitions_failed_total{org="1"} 1 grafana_alerting_state_history_transitions_total{org="1"} 2 # HELP grafana_alerting_state_history_writes_failed_total The total number of failed writes of state history batches. # TYPE grafana_alerting_state_history_writes_failed_total counter -grafana_alerting_state_history_writes_failed_total{org="1"} 1 +grafana_alerting_state_history_writes_failed_total{backend="annotations",org="1"} 1 # HELP grafana_alerting_state_history_writes_total The total number of state history batches that were attempted to be written. # TYPE grafana_alerting_state_history_writes_total counter -grafana_alerting_state_history_writes_total{org="1"} 2 +grafana_alerting_state_history_writes_total{backend="annotations",org="1"} 2 `) err := testutil.GatherAndCompare(reg, exp, "grafana_alerting_state_history_transitions_total", diff --git a/pkg/services/ngalert/state/historian/loki.go b/pkg/services/ngalert/state/historian/loki.go index 7b04b08fba9..78c91e6325b 100644 --- a/pkg/services/ngalert/state/historian/loki.go +++ b/pkg/services/ngalert/state/historian/loki.go @@ -83,7 +83,7 @@ func (h *RemoteLokiBackend) Record(ctx context.Context, rule history_model.RuleM defer close(errCh) org := fmt.Sprint(rule.OrgID) - h.metrics.WritesTotal.WithLabelValues(org).Inc() + h.metrics.WritesTotal.WithLabelValues(org, "loki").Inc() samples := 0 for _, s := range streams { samples += len(s.Values) @@ -92,7 +92,7 @@ func (h *RemoteLokiBackend) Record(ctx context.Context, rule history_model.RuleM if err := h.recordStreams(ctx, streams, logger); err != nil { logger.Error("Failed to save alert state history batch", "error", err) - h.metrics.WritesFailed.WithLabelValues(org).Inc() + h.metrics.WritesFailed.WithLabelValues(org, "loki").Inc() h.metrics.TransitionsFailed.WithLabelValues(org).Add(float64(samples)) errCh <- fmt.Errorf("failed to save alert state history batch: %w", err) } diff --git a/pkg/services/ngalert/state/historian/loki_test.go b/pkg/services/ngalert/state/historian/loki_test.go index 90438ea0a5d..667e1cbbf3a 100644 --- a/pkg/services/ngalert/state/historian/loki_test.go +++ b/pkg/services/ngalert/state/historian/loki_test.go @@ -298,10 +298,10 @@ grafana_alerting_state_history_transitions_failed_total{org="1"} 1 grafana_alerting_state_history_transitions_total{org="1"} 2 # HELP grafana_alerting_state_history_writes_failed_total The total number of failed writes of state history batches. # TYPE grafana_alerting_state_history_writes_failed_total counter -grafana_alerting_state_history_writes_failed_total{org="1"} 1 +grafana_alerting_state_history_writes_failed_total{backend="loki",org="1"} 1 # HELP grafana_alerting_state_history_writes_total The total number of state history batches that were attempted to be written. # TYPE grafana_alerting_state_history_writes_total counter -grafana_alerting_state_history_writes_total{org="1"} 2 +grafana_alerting_state_history_writes_total{backend="loki",org="1"} 2 `) err := testutil.GatherAndCompare(reg, exp, "grafana_alerting_state_history_transitions_total",