From cb6f7a1a7775a35f1a5e48a0264a377d64efe7ce Mon Sep 17 00:00:00 2001 From: Periklis Tsirakidis Date: Wed, 15 Mar 2023 18:44:43 +0100 Subject: [PATCH] operator: Add alerting style guide validation (#8743) Co-authored-by: Robert Jacob --- operator/CHANGELOG.md | 1 + operator/apis/loki/v1/v1.go | 8 + .../loki-operator.clusterserviceversion.yaml | 2 +- .../loki-operator.clusterserviceversion.yaml | 2 +- operator/hack/lokirules_ocp.yaml | 3 +- .../validation/openshift/alertingrule.go | 16 ++ .../validation/openshift/alertingrule_test.go | 196 ++++++++++++++++++ .../internal/validation/openshift/common.go | 35 ++++ 8 files changed, 260 insertions(+), 3 deletions(-) diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index c801e40ed0..2883fa8662 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [8743](https://github.com/grafana/loki/pull/8743) **periklis**: Add alerting style guide validation - [8192](https://github.com/grafana/loki/pull/8192) **jotak**: Allow multiple matchers for multi-tenancy with Network tenant (OpenShift) - [8800](https://github.com/grafana/loki/pull/8800) **aminesnow**: Promote AlertingRules, RecordingRules and RulerConfig from v1beta1 to v1 - [8792](https://github.com/grafana/loki/pull/8792) **orenc1**: Improve documentation for LokiStack installation with ODF Object Storage diff --git a/operator/apis/loki/v1/v1.go b/operator/apis/loki/v1/v1.go index d54a735a97..cbfa584757 100644 --- a/operator/apis/loki/v1/v1.go +++ b/operator/apis/loki/v1/v1.go @@ -59,4 +59,12 @@ var ( // ErrRuleMustMatchNamespace indicates that an expression used in an alerting or recording rule is missing // matchers for a namespace. ErrRuleMustMatchNamespace = errors.New("rule needs to have a matcher for the namespace") + // ErrSeverityLabelMissing indicates that an alerting rule is missing the severity label + ErrSeverityLabelMissing = errors.New("rule requires label: severity") + // ErrSeverityLabelInvalid indicates that an alerting rule has an invalid value for the summary label + ErrSeverityLabelInvalid = errors.New("rule severity label value invalid, allowed values: critical, warning, info") + // ErrSummaryAnnotationMissing indicates that an alerting rule is missing the summary annotation + ErrSummaryAnnotationMissing = errors.New("rule requires annotation: summary") + // ErrDescriptionAnnotationMissing indicates that an alerting rule is missing the description annotation + ErrDescriptionAnnotationMissing = errors.New("rule requires annotation: description") ) diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index 63312b792d..cdc11e5bb3 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:main-39f2856 - createdAt: "2023-03-15T09:56:48Z" + createdAt: "2023-03-15T10:50:46Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. operators.operatorframework.io/builder: operator-sdk-unknown diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index 2874d63cce..5094e5ba19 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:v0.1.0 - createdAt: "2023-03-15T10:11:27Z" + createdAt: "2023-03-15T10:50:48Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. ## Prerequisites and Requirements diff --git a/operator/hack/lokirules_ocp.yaml b/operator/hack/lokirules_ocp.yaml index 554d44baf0..65250427d0 100644 --- a/operator/hack/lokirules_ocp.yaml +++ b/operator/hack/lokirules_ocp.yaml @@ -19,9 +19,10 @@ spec: > 0.01 for: 10s labels: - severity: page + severity: critical annotations: summary: High Loki Operator Reconciliation Errors + description: High Loki Operator Reconciliation Errors --- apiVersion: loki.grafana.com/v1 kind: RecordingRule diff --git a/operator/internal/validation/openshift/alertingrule.go b/operator/internal/validation/openshift/alertingrule.go index 5612a70051..c6c095fda5 100644 --- a/operator/internal/validation/openshift/alertingrule.go +++ b/operator/internal/validation/openshift/alertingrule.go @@ -33,6 +33,22 @@ func AlertingRuleValidator(_ context.Context, alertingRule *lokiv1.AlertingRule) err.Error(), )) } + + if err := validateRuleLabels(rule.Labels); err != nil { + allErrs = append(allErrs, field.Invalid( + field.NewPath("spec", "groups").Index(i).Child("rules").Index(j).Child("labels"), + rule.Labels, + err.Error(), + )) + } + + if err := validateRuleAnnotations(rule.Annotations); err != nil { + allErrs = append(allErrs, field.Invalid( + field.NewPath("spec", "groups").Index(i).Child("rules").Index(j).Child("annotations"), + rule.Annotations, + err.Error(), + )) + } } } diff --git a/operator/internal/validation/openshift/alertingrule_test.go b/operator/internal/validation/openshift/alertingrule_test.go index c1026c16cb..1457d87516 100644 --- a/operator/internal/validation/openshift/alertingrule_test.go +++ b/operator/internal/validation/openshift/alertingrule_test.go @@ -12,6 +12,8 @@ import ( ) func TestAlertingRuleValidator(t *testing.T) { + var nilMap map[string]string + tt := []struct { desc string spec *lokiv1.AlertingRule @@ -31,6 +33,13 @@ func TestAlertingRuleValidator(t *testing.T) { Rules: []*lokiv1.AlertingRuleGroupSpec{ { Expr: `sum(rate({kubernetes_namespace_name="example", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, }, }, }, @@ -53,6 +62,13 @@ func TestAlertingRuleValidator(t *testing.T) { Rules: []*lokiv1.AlertingRuleGroupSpec{ { Expr: `sum(rate({level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, }, }, }, @@ -75,6 +91,13 @@ func TestAlertingRuleValidator(t *testing.T) { Rules: []*lokiv1.AlertingRuleGroupSpec{ { Expr: `sum(rate({kubernetes_namespace_name="openshift-example", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, }, }, }, @@ -104,6 +127,13 @@ func TestAlertingRuleValidator(t *testing.T) { Rules: []*lokiv1.AlertingRuleGroupSpec{ { Expr: "invalid", + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, }, }, }, @@ -133,6 +163,13 @@ func TestAlertingRuleValidator(t *testing.T) { Rules: []*lokiv1.AlertingRuleGroupSpec{ { Expr: `{kubernetes_namespace_name="example", level="error"}`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, }, }, }, @@ -162,6 +199,13 @@ func TestAlertingRuleValidator(t *testing.T) { Rules: []*lokiv1.AlertingRuleGroupSpec{ { Expr: `sum(rate({level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, }, }, }, @@ -191,6 +235,13 @@ func TestAlertingRuleValidator(t *testing.T) { Rules: []*lokiv1.AlertingRuleGroupSpec{ { Expr: `sum(rate({kubernetes_namespace_name="other-ns", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, }, }, }, @@ -206,6 +257,151 @@ func TestAlertingRuleValidator(t *testing.T) { }, }, }, + { + desc: "missing severity label", + spec: &lokiv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alerting-rule", + Namespace: "example", + }, + Spec: lokiv1.AlertingRuleSpec{ + TenantID: "application", + Groups: []*lokiv1.AlertingRuleGroup{ + { + Rules: []*lokiv1.AlertingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="example", level="error"}[5m])) by (job) > 0.1`, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, + }, + }, + }, + }, + }, + }, + wantErrors: []*field.Error{ + { + Type: field.ErrorTypeInvalid, + Field: "spec.groups[0].rules[0].labels", + BadValue: nilMap, + Detail: lokiv1.ErrSeverityLabelMissing.Error(), + }, + }, + }, + { + desc: "invalid severity label", + spec: &lokiv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alerting-rule", + Namespace: "example", + }, + Spec: lokiv1.AlertingRuleSpec{ + TenantID: "application", + Groups: []*lokiv1.AlertingRuleGroup{ + { + Rules: []*lokiv1.AlertingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="example", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "page", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, + }, + }, + }, + }, + }, + }, + wantErrors: []*field.Error{ + { + Type: field.ErrorTypeInvalid, + Field: "spec.groups[0].rules[0].labels", + BadValue: map[string]string{ + severityLabelName: "page", + }, + Detail: lokiv1.ErrSeverityLabelInvalid.Error(), + }, + }, + }, + { + desc: "missing summary annotation", + spec: &lokiv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alerting-rule", + Namespace: "example", + }, + Spec: lokiv1.AlertingRuleSpec{ + TenantID: "application", + Groups: []*lokiv1.AlertingRuleGroup{ + { + Rules: []*lokiv1.AlertingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="example", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + descriptionAnnotationName: "alert description", + }, + }, + }, + }, + }, + }, + }, + wantErrors: []*field.Error{ + { + Type: field.ErrorTypeInvalid, + Field: "spec.groups[0].rules[0].annotations", + BadValue: map[string]string{ + descriptionAnnotationName: "alert description", + }, + Detail: lokiv1.ErrSummaryAnnotationMissing.Error(), + }, + }, + }, + { + desc: "missing description annotation", + spec: &lokiv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alerting-rule", + Namespace: "example", + }, + Spec: lokiv1.AlertingRuleSpec{ + TenantID: "application", + Groups: []*lokiv1.AlertingRuleGroup{ + { + Rules: []*lokiv1.AlertingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="example", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + }, + }, + }, + }, + }, + }, + }, + wantErrors: []*field.Error{ + { + Type: field.ErrorTypeInvalid, + Field: "spec.groups[0].rules[0].annotations", + BadValue: map[string]string{ + summaryAnnotationName: "alert summary", + }, + Detail: lokiv1.ErrDescriptionAnnotationMissing.Error(), + }, + }, + }, } for _, tc := range tt { diff --git a/operator/internal/validation/openshift/common.go b/operator/internal/validation/openshift/common.go index bde71a89a1..c26350160d 100644 --- a/operator/internal/validation/openshift/common.go +++ b/operator/internal/validation/openshift/common.go @@ -1,6 +1,7 @@ package openshift import ( + "regexp" "strings" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" @@ -10,6 +11,11 @@ import ( ) const ( + severityLabelName = "severity" + + summaryAnnotationName = "summary" + descriptionAnnotationName = "description" + namespaceLabelName = "kubernetes_namespace_name" namespaceOpenshiftLogging = "openshift-logging" @@ -18,6 +24,8 @@ const ( tenantInfrastructure = "infrastructure" ) +var severityRe = regexp.MustCompile("^critical|warning|info$") + func validateRuleExpression(namespace, tenantID, rawExpr string) error { // Check if the LogQL parser can parse the rule expression expr, err := syntax.ParseExpr(rawExpr) @@ -61,3 +69,30 @@ func tenantForNamespace(namespace string) []string { return []string{tenantApplication} } + +func validateRuleLabels(labels map[string]string) error { + value, found := labels[severityLabelName] + if !found { + return lokiv1.ErrSeverityLabelMissing + } + + if !severityRe.MatchString(value) { + return lokiv1.ErrSeverityLabelInvalid + } + + return nil +} + +func validateRuleAnnotations(annotations map[string]string) error { + value, found := annotations[summaryAnnotationName] + if !found || value == "" { + return lokiv1.ErrSummaryAnnotationMissing + } + + value, found = annotations[descriptionAnnotationName] + if !found || value == "" { + return lokiv1.ErrDescriptionAnnotationMissing + } + + return nil +}