From 7d01d998e4d09e8a4beec486470939dfb95e0f6e Mon Sep 17 00:00:00 2001 From: Periklis Tsirakidis Date: Wed, 3 May 2023 18:18:21 +0200 Subject: [PATCH] operator: Add support for custom tenant topology in rules (#9366) --- operator/CHANGELOG.md | 1 + operator/apis/loki/v1/v1.go | 5 + .../validation/openshift/alertingrule.go | 33 ++++-- .../validation/openshift/alertingrule_test.go | 109 ++++++++++++++++++ .../internal/validation/openshift/common.go | 20 ++++ .../validation/openshift/recordingrule.go | 9 ++ .../openshift/recordingrule_test.go | 57 +++++++++ 7 files changed, 222 insertions(+), 12 deletions(-) diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index 791670ba2f..2d5bc0ecd0 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [9366](https://github.com/grafana/loki/pull/9366) **periklis**: Add support for custom tenant topology in rules - [9315](https://github.com/grafana/loki/pull/9315) **aminesnow**: Add zone awareness spec to LokiStack - [9343](https://github.com/grafana/loki/pull/9343) **JoaoBraveCoding**: Add default PodAntiAffinity to Query Frontend - [9339](https://github.com/grafana/loki/pull/9339) **JoaoBraveCoding**: Add default PodAntiAffinity to Ruler diff --git a/operator/apis/loki/v1/v1.go b/operator/apis/loki/v1/v1.go index 4863cd6936..f728e8dc70 100644 --- a/operator/apis/loki/v1/v1.go +++ b/operator/apis/loki/v1/v1.go @@ -28,6 +28,11 @@ const ( StorageSchemaUpdateBuffer = time.Hour * 2 ) +const ( + // The AnnotationDisableTenantValidation annotation can contain a boolean value that, if true, disables the tenant-ID validation. + AnnotationDisableTenantValidation = "loki.grafana.com/disable-tenant-validation" +) + var ( // ErrGroupNamesNotUnique is the error type when loki groups have not unique names. ErrGroupNamesNotUnique = errors.New("Group names are not unique") diff --git a/operator/internal/validation/openshift/alertingrule.go b/operator/internal/validation/openshift/alertingrule.go index 1fc43d0611..e184729794 100644 --- a/operator/internal/validation/openshift/alertingrule.go +++ b/operator/internal/validation/openshift/alertingrule.go @@ -14,24 +14,33 @@ import ( func AlertingRuleValidator(_ context.Context, alertingRule *lokiv1.AlertingRule) field.ErrorList { var allErrs field.ErrorList + validateTenantIDs, fieldErr := tenantIDValidationEnabled(alertingRule.Annotations) + if fieldErr != nil { + return field.ErrorList{fieldErr} + } + // Check tenant matches expected value tenantID := alertingRule.Spec.TenantID - wantTenant := tenantForNamespace(alertingRule.Namespace) - if !slices.Contains(wantTenant, tenantID) { - allErrs = append(allErrs, field.Invalid( - field.NewPath("spec").Child("tenantID"), - tenantID, - fmt.Sprintf("AlertingRule does not use correct tenant %q", wantTenant))) + if validateTenantIDs { + wantTenant := tenantForNamespace(alertingRule.Namespace) + if !slices.Contains(wantTenant, tenantID) { + allErrs = append(allErrs, field.Invalid( + field.NewPath("spec").Child("tenantID"), + tenantID, + fmt.Sprintf("AlertingRule does not use correct tenant %q", wantTenant))) + } } for i, g := range alertingRule.Spec.Groups { for j, rule := range g.Rules { - if err := validateRuleExpression(alertingRule.Namespace, tenantID, rule.Expr); err != nil { - allErrs = append(allErrs, field.Invalid( - field.NewPath("spec").Child("groups").Index(i).Child("rules").Index(j).Child("expr"), - rule.Expr, - err.Error(), - )) + if validateTenantIDs { + if err := validateRuleExpression(alertingRule.Namespace, tenantID, rule.Expr); err != nil { + allErrs = append(allErrs, field.Invalid( + field.NewPath("spec").Child("groups").Index(i).Child("rules").Index(j).Child("expr"), + rule.Expr, + err.Error(), + )) + } } if err := validateRuleLabels(rule.Labels); err != nil { diff --git a/operator/internal/validation/openshift/alertingrule_test.go b/operator/internal/validation/openshift/alertingrule_test.go index f2f32acba7..ae911d537c 100644 --- a/operator/internal/validation/openshift/alertingrule_test.go +++ b/operator/internal/validation/openshift/alertingrule_test.go @@ -113,6 +113,115 @@ func TestAlertingRuleValidator(t *testing.T) { }, }, }, + { + desc: "custom tenant topology enabled", + spec: &lokiv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alerting-rule", + Namespace: "openshift-example", + Annotations: map[string]string{ + lokiv1.AnnotationDisableTenantValidation: "true", + }, + }, + Spec: lokiv1.AlertingRuleSpec{ + TenantID: "foobar", + Groups: []*lokiv1.AlertingRuleGroup{ + { + Rules: []*lokiv1.AlertingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="openshift-example", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, + }, + }, + }, + }, + }, + }, + }, + { + desc: "custom tenant topology disabled", + spec: &lokiv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alerting-rule", + Namespace: "openshift-example", + Annotations: map[string]string{ + lokiv1.AnnotationDisableTenantValidation: "false", + }, + }, + Spec: lokiv1.AlertingRuleSpec{ + TenantID: "foobar", + Groups: []*lokiv1.AlertingRuleGroup{ + { + Rules: []*lokiv1.AlertingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="openshift-example", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, + }, + }, + }, + }, + }, + }, + wantErrors: []*field.Error{ + { + Type: field.ErrorTypeInvalid, + Field: "spec.tenantID", + BadValue: "foobar", + Detail: `AlertingRule does not use correct tenant ["infrastructure"]`, + }, + }, + }, + { + desc: "wrong tenant topology", + spec: &lokiv1.AlertingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alerting-rule", + Namespace: "openshift-example", + Annotations: map[string]string{ + lokiv1.AnnotationDisableTenantValidation: "not-valid", + }, + }, + Spec: lokiv1.AlertingRuleSpec{ + TenantID: "foobar", + Groups: []*lokiv1.AlertingRuleGroup{ + { + Rules: []*lokiv1.AlertingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="openshift-example", level="error"}[5m])) by (job) > 0.1`, + Labels: map[string]string{ + severityLabelName: "warning", + }, + Annotations: map[string]string{ + summaryAnnotationName: "alert summary", + descriptionAnnotationName: "alert description", + }, + }, + }, + }, + }, + }, + }, + wantErrors: []*field.Error{ + { + Type: field.ErrorTypeInvalid, + Field: `metadata.annotations[loki.grafana.com/disable-tenant-validation]`, + BadValue: "not-valid", + Detail: `strconv.ParseBool: parsing "not-valid": invalid syntax`, + }, + }, + }, { desc: "expression does not parse", spec: &lokiv1.AlertingRule{ diff --git a/operator/internal/validation/openshift/common.go b/operator/internal/validation/openshift/common.go index c85a0e02ba..810a58e567 100644 --- a/operator/internal/validation/openshift/common.go +++ b/operator/internal/validation/openshift/common.go @@ -2,9 +2,11 @@ package openshift import ( "regexp" + "strconv" "strings" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "k8s.io/apimachinery/pkg/util/validation/field" "github.com/grafana/loki/pkg/logql/syntax" "github.com/prometheus/prometheus/model/labels" @@ -26,6 +28,24 @@ const ( var severityRe = regexp.MustCompile("^critical|warning|info$") +func tenantIDValidationEnabled(annotations map[string]string) (bool, *field.Error) { + v, ok := annotations[lokiv1.AnnotationDisableTenantValidation] + if !ok { + return true, nil + } + + disableValidation, err := strconv.ParseBool(v) + if err != nil { + return false, field.Invalid( + field.NewPath("metadata").Child("annotations").Key(lokiv1.AnnotationDisableTenantValidation), + v, + err.Error(), + ) + } + + return !disableValidation, nil +} + func validateRuleExpression(namespace, tenantID, rawExpr string) error { // Check if the LogQL parser can parse the rule expression expr, err := syntax.ParseExpr(rawExpr) diff --git a/operator/internal/validation/openshift/recordingrule.go b/operator/internal/validation/openshift/recordingrule.go index 3f15c9eb4a..89893d1378 100644 --- a/operator/internal/validation/openshift/recordingrule.go +++ b/operator/internal/validation/openshift/recordingrule.go @@ -12,6 +12,15 @@ import ( // RecordingRuleValidator does extended-validation of RecordingRule resources for Openshift-based deployments. func RecordingRuleValidator(_ context.Context, recordingRule *lokiv1.RecordingRule) field.ErrorList { + validateTenantIDs, fieldErr := tenantIDValidationEnabled(recordingRule.Annotations) + if fieldErr != nil { + return field.ErrorList{fieldErr} + } + + if !validateTenantIDs { + return nil + } + var allErrs field.ErrorList // Check tenant matches expected value diff --git a/operator/internal/validation/openshift/recordingrule_test.go b/operator/internal/validation/openshift/recordingrule_test.go index 57ab3e69c5..139b9e8dfd 100644 --- a/operator/internal/validation/openshift/recordingrule_test.go +++ b/operator/internal/validation/openshift/recordingrule_test.go @@ -68,6 +68,63 @@ func TestRecordingRuleValidator(t *testing.T) { }, }, }, + { + desc: "custom tenant topology enabled", + spec: &lokiv1.RecordingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "recording-rule", + Namespace: "openshift-example", + Annotations: map[string]string{ + lokiv1.AnnotationDisableTenantValidation: "true", + }, + }, + Spec: lokiv1.RecordingRuleSpec{ + TenantID: "foobar", + Groups: []*lokiv1.RecordingRuleGroup{ + { + Rules: []*lokiv1.RecordingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="openshift-example", level="error"}[5m])) by (job) > 0.1`, + }, + }, + }, + }, + }, + }, + }, + { + desc: "wrong tenant topology", + spec: &lokiv1.RecordingRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "recording-rule", + Namespace: "openshift-example", + Annotations: map[string]string{ + lokiv1.AnnotationDisableTenantValidation: "not-valid", + }, + }, + Spec: lokiv1.RecordingRuleSpec{ + TenantID: "foobar", + Groups: []*lokiv1.RecordingRuleGroup{ + { + Rules: []*lokiv1.RecordingRuleGroupSpec{ + { + Expr: `sum(rate({kubernetes_namespace_name="openshift-example", level="error"}[5m])) by (job) > 0.1`, + }, + }, + }, + }, + }, + }, + wantErrors: []*field.Error{ + { + Type: field.ErrorTypeInvalid, + Field: `metadata.annotations[loki.grafana.com/disable-tenant-validation]`, + BadValue: "not-valid", + Detail: `strconv.ParseBool: parsing "not-valid": invalid syntax`, + }, + }, + }, + { desc: "expression does not parse", spec: &lokiv1.RecordingRule{