operator: Fix immediate reset of degraded condition (#5691)

pull/5697/head
Sashank Agarwal 3 years ago committed by GitHub
parent 0950034890
commit b3a2cec7c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      operator/CHANGELOG.md
  2. 18
      operator/controllers/lokistack_controller.go
  3. 12
      operator/internal/handlers/internal/gateway/base_domain.go
  4. 24
      operator/internal/handlers/internal/gateway/tenant_secrets.go
  5. 47
      operator/internal/handlers/lokistack_create_or_update.go
  6. 83
      operator/internal/handlers/lokistack_create_or_update_test.go
  7. 12
      operator/internal/status/lokistack.go
  8. 12
      operator/internal/status/status.go

@ -1,5 +1,6 @@
## Main
- [5691](https://github.com/grafana/loki/pull/5691) **sasagarw**: Fix immediate reset of degraded condition
- [5704](https://github.com/grafana/loki/pull/5704) **xperimental**: Update operator-sdk to 1.18.1
- [5693](https://github.com/grafana/loki/pull/5693) **periklis**: Replace frontend_worker parallelism with match_max_concurrent
- [5699](https://github.com/grafana/loki/pull/5699) **Red-GV**: Configure boltdb_shipper and schema to use Azure, GCS, and Swift storage

@ -2,6 +2,7 @@ package controllers
import (
"context"
"errors"
"time"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
@ -106,6 +107,23 @@ func (r *LokiStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
err = handlers.CreateOrUpdateLokiStack(ctx, r.Log, req, r.Client, r.Scheme, r.Flags)
var degraded *status.DegradedError
if errors.As(err, &degraded) {
err = status.SetDegradedCondition(ctx, r.Client, req, degraded.Message, degraded.Reason)
if err != nil {
return ctrl.Result{
Requeue: true,
RequeueAfter: time.Second,
}, err
}
return ctrl.Result{
Requeue: degraded.Requeue,
RequeueAfter: time.Second,
}, nil
}
if err != nil {
return ctrl.Result{
Requeue: true,

@ -23,15 +23,11 @@ func GetOpenShiftBaseDomain(ctx context.Context, k k8s.Client, req ctrl.Request)
if err := k.Get(ctx, key, &cluster); err != nil {
if apierrors.IsNotFound(err) {
statusErr := status.SetDegradedCondition(ctx, k, req,
"Missing cluster DNS configuration to read base domain",
lokiv1beta1.ReasonMissingGatewayOpenShiftBaseDomain,
)
if statusErr != nil {
return "", statusErr
return "", &status.DegradedError{
Message: "Missing cluster DNS configuration to read base domain",
Reason: lokiv1beta1.ReasonMissingGatewayOpenShiftBaseDomain,
Requeue: true,
}
return "", kverrors.Wrap(err, "Missing cluster DNS configuration to read base domain")
}
return "", kverrors.Wrap(err, "failed to lookup lokistack gateway base domain",
"name", key)

@ -37,15 +37,11 @@ func GetTenantSecrets(
key := client.ObjectKey{Name: tenant.OIDC.Secret.Name, Namespace: req.Namespace}
if err := k.Get(ctx, key, &gatewaySecret); err != nil {
if apierrors.IsNotFound(err) {
statusErr := status.SetDegradedCondition(ctx, k, req,
fmt.Sprintf("Missing secrets for tenant %s", tenant.TenantName),
lokiv1beta1.ReasonMissingGatewayTenantSecret,
)
if statusErr != nil {
return nil, statusErr
return nil, &status.DegradedError{
Message: fmt.Sprintf("Missing secrets for tenant %s", tenant.TenantName),
Reason: lokiv1beta1.ReasonMissingGatewayTenantSecret,
Requeue: true,
}
return nil, kverrors.Wrap(err, "Missing gateway secrets")
}
return nil, kverrors.Wrap(err, "failed to lookup lokistack gateway tenant secret",
"name", key)
@ -54,15 +50,11 @@ func GetTenantSecrets(
var ts *manifests.TenantSecrets
ts, err := secrets.ExtractGatewaySecret(&gatewaySecret, tenant.TenantName)
if err != nil {
statusErr := status.SetDegradedCondition(ctx, k, req,
"Invalid gateway tenant secret contents",
lokiv1beta1.ReasonInvalidGatewayTenantSecret,
)
if statusErr != nil {
return nil, statusErr
return nil, &status.DegradedError{
Message: "Invalid gateway tenant secret contents",
Reason: lokiv1beta1.ReasonInvalidGatewayTenantSecret,
Requeue: true,
}
return nil, kverrors.Wrap(err, "Invalid gateway tenant secret")
}
tenantSecrets = append(tenantSecrets, ts)
}

@ -25,7 +25,14 @@ import (
)
// CreateOrUpdateLokiStack handles LokiStack create and update events.
func CreateOrUpdateLokiStack(ctx context.Context, log logr.Logger, req ctrl.Request, k k8s.Client, s *runtime.Scheme, flags manifests.FeatureFlags) error {
func CreateOrUpdateLokiStack(
ctx context.Context,
log logr.Logger,
req ctrl.Request,
k k8s.Client,
s *runtime.Scheme,
flags manifests.FeatureFlags,
) error {
ll := log.WithValues("lokistack", req.NamespacedName, "event", "createOrUpdate")
var stack lokiv1beta1.LokiStack
@ -52,20 +59,22 @@ func CreateOrUpdateLokiStack(ctx context.Context, log logr.Logger, req ctrl.Requ
key := client.ObjectKey{Name: stack.Spec.Storage.Secret.Name, Namespace: stack.Namespace}
if err := k.Get(ctx, key, &storageSecret); err != nil {
if apierrors.IsNotFound(err) {
return status.SetDegradedCondition(ctx, k, req,
"Missing object storage secret",
lokiv1beta1.ReasonMissingObjectStorageSecret,
)
return &status.DegradedError{
Message: "Missing object storage secret",
Reason: lokiv1beta1.ReasonMissingObjectStorageSecret,
Requeue: false,
}
}
return kverrors.Wrap(err, "failed to lookup lokistack storage secret", "name", key)
}
storage, err := secrets.ExtractStorageSecret(&storageSecret, stack.Spec.Storage.Secret.Type)
if err != nil {
return status.SetDegradedCondition(ctx, k, req,
"Invalid object storage secret contents",
lokiv1beta1.ReasonInvalidObjectStorageSecret,
)
return &status.DegradedError{
Message: "Invalid object storage secret contents",
Reason: lokiv1beta1.ReasonInvalidObjectStorageSecret,
Requeue: false,
}
}
var (
@ -74,16 +83,18 @@ func CreateOrUpdateLokiStack(ctx context.Context, log logr.Logger, req ctrl.Requ
tenantConfigMap map[string]openshift.TenantData
)
if flags.EnableGateway && stack.Spec.Tenants == nil {
return status.SetDegradedCondition(ctx, k, req,
"Invalid tenants configuration - TenantsSpec cannot be nil when gateway flag is enabled",
lokiv1beta1.ReasonInvalidTenantsConfiguration,
)
return &status.DegradedError{
Message: "Invalid tenants configuration - TenantsSpec cannot be nil when gateway flag is enabled",
Reason: lokiv1beta1.ReasonInvalidTenantsConfiguration,
Requeue: false,
}
} else if flags.EnableGateway && stack.Spec.Tenants != nil {
if err = gateway.ValidateModes(stack); err != nil {
return status.SetDegradedCondition(ctx, k, req,
fmt.Sprintf("Invalid tenants configuration: %s", err),
lokiv1beta1.ReasonInvalidTenantsConfiguration,
)
return &status.DegradedError{
Message: fmt.Sprintf("Invalid tenants configuration: %s", err),
Reason: lokiv1beta1.ReasonInvalidTenantsConfiguration,
Requeue: false,
}
}
if stack.Spec.Tenants.Mode != lokiv1beta1.OpenshiftLogging {
@ -96,7 +107,7 @@ func CreateOrUpdateLokiStack(ctx context.Context, log logr.Logger, req ctrl.Requ
if stack.Spec.Tenants.Mode == lokiv1beta1.OpenshiftLogging {
baseDomain, err = gateway.GetOpenShiftBaseDomain(ctx, k, req)
if err != nil {
return nil
return err
}
// extract the existing tenant's id, cookieSecret if exists, otherwise create new.

@ -12,6 +12,7 @@ import (
"github.com/grafana/loki/operator/internal/external/k8s/k8sfakes"
"github.com/grafana/loki/operator/internal/handlers"
"github.com/grafana/loki/operator/internal/manifests"
"github.com/grafana/loki/operator/internal/status"
"github.com/ViaQ/logerr/log"
routev1 "github.com/openshift/api/route/v1"
@ -620,6 +621,12 @@ func TestCreateOrUpdateLokiStack_WhenMissingSecret_SetDegraded(t *testing.T) {
},
}
degradedErr := &status.DegradedError{
Message: "Missing object storage secret",
Reason: lokiv1beta1.ReasonMissingObjectStorageSecret,
Requeue: false,
}
stack := &lokiv1beta1.LokiStack{
TypeMeta: metav1.TypeMeta{
Kind: "LokiStack",
@ -654,12 +661,9 @@ func TestCreateOrUpdateLokiStack_WhenMissingSecret_SetDegraded(t *testing.T) {
err := handlers.CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, flags)
// make sure error is returned to re-trigger reconciliation
require.NoError(t, err)
// make sure status and status-update calls
require.NotZero(t, k.StatusCallCount())
require.NotZero(t, sw.UpdateCallCount())
// make sure error is returned
require.Error(t, err)
require.Equal(t, degradedErr, err)
}
func TestCreateOrUpdateLokiStack_WhenInvalidSecret_SetDegraded(t *testing.T) {
@ -672,6 +676,12 @@ func TestCreateOrUpdateLokiStack_WhenInvalidSecret_SetDegraded(t *testing.T) {
},
}
degradedErr := &status.DegradedError{
Message: "Invalid object storage secret contents",
Reason: lokiv1beta1.ReasonInvalidObjectStorageSecret,
Requeue: false,
}
stack := &lokiv1beta1.LokiStack{
TypeMeta: metav1.TypeMeta{
Kind: "LokiStack",
@ -710,12 +720,9 @@ func TestCreateOrUpdateLokiStack_WhenInvalidSecret_SetDegraded(t *testing.T) {
err := handlers.CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, flags)
// make sure error is returned to re-trigger reconciliation
require.NoError(t, err)
// make sure status and status-update calls
require.NotZero(t, k.StatusCallCount())
require.NotZero(t, sw.UpdateCallCount())
// make sure error is returned
require.Error(t, err)
require.Equal(t, degradedErr, err)
}
func TestCreateOrUpdateLokiStack_WhenInvalidTenantsConfiguration_SetDegraded(t *testing.T) {
@ -728,6 +735,12 @@ func TestCreateOrUpdateLokiStack_WhenInvalidTenantsConfiguration_SetDegraded(t *
},
}
degradedErr := &status.DegradedError{
Message: "Invalid tenants configuration: mandatory configuration - missing OPA Url",
Reason: lokiv1beta1.ReasonInvalidTenantsConfiguration,
Requeue: false,
}
ff := manifests.FeatureFlags{
EnableGateway: true,
}
@ -785,12 +798,9 @@ func TestCreateOrUpdateLokiStack_WhenInvalidTenantsConfiguration_SetDegraded(t *
err := handlers.CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, ff)
// make sure error is returned to re-trigger reconciliation
require.NoError(t, err)
// make sure status and status-update calls
require.NotZero(t, k.StatusCallCount())
require.NotZero(t, sw.UpdateCallCount())
// make sure error is returned
require.Error(t, err)
require.Equal(t, degradedErr, err)
}
func TestCreateOrUpdateLokiStack_WhenMissingGatewaySecret_SetDegraded(t *testing.T) {
@ -803,6 +813,12 @@ func TestCreateOrUpdateLokiStack_WhenMissingGatewaySecret_SetDegraded(t *testing
},
}
degradedErr := &status.DegradedError{
Message: "Missing secrets for tenant test",
Reason: lokiv1beta1.ReasonMissingGatewayTenantSecret,
Requeue: true,
}
ff := manifests.FeatureFlags{
EnableGateway: true,
}
@ -867,10 +883,7 @@ func TestCreateOrUpdateLokiStack_WhenMissingGatewaySecret_SetDegraded(t *testing
// make sure error is returned to re-trigger reconciliation
require.Error(t, err)
// make sure status and status-update calls
require.NotZero(t, k.StatusCallCount())
require.NotZero(t, sw.UpdateCallCount())
require.Equal(t, degradedErr, err)
}
func TestCreateOrUpdateLokiStack_WhenInvalidGatewaySecret_SetDegraded(t *testing.T) {
@ -883,6 +896,12 @@ func TestCreateOrUpdateLokiStack_WhenInvalidGatewaySecret_SetDegraded(t *testing
},
}
degradedErr := &status.DegradedError{
Message: "Invalid gateway tenant secret contents",
Reason: lokiv1beta1.ReasonInvalidGatewayTenantSecret,
Requeue: true,
}
ff := manifests.FeatureFlags{
EnableGateway: true,
}
@ -951,10 +970,7 @@ func TestCreateOrUpdateLokiStack_WhenInvalidGatewaySecret_SetDegraded(t *testing
// make sure error is returned to re-trigger reconciliation
require.Error(t, err)
// make sure status and status-update calls
require.NotZero(t, k.StatusCallCount())
require.NotZero(t, sw.UpdateCallCount())
require.Equal(t, degradedErr, err)
}
func TestCreateOrUpdateLokiStack_MissingTenantsSpec_SetDegraded(t *testing.T) {
@ -967,6 +983,12 @@ func TestCreateOrUpdateLokiStack_MissingTenantsSpec_SetDegraded(t *testing.T) {
},
}
degradedErr := &status.DegradedError{
Message: "Invalid tenants configuration - TenantsSpec cannot be nil when gateway flag is enabled",
Reason: lokiv1beta1.ReasonInvalidTenantsConfiguration,
Requeue: false,
}
ff := manifests.FeatureFlags{
EnableGateway: true,
}
@ -1011,10 +1033,7 @@ func TestCreateOrUpdateLokiStack_MissingTenantsSpec_SetDegraded(t *testing.T) {
err := handlers.CreateOrUpdateLokiStack(context.TODO(), logger, r, k, scheme, ff)
// make sure no error is returned
require.NoError(t, err)
// make sure status and status-update calls
require.NotZero(t, k.StatusCallCount())
require.NotZero(t, sw.UpdateCallCount())
// make sure error is returned
require.Error(t, err)
require.Equal(t, degradedErr, err)
}

@ -2,6 +2,7 @@ package status
import (
"context"
"fmt"
"github.com/ViaQ/logerr/kverrors"
lokiv1beta1 "github.com/grafana/loki/operator/api/v1beta1"
@ -13,6 +14,17 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)
// DegradedError contains information about why the managed LokiStack has an invalid configuration.
// It is returned as an error value by the handlers; the controller detects it with
// errors.As and uses its fields to set the degraded status condition.
type DegradedError struct {
// Message is the human-readable description of the problem; it is included in the
// Error() text and passed on as the degraded condition message.
Message string
// Reason is the condition reason passed on when the degraded condition is set.
Reason lokiv1beta1.LokiStackConditionReason
// Requeue tells the controller whether the reconcile request should be requeued
// after the degraded condition has been recorded.
Requeue bool
}
// Error implements the error interface. The returned text is the DegradedError's
// Message prefixed with "cluster degraded: ".
func (e *DegradedError) Error() string {
	const format = "cluster degraded: %s"

	return fmt.Sprintf(format, e.Message)
}
// SetReadyCondition updates or appends the condition Ready to the lokistack status conditions.
// In addition it resets all other Status conditions to false.
func SetReadyCondition(ctx context.Context, k k8s.Client, req ctrl.Request) error {

@ -35,13 +35,17 @@ func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request) error {
len(cs.Distributor[corev1.PodFailed]) +
len(cs.Ingester[corev1.PodFailed]) +
len(cs.Querier[corev1.PodFailed]) +
len(cs.QueryFrontend[corev1.PodFailed])
len(cs.QueryFrontend[corev1.PodFailed]) +
len(cs.Gateway[corev1.PodFailed]) +
len(cs.IndexGateway[corev1.PodFailed])
unknown := len(cs.Compactor[corev1.PodUnknown]) +
len(cs.Distributor[corev1.PodUnknown]) +
len(cs.Ingester[corev1.PodUnknown]) +
len(cs.Querier[corev1.PodUnknown]) +
len(cs.QueryFrontend[corev1.PodUnknown])
len(cs.QueryFrontend[corev1.PodUnknown]) +
len(cs.Gateway[corev1.PodUnknown]) +
len(cs.IndexGateway[corev1.PodUnknown])
if failed != 0 || unknown != 0 {
return SetFailedCondition(ctx, k, req)
@ -52,7 +56,9 @@ func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request) error {
len(cs.Distributor[corev1.PodPending]) +
len(cs.Ingester[corev1.PodPending]) +
len(cs.Querier[corev1.PodPending]) +
len(cs.QueryFrontend[corev1.PodPending])
len(cs.QueryFrontend[corev1.PodPending]) +
len(cs.Gateway[corev1.PodPending]) +
len(cs.IndexGateway[corev1.PodPending])
if pending != 0 {
return SetPendingCondition(ctx, k, req)

Loading…
Cancel
Save