operator: Use a condition to warn when there are no nodes with matching labels for zone-awareness (#9942)

Co-authored-by: Robert Jacob <rojacob@redhat.com>
Branch: pull/10178/head
Author: Bayan Taani (committed by GitHub)
Parent: bde65667f7
Commit: 7d818099a0
6 changed files:

  1. operator/CHANGELOG.md (2 changed lines)
  2. operator/apis/loki/v1/lokistack_types.go (2 changed lines)
  3. operator/internal/status/lokistack.go (48 changed lines)
  4. operator/internal/status/lokistack_test.go (109 changed lines)
  5. operator/internal/status/status.go (5 changed lines)
  6. operator/internal/status/status_test.go (65 changed lines)
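
In short, the change: when component pods are pending and the LokiStack spec configures replication zones, generateCondition now lists the cluster's nodes filtered by the configured topology keys (via the new checkForZoneawareNodes helper). If no node carries all of those label keys, the operator reports a Degraded condition with reason ReasonNoZoneAwareNodes instead of the generic Pending condition; any error from the node lookup is returned through Refresh so the reconciliation fails and is retried. The diffs follow, per file.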

operator/CHANGELOG.md
@@ -1,5 +1,7 @@
 ## Main
 
+- [9942](https://github.com/grafana/loki/pull/9942) **btaani**: Use a condition to warn when there are no nodes with matching labels for zone-awareness
+
 ## 0.4.0 (2023-07-27)
 
 - [10019](https://github.com/grafana/loki/pull/10019) **periklis**: Update Loki operand to v2.8.3

operator/apis/loki/v1/lokistack_types.go
@@ -990,6 +990,8 @@ const (
 	ReasonFailedCertificateRotation LokiStackConditionReason = "FailedCertificateRotation"
 	// ReasonQueryTimeoutInvalid when the QueryTimeout can not be parsed.
 	ReasonQueryTimeoutInvalid LokiStackConditionReason = "ReasonQueryTimeoutInvalid"
+	// ReasonNoZoneAwareNodes when the cluster does not contain any nodes with the labels needed for zone-awareness.
+	ReasonNoZoneAwareNodes LokiStackConditionReason = "ReasonNoZoneAwareNodes"
 )
 
 // PodStatusMap defines the type for mapping pod status to pod name.

operator/internal/status/lokistack.go
@@ -13,12 +13,14 @@ import (
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 const (
-	messageReady   = "All components ready"
-	messageFailed  = "Some LokiStack components failed"
-	messagePending = "Some LokiStack components pending on dependencies"
+	messageReady              = "All components ready"
+	messageFailed             = "Some LokiStack components failed"
+	messagePending            = "Some LokiStack components pending on dependencies"
+	messageDegradedNodeLabels = "Cluster contains no nodes matching the labels used for zone-awareness"
 )
 
 var (
@@ -37,6 +39,11 @@ var (
 		Message: messageReady,
 		Reason:  string(lokiv1.ReasonReadyComponents),
 	}
+	conditionDegradedNodeLabels = metav1.Condition{
+		Type:    string(lokiv1.ConditionDegraded),
+		Message: messageDegradedNodeLabels,
+		Reason:  string(lokiv1.ReasonNoZoneAwareNodes),
+	}
 )
 
 // DegradedError contains information about why the managed LokiStack has an invalid configuration.
@@ -61,7 +68,7 @@ func SetDegradedCondition(ctx context.Context, k k8s.Client, req ctrl.Request, m
 	return updateCondition(ctx, k, req, degraded)
 }
 
-func generateCondition(cs *lokiv1.LokiStackComponentStatus) metav1.Condition {
+func generateCondition(ctx context.Context, cs *lokiv1.LokiStackComponentStatus, k client.Client, req ctrl.Request, stack *lokiv1.LokiStack) (metav1.Condition, error) {
 	// Check for failed pods first
 	failed := len(cs.Compactor[corev1.PodFailed]) +
 		len(cs.Distributor[corev1.PodFailed]) +
@@ -73,7 +80,7 @@ func generateCondition(cs *lokiv1.LokiStackComponentStatus) metav1.Condition {
 		len(cs.Ruler[corev1.PodFailed])
 
 	if failed != 0 {
-		return conditionFailed
+		return conditionFailed, nil
 	}
 
 	// Check for pending pods
@@ -87,10 +94,37 @@ func generateCondition(cs *lokiv1.LokiStackComponentStatus) metav1.Condition {
 		len(cs.Ruler[corev1.PodPending])
 
 	if pending != 0 {
-		return conditionPending
+		if stack.Spec.Replication != nil && len(stack.Spec.Replication.Zones) > 0 {
+			// When there are pending pods and zone-awareness is enabled, check if there are any nodes
+			// that can satisfy the constraints and emit a condition if not.
+			nodesOk, err := checkForZoneawareNodes(ctx, k, stack.Spec.Replication.Zones)
+			if err != nil {
+				return metav1.Condition{}, err
+			}
+
+			if !nodesOk {
+				return conditionDegradedNodeLabels, nil
+			}
+		}
+
+		return conditionPending, nil
+	}
+
+	return conditionReady, nil
+}
+
+func checkForZoneawareNodes(ctx context.Context, k client.Client, zones []lokiv1.ZoneSpec) (bool, error) {
+	nodeLabels := client.HasLabels{}
+	for _, z := range zones {
+		nodeLabels = append(nodeLabels, z.TopologyKey)
+	}
+
+	nodeList := &corev1.NodeList{}
+	if err := k.List(ctx, nodeList, nodeLabels); err != nil {
+		return false, err
 	}
 
-	return conditionReady
+	return len(nodeList.Items) > 0, nil
 }
 
 func updateCondition(ctx context.Context, k k8s.Client, req ctrl.Request, condition metav1.Condition) error {
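
The new helper leans on a controller-runtime primitive worth spelling out: client.HasLabels is a []string list option that restricts List to objects carrying every listed label key, regardless of the label values. Below is a minimal, self-contained sketch of the same node check outside the operator; the function name hasZoneAwareNodes, the main wiring, and the client variable c are illustrative, not part of the patch:

package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/client/config"
)

// hasZoneAwareNodes reports whether at least one node carries all of the
// given topology label keys. It mirrors checkForZoneawareNodes above, but
// stands alone for illustration.
func hasZoneAwareNodes(ctx context.Context, c client.Client, topologyKeys []string) (bool, error) {
	// HasLabels is an existence filter: any label value satisfies it.
	nodes := &corev1.NodeList{}
	if err := c.List(ctx, nodes, client.HasLabels(topologyKeys)); err != nil {
		return false, err
	}
	return len(nodes.Items) > 0, nil
}

func main() {
	// Assumes a reachable kubeconfig; illustrative wiring only.
	cfg, err := config.GetConfig()
	if err != nil {
		panic(err)
	}
	c, err := client.New(cfg, client.Options{})
	if err != nil {
		panic(err)
	}
	ok, err := hasZoneAwareNodes(context.Background(), c, []string{corev1.LabelTopologyZone})
	fmt.Println(ok, err)
}

Because HasLabels only checks key presence, a node in zone "a" and a node in zone "b" both match the same topology.kubernetes.io/zone key: the patch deliberately asks only whether zone-aware scheduling is possible at all, not whether enough distinct zones exist.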

operator/internal/status/lokistack_test.go
@@ -2,6 +2,7 @@ package status
 
 import (
 	"context"
+	"errors"
 	"testing"
 
 	lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
@@ -152,7 +153,23 @@ func TestSetDegradedCondition_WhenNoneExisting_AppendDegradedCondition(t *testin
 	require.NotZero(t, sw.UpdateCallCount())
 }
 
-func TestGenerateConditions(t *testing.T) {
+func TestGenerateCondition(t *testing.T) {
+	k := &k8sfakes.FakeClient{}
+	r := ctrl.Request{
+		NamespacedName: types.NamespacedName{
+			Name:      "test-lokistack",
+			Namespace: "some-ns",
+		},
+	}
+	lokiStack := lokiv1.LokiStack{
+		TypeMeta: metav1.TypeMeta{
+			Kind: "LokiStack",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-lokistack",
+			Namespace: "test-ns",
+		},
+	}
 	tt := []struct {
 		desc            string
 		componentStatus *lokiv1.LokiStackComponentStatus
@@ -192,7 +209,95 @@ func TestGenerateConditions(t *testing.T) {
 		t.Run(tc.desc, func(t *testing.T) {
 			t.Parallel()
 
-			condition := generateCondition(tc.componentStatus)
+			condition, err := generateCondition(context.TODO(), tc.componentStatus, k, r, &lokiStack)
 
+			require.Nil(t, err)
 			require.Equal(t, tc.wantCondition, condition)
 		})
 	}
 }
+
+func TestGenerateCondition_ZoneAwareLokiStack(t *testing.T) {
+	testError := errors.New("test-error")
+	tt := []struct {
+		desc          string
+		nodes         []corev1.Node
+		wantCondition metav1.Condition
+		wantErr       error
+	}{
+		{
+			desc: "nodes available",
+			nodes: []corev1.Node{
+				{},
+			},
+			wantCondition: conditionPending,
+		},
+		{
+			desc:          "no nodes available",
+			nodes:         []corev1.Node{},
+			wantCondition: conditionDegradedNodeLabels,
+		},
+		{
+			desc:    "api error",
+			nodes:   []corev1.Node{},
+			wantErr: testError,
+		},
+	}
+
+	for _, tc := range tt {
+		tc := tc
+		t.Run(tc.desc, func(t *testing.T) {
+			t.Parallel()
+
+			r := ctrl.Request{
+				NamespacedName: types.NamespacedName{
+					Name:      "test-lokistack",
+					Namespace: "some-ns",
+				},
+			}
+
+			componentStatus := &lokiv1.LokiStackComponentStatus{
+				Ingester: map[corev1.PodPhase][]string{
+					corev1.PodPending: {
+						"pod-0",
+					},
+				},
+			}
+
+			lokiStack := lokiv1.LokiStack{
+				Spec: lokiv1.LokiStackSpec{
+					Replication: &lokiv1.ReplicationSpec{
+						Zones: []lokiv1.ZoneSpec{
+							{
+								TopologyKey: "topology-key",
+							},
+						},
+					},
+				},
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "test-lokistack",
+					Namespace: "test-ns",
+				},
+				TypeMeta: metav1.TypeMeta{
+					Kind: "LokiStack",
+				},
+			}
+
+			k, _ := setupFakesNoError(t, &lokiStack)
+			k.ListStub = func(_ context.Context, ol client.ObjectList, options ...client.ListOption) error {
+				for _, o := range options {
+					if labels, ok := o.(client.HasLabels); ok {
+						require.Len(t, labels, 1)
+						require.Equal(t, "topology-key", labels[0])
+					}
+				}
+
+				k.SetClientObjectList(ol, &corev1.NodeList{
+					Items: tc.nodes,
+				})
+				return tc.wantErr
+			}
+
+			condition, err := generateCondition(context.TODO(), componentStatus, k, r, &lokiStack)
+
+			require.Equal(t, tc.wantErr, err)
+			require.Equal(t, tc.wantCondition, condition)
+		})
+	}
+}
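
One subtlety in the table above: the "nodes available" case can get away with an empty corev1.Node{} because the counterfeiter fake never evaluates list options; the ListStub only asserts that generateCondition built the HasLabels option from the zone's TopologyKey, then returns the canned list. Against a real API server, a node matches only if it actually carries the label key. A hypothetical fixture that would satisfy the "topology-key" zone used above (the node name and label value are illustrative; corev1 and metav1 are already imported in this test file):

node := corev1.Node{
	ObjectMeta: metav1.ObjectMeta{
		Name: "node-a",
		// client.HasLabels only checks key presence; the value is arbitrary.
		Labels: map[string]string{"topology-key": "zone-a"},
	},
}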

operator/internal/status/status.go
@@ -31,7 +31,10 @@ func Refresh(ctx context.Context, k k8s.Client, req ctrl.Request, now time.Time)
 		return err
 	}
 
-	condition := generateCondition(cs)
+	condition, err := generateCondition(ctx, cs, k, req, &stack)
+	if err != nil {
+		return err
+	}
 
 	condition.LastTransitionTime = metav1.NewTime(now)
 	condition.Status = metav1.ConditionTrue
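
Since Refresh stamps the condition before persisting it (the two context lines above), the degraded condition as stored on the LokiStack can be pictured as follows. This assembly is a sketch in the vocabulary of the patch (the lokiv1 constants, the messageDegradedNodeLabels string, the now timestamp), not a quote of updateCondition:

cond := metav1.Condition{
	Type:               string(lokiv1.ConditionDegraded), // "Degraded"
	Status:             metav1.ConditionTrue,
	Reason:             string(lokiv1.ReasonNoZoneAwareNodes), // "ReasonNoZoneAwareNodes"
	Message:            messageDegradedNodeLabels,
	LastTransitionTime: metav1.NewTime(now),
}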

operator/internal/status/status_test.go
@@ -12,6 +12,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 func TestRefreshSuccess(t *testing.T) {
@@ -81,3 +82,67 @@ func TestRefreshSuccess(t *testing.T) {
 	require.Equal(t, wantStatus, updatedStack.Status)
 }
+
+func TestRefreshSuccess_ZoneAwarePendingPod(t *testing.T) {
+	now := time.Now()
+	req := ctrl.Request{
+		NamespacedName: types.NamespacedName{
+			Name:      "test-stack",
+			Namespace: "test-ns",
+		},
+	}
+	stack := lokiv1.LokiStack{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-stack",
+			Namespace: "test-ns",
+		},
+		Spec: lokiv1.LokiStackSpec{
+			Replication: &lokiv1.ReplicationSpec{
+				Zones: []lokiv1.ZoneSpec{
+					{
+						TopologyKey: corev1.LabelTopologyZone,
+					},
+				},
+			},
+		},
+	}
+
+	testPod := corev1.Pod{
+		Status: corev1.PodStatus{
+			Phase: corev1.PodPending,
+		},
+	}
+
+	k, sw := setupFakesNoError(t, &stack)
+	k.ListStub = func(ctx context.Context, ol client.ObjectList, _ ...client.ListOption) error {
+		switch ol.(type) {
+		case *corev1.PodList:
+			k.SetClientObjectList(ol, &corev1.PodList{
+				Items: []corev1.Pod{
+					testPod,
+				},
+			})
+		case *corev1.NodeList:
+			k.SetClientObjectList(ol, &corev1.NodeList{
+				Items: []corev1.Node{},
+			})
+		}
+		return nil
+	}
+
+	err := Refresh(context.Background(), k, req, now)
+
+	require.NoError(t, err)
+	require.Equal(t, 1, k.GetCallCount())
+	require.Equal(t, 9, k.ListCallCount())
+	require.Equal(t, 1, sw.UpdateCallCount())
+
+	_, updated, _ := sw.UpdateArgsForCall(0)
+	updatedStack, ok := updated.(*lokiv1.LokiStack)
+	if !ok {
+		t.Fatalf("not a LokiStack: %T", updatedStack)
+	}
+
+	require.Len(t, updatedStack.Status.Conditions, 1)
+	condition := updatedStack.Status.Conditions[0]
+	require.Equal(t, conditionDegradedNodeLabels.Reason, condition.Reason)
+	require.Equal(t, conditionDegradedNodeLabels.Type, condition.Type)
+}
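
A practical footnote on the scenario exercised here: checkForZoneawareNodes requires only one node with the configured key, so against a real cluster the Degraded condition clears (falling back to Pending) as soon as any node carries the topology label. This test keys zone-awareness on corev1.LabelTopologyZone (topology.kubernetes.io/zone), a well-known label that cloud providers typically set on nodes automatically; on clusters where it is absent, labeling a single node with that key is enough.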
