Advisor: Address error logs (#107125)

pull/107175/head
Andres Martinez Gotor 2 days ago committed by GitHub
parent f621e2f507
commit c5a6bb5ab1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 12
      apps/advisor/pkg/app/checkscheduler/checkscheduler.go
  2. 11
      apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go
  3. 18
      apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer_test.go
  4. 6
      apps/advisor/pkg/app/utils.go
  5. 5
      apps/advisor/pkg/app/utils_test.go

@ -85,7 +85,7 @@ func New(cfg app.Config, log logging.Logger) (app.Runnable, error) {
func (r *Runner) Run(ctx context.Context) error {
logger := r.log.WithContext(ctx)
lastCreated, err := r.checkLastCreated(ctx, logger)
lastCreated, err := r.checkLastCreated(context.WithoutCancel(ctx), logger)
if err != nil {
logger.Error("Error getting last check creation time", "error", err)
// Wait for interval to create the next scheduled check
@ -93,7 +93,7 @@ func (r *Runner) Run(ctx context.Context) error {
} else {
// do an initial creation if necessary
if lastCreated.IsZero() {
err = r.createChecks(ctx, logger)
err = r.createChecks(context.WithoutCancel(ctx), logger)
if err != nil {
logger.Error("Error creating new check reports", "error", err)
} else {
@ -109,12 +109,12 @@ func (r *Runner) Run(ctx context.Context) error {
for {
select {
case <-ticker.C:
err = r.createChecks(ctx, logger)
err = r.createChecks(context.WithoutCancel(ctx), logger)
if err != nil {
logger.Error("Error creating new check reports", "error", err)
}
err = r.cleanupChecks(ctx, logger)
err = r.cleanupChecks(context.WithoutCancel(ctx), logger)
if err != nil {
logger.Error("Error cleaning up old check reports", "error", err)
}
@ -147,7 +147,7 @@ func (r *Runner) checkLastCreated(ctx context.Context, log logging.Logger) (time
// If the check is unprocessed, set it to error
if checks.GetStatusAnnotation(item) == "" {
log.Error("Check is unprocessed", "check", item.GetStaticMetadata().Identifier())
log.Info("Check is unprocessed, marking as error", "check", item.GetStaticMetadata().Identifier())
err := checks.SetStatusAnnotation(ctx, r.client, item, checks.StatusAnnotationError)
if err != nil {
log.Error("Error setting check status to error", "error", err)
@ -168,7 +168,7 @@ func (r *Runner) createChecks(ctx context.Context, logger logging.Logger) error
allChecksRegistered := len(list.GetItems()) == len(r.checkRegistry.Checks())
retryCount := 0
for !allChecksRegistered && retryCount < waitMaxRetries {
logger.Error("Waiting for all check types to be registered", "retryCount", retryCount, "waitInterval", waitInterval)
logger.Info("Waiting for all check types to be registered", "retryCount", retryCount, "waitInterval", waitInterval)
time.Sleep(waitInterval)
list, err = r.typesClient.List(ctx, r.namespace, resource.ListOptions{})
if err != nil {

@ -4,6 +4,7 @@ import (
"context"
"fmt"
"maps"
"strings"
"time"
"github.com/grafana/grafana-app-sdk/app"
@ -81,7 +82,7 @@ func (r *Runner) createOrUpdate(ctx context.Context, log logging.Logger, obj res
_, err = r.client.Update(ctx, id, obj, resource.UpdateOptions{})
if err != nil && !errors.IsAlreadyExists(err) {
// Ignore the error, it's probably due to a race condition
log.Error("Error updating check type", "error", err)
log.Info("Error updating check type, ignoring", "error", err)
}
return nil
}
@ -123,9 +124,13 @@ func (r *Runner) Run(ctx context.Context) error {
for i := 0; i < r.retryAttempts; i++ {
err := r.createOrUpdate(context.WithoutCancel(ctx), logger, obj)
if err != nil {
logger.Error("Error creating check type, retrying", "error", err, "attempt", i+1)
if strings.Contains(err.Error(), "apiserver is shutting down") {
logger.Debug("Error creating check type, not retrying", "error", err)
return nil
}
logger.Debug("Error creating check type, retrying", "error", err, "attempt", i+1)
if i == r.retryAttempts-1 {
logger.Error("Unable to register check type")
logger.Error("Unable to register check type", "check_type", t.ID(), "error", err)
} else {
// Calculate exponential backoff delay: baseDelay * 2^attempt
delay := r.retryDelay * time.Duration(1<<i)

@ -123,6 +123,24 @@ func TestCheckTypesRegisterer_Run(t *testing.T) {
},
expectedErr: errors.New("update error"),
},
{
name: "shutting down error",
checks: []checks.Check{
&mockCheck{
id: "check1",
steps: []checks.Step{
&mockStep{id: "step1", title: "Step 1", description: "Description 1"},
},
},
},
createFunc: func(ctx context.Context, id resource.Identifier, obj resource.Object, opts resource.CreateOptions) (resource.Object, error) {
return nil, k8sErrs.NewAlreadyExists(schema.GroupResource{}, obj.GetName())
},
updateFunc: func(ctx context.Context, id resource.Identifier, obj resource.Object, opts resource.UpdateOptions) (resource.Object, error) {
return nil, errors.New("apiserver is shutting down")
},
expectedErr: nil,
},
{
name: "custom namespace",
checks: []checks.Check{

@ -53,7 +53,7 @@ func processCheck(ctx context.Context, log logging.Logger, client resource.Clien
if setErr != nil {
return setErr
}
return fmt.Errorf("error initializing check: %w", err)
return fmt.Errorf("error listing items for check: %w", err)
}
// Get the check type
var checkType resource.Object
@ -130,7 +130,7 @@ func processCheckRetry(ctx context.Context, log logging.Logger, client resource.
if setErr != nil {
return setErr
}
return fmt.Errorf("error initializing check: %w", err)
return fmt.Errorf("error getting item for check: %w", err)
}
if item != nil {
// Get the check type
@ -206,7 +206,7 @@ func runStepsInParallel(ctx context.Context, log logging.Logger, spec *advisorv0
func() {
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("panic recovered in step %s: %v", step.ID(), r)
log.Error("panic recovered in step", "step", step.ID(), "error", r, "item", item)
}
}()
logger := log.With("step", step.ID())

@ -178,9 +178,8 @@ func TestProcessCheck_RunRecoversFromPanic(t *testing.T) {
}
err = processCheck(ctx, logging.DefaultLogger, client, typesClient, obj, check)
assert.Error(t, err)
assert.Contains(t, err.Error(), "panic recovered in step")
assert.Equal(t, checks.StatusAnnotationError, obj.GetAnnotations()[checks.StatusAnnotation])
assert.NoError(t, err)
assert.Equal(t, checks.StatusAnnotationProcessed, obj.GetAnnotations()[checks.StatusAnnotation])
}
func TestProcessCheckRetry_NoRetry(t *testing.T) {

Loading…
Cancel
Save