From c5a6bb5ab1ea528ebae9b577f3527192fba101ed Mon Sep 17 00:00:00 2001 From: Andres Martinez Gotor Date: Wed, 25 Jun 2025 12:08:30 +0200 Subject: [PATCH] Advisor: Address error logs (#107125) --- .../pkg/app/checkscheduler/checkscheduler.go | 12 ++++++------ .../checktyperegisterer/checktyperegisterer.go | 11 ++++++++--- .../checktyperegisterer_test.go | 18 ++++++++++++++++++ apps/advisor/pkg/app/utils.go | 6 +++--- apps/advisor/pkg/app/utils_test.go | 5 ++--- 5 files changed, 37 insertions(+), 15 deletions(-) diff --git a/apps/advisor/pkg/app/checkscheduler/checkscheduler.go b/apps/advisor/pkg/app/checkscheduler/checkscheduler.go index adf0cc1569d..10706c53fd8 100644 --- a/apps/advisor/pkg/app/checkscheduler/checkscheduler.go +++ b/apps/advisor/pkg/app/checkscheduler/checkscheduler.go @@ -85,7 +85,7 @@ func New(cfg app.Config, log logging.Logger) (app.Runnable, error) { func (r *Runner) Run(ctx context.Context) error { logger := r.log.WithContext(ctx) - lastCreated, err := r.checkLastCreated(ctx, logger) + lastCreated, err := r.checkLastCreated(context.WithoutCancel(ctx), logger) if err != nil { logger.Error("Error getting last check creation time", "error", err) // Wait for interval to create the next scheduled check @@ -93,7 +93,7 @@ func (r *Runner) Run(ctx context.Context) error { } else { // do an initial creation if necessary if lastCreated.IsZero() { - err = r.createChecks(ctx, logger) + err = r.createChecks(context.WithoutCancel(ctx), logger) if err != nil { logger.Error("Error creating new check reports", "error", err) } else { @@ -109,12 +109,12 @@ func (r *Runner) Run(ctx context.Context) error { for { select { case <-ticker.C: - err = r.createChecks(ctx, logger) + err = r.createChecks(context.WithoutCancel(ctx), logger) if err != nil { logger.Error("Error creating new check reports", "error", err) } - err = r.cleanupChecks(ctx, logger) + err = r.cleanupChecks(context.WithoutCancel(ctx), logger) if err != nil { logger.Error("Error cleaning up old check reports", "error", err) } @@ -147,7 +147,7 @@ func (r *Runner) checkLastCreated(ctx context.Context, log logging.Logger) (time // If the check is unprocessed, set it to error if checks.GetStatusAnnotation(item) == "" { - log.Error("Check is unprocessed", "check", item.GetStaticMetadata().Identifier()) + log.Info("Check is unprocessed, marking as error", "check", item.GetStaticMetadata().Identifier()) err := checks.SetStatusAnnotation(ctx, r.client, item, checks.StatusAnnotationError) if err != nil { log.Error("Error setting check status to error", "error", err) @@ -168,7 +168,7 @@ func (r *Runner) createChecks(ctx context.Context, logger logging.Logger) error allChecksRegistered := len(list.GetItems()) == len(r.checkRegistry.Checks()) retryCount := 0 for !allChecksRegistered && retryCount < waitMaxRetries { - logger.Error("Waiting for all check types to be registered", "retryCount", retryCount, "waitInterval", waitInterval) + logger.Info("Waiting for all check types to be registered", "retryCount", retryCount, "waitInterval", waitInterval) time.Sleep(waitInterval) list, err = r.typesClient.List(ctx, r.namespace, resource.ListOptions{}) if err != nil { diff --git a/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go b/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go index aaf9cc80d86..53d45d460a7 100644 --- a/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go +++ b/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "maps" + "strings" "time" "github.com/grafana/grafana-app-sdk/app" @@ -81,7 +82,7 @@ func (r *Runner) createOrUpdate(ctx context.Context, log logging.Logger, obj res _, err = r.client.Update(ctx, id, obj, resource.UpdateOptions{}) if err != nil && !errors.IsAlreadyExists(err) { // Ignore the error, it's probably due to a race condition - log.Error("Error updating check type", "error", err) + log.Info("Error updating check type, ignoring", "error", err) } return nil } @@ -123,9 +124,13 @@ func (r *Runner) Run(ctx context.Context) error { for i := 0; i < r.retryAttempts; i++ { err := r.createOrUpdate(context.WithoutCancel(ctx), logger, obj) if err != nil { - logger.Error("Error creating check type, retrying", "error", err, "attempt", i+1) + if strings.Contains(err.Error(), "apiserver is shutting down") { + logger.Debug("Error creating check type, not retrying", "error", err) + return nil + } + logger.Debug("Error creating check type, retrying", "error", err, "attempt", i+1) if i == r.retryAttempts-1 { - logger.Error("Unable to register check type") + logger.Error("Unable to register check type", "check_type", t.ID(), "error", err) } else { // Calculate exponential backoff delay: baseDelay * 2^attempt delay := r.retryDelay * time.Duration(1<