From 5c755e65ba61eda6d8444d59f21ebe45e7d35e51 Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Mon, 5 Aug 2024 15:34:39 +0300 Subject: [PATCH] upgrade: cleanup CreateWithRetry usage (#1145) * cluster: CreateWithRetry: make work for already existing object Intention of the function was to ensure that the object is created and retry in case of webhook is temporary not available. The original code lacks handling already existing object. Handling with and without webhook configurations make the things more complicated. Let's check Create() errors for different cases: If webhook enabled: - no error (err == nil) - 500 InternalError likely if webhook is not available (yet) - 403 Forbidden if webhook blocks creation (check of existance) - some problem (real error) else, if webhook disabled: - no error (err == nil) - 409 AlreadyExists if object exists - some problem (real error) Check already existing object after Create() with a call to Get(). It covers both with and without webhook configurations. Reuse the same object for Get() to avoid fetching Gvk for it. It doesn't make harm, the object structure is not used after creation. 500 InternalError is not 100% webhook problem, but consider it as a reason for retry. Fixes: e26100e87e53 ("upgrade: retry if default DSCI creation fails (#1008)") Signed-off-by: Yauheni Kaliuta * upgrade: cleanup CreateWithRetry usage CreateWithRetry() now checks AlreadyExists condition inside, so skip its handling. sleep() is not needed for DSCI with proper working CreateWithRetry since it is the actual point of existance of the function. Keep checking number of DSCI instances for non-webhook configuration. Basically, using CreateWithRetry for DSC is redundant from webhook unavailability point of view since it is created after DSCI which garantees working webhook. But it handles already existed object. Signed-off-by: Yauheni Kaliuta --------- Signed-off-by: Yauheni Kaliuta --- pkg/cluster/resources.go | 33 ++++++++++++++++++++++++++++----- pkg/upgrade/upgrade.go | 14 ++------------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go index d7c5e0d08a2..e5dcf34dd8f 100644 --- a/pkg/cluster/resources.go +++ b/pkg/cluster/resources.go @@ -206,11 +206,34 @@ func CreateWithRetry(ctx context.Context, cli client.Client, obj client.Object, timeout := time.Duration(timeoutMin) * time.Minute return wait.PollUntilContextTimeout(ctx, interval, timeout, true, func(ctx context.Context) (bool, error) { - err := cli.Create(ctx, obj) - if err != nil { - fmt.Printf("Error creating object: %v. Retrying...\n", err) - return false, err + // Create can return: + // If webhook enabled: + // - no error (err == nil) + // - 500 InternalError likely if webhook is not available (yet) + // - 403 Forbidden if webhook blocks creation (check of existence) + // - some problem (real error) + // else, if webhook disabled: + // - no error (err == nil) + // - 409 AlreadyExists if object exists + // - some problem (real error) + errCreate := cli.Create(ctx, obj) + if errCreate == nil { + return true, nil } - return true, nil + + // check existence, success case for the function, covers 409 and 403 (or newly created) + errGet := cli.Get(ctx, client.ObjectKeyFromObject(obj), obj) + if errGet == nil { + return true, nil + } + + // retry if 500, assume webhook is not available + if k8serr.IsInternalError(errCreate) { + fmt.Printf("Error creating object: %v. Retrying...\n", errCreate) + return false, nil + } + + // some other error + return false, errCreate }) } diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index 72a6d6d5cc8..e42929d015e 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -7,7 +7,6 @@ import ( "errors" "fmt" "reflect" - "time" "github.com/hashicorp/go-multierror" operatorv1 "github.com/openshift/api/operator/v1" @@ -104,17 +103,9 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error { }, } err := cluster.CreateWithRetry(ctx, cli, releaseDataScienceCluster, 1) // 1 min timeout - switch { - case err == nil: - fmt.Printf("created DataScienceCluster resource\n") - case k8serr.IsAlreadyExists(err): - // Do not update the DSC if it already exists - fmt.Printf("DataScienceCluster resource already exists. It will not be updated with default DSC.\n") - return nil - default: + if err != nil { return fmt.Errorf("failed to create DataScienceCluster custom resource: %w", err) } - return nil } @@ -167,9 +158,8 @@ func CreateDefaultDSCI(ctx context.Context, cli client.Client, _ cluster.Platfor return nil case len(instances.Items) == 0: fmt.Println("create default DSCI CR.") - time.Sleep(10 * time.Second) // put 10 seconds sleep for webhook to fully functional before request first creation err := cluster.CreateWithRetry(ctx, cli, defaultDsci, 1) // 1 min timeout - if err != nil && !k8serr.IsAlreadyExists(err) { + if err != nil { return err } }