diff --git a/cmd/main.go b/cmd/main.go index 0d4ffd31..60a0dddc 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -57,6 +57,7 @@ func init() { const defaultMinimalRotationTimeRatio = 0.6 const defaultExpirationTime = 24 * time.Hour const defaultRuntimeReconcilerEnabled = false +const defaultGardenerRequestTimeout = 60 * time.Second func main() { var metricsAddr string @@ -66,6 +67,7 @@ func main() { var gardenerProjectName string var minimalRotationTimeRatio float64 var expirationTime time.Duration + var gardenerRequestTimeout time.Duration var enableRuntimeReconciler bool var persistShoot bool @@ -78,6 +80,7 @@ func main() { flag.StringVar(&gardenerProjectName, "gardener-project-name", "gardener-project", "Name of the Gardener project") flag.Float64Var(&minimalRotationTimeRatio, "minimal-rotation-time", defaultMinimalRotationTimeRatio, "The ratio determines what is the minimal time that needs to pass to rotate certificate.") flag.DurationVar(&expirationTime, "kubeconfig-expiration-time", defaultExpirationTime, "Dynamic kubeconfig expiration time") + flag.DurationVar(&gardenerRequestTimeout, "gardener-request-timeout", defaultGardenerRequestTimeout, "Timeout duration for requests to Gardener") flag.BoolVar(&enableRuntimeReconciler, "runtime-reconciler-enabled", defaultRuntimeReconcilerEnabled, "Feature flag for all runtime reconciler functionalities") flag.BoolVar(&persistShoot, "persist-shoot", false, "Feature flag to allow persisting created shoots") @@ -139,6 +142,7 @@ func main() { logger, rotationPeriod, minimalRotationTimeRatio, + gardenerRequestTimeout, metrics, ).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "GardenerCluster") diff --git a/config/default/manager_gardener_secret_patch.yaml b/config/default/manager_gardener_secret_patch.yaml index aceda89d..9a2b71c9 100644 --- a/config/default/manager_gardener_secret_patch.yaml +++ b/config/default/manager_gardener_secret_patch.yaml @@ -22,6 +22,7 @@ spec: - --gardener-project-name=kyma-dev - --kubeconfig-expiration-time=24h - --minimal-rotation-time=0.6 + - --gardener-request-timeout=60s - --runtime-reconciler-enabled=false volumeMounts: - name: gardener-kubeconfig diff --git a/docs/README.md b/docs/README.md index 6c9efe5f..f2a23fae 100644 --- a/docs/README.md +++ b/docs/README.md @@ -15,6 +15,7 @@ You can configure the Infrastructure Manager deployment with the following argum 2. `gardener-project` - the name of the Gardener project where the infrastructure operations are performed 3. `minimal-rotation-time` - the ratio determines what is the minimal time that needs to pass to rotate the certificate 4. `kubeconfig-expiration-time` - maximum time after which kubeconfig is rotated. The rotation happens between (`minimal-rotation-time` * `kubeconfig-expiration-time`) and `kubeconfig-expiration-time`. +4. `gardener-request-timeout` - specifies the timeout for requests to Gardener. Default value is `60s`. 5. `runtime-reconciler-enabled` - feature flag responsible for enabling the runtime reconciler. Default value is `false`. diff --git a/hack/runtime-migrator/main.go b/hack/runtime-migrator/main.go index e13e4ec6..97791825 100644 --- a/hack/runtime-migrator/main.go +++ b/hack/runtime-migrator/main.go @@ -31,22 +31,25 @@ const ( ShootNetworkingFilterExtensionType = "shoot-networking-filter" runtimeCrFullPath = "%sshoot-%s.yaml" runtimeIDAnnotation = "kcp.provisioner.kyma-project.io/runtime-id" + contextTimeout = 5 * time.Minute ) func main() { cfg := migrator.NewConfig() + migratorContext, cancel := context.WithTimeout(context.Background(), contextTimeout) + defer cancel() runtimeIDs := getRuntimeIDsFromStdin(cfg) gardenerNamespace := fmt.Sprintf("garden-%s", cfg.GardenerProjectName) - list := getShootList(cfg, gardenerNamespace) + list := getShootList(migratorContext, cfg, gardenerNamespace) provider, err := setupKubernetesKubeconfigProvider(cfg.GardenerKubeconfigPath, gardenerNamespace, expirationTime) if err != nil { - log.Fatal("failed to create kubeconfig provider - ", err) + log.Print("failed to create kubeconfig provider - ", err) } kcpClient, err := migrator.CreateKcpClient(&cfg) if err != nil { - log.Fatal("failed to create kcp client - ", kcpClient) + log.Print("failed to create kcp client - ", kcpClient) } results := make([]migrator.MigrationResult, 0) @@ -72,13 +75,13 @@ func main() { continue } - runtime, runtimeCrErr := createRuntime(shoot, cfg, provider) + runtime, runtimeCrErr := createRuntime(migratorContext, shoot, cfg, provider) if runtimeCrErr != nil { results = appendResult(results, shoot, migrator.StatusFailedToCreateRuntimeCR, runtimeCrErr) continue } - err := saveRuntime(cfg, runtime, kcpClient) + err := saveRuntime(migratorContext, cfg, runtime, kcpClient) if err != nil { log.Printf("Failed to apply runtime CR, %s\n", err) @@ -147,9 +150,9 @@ func getRuntimeIDsFromStdin(cfg migrator.Config) []string { return runtimeIDs } -func saveRuntime(cfg migrator.Config, runtime v1.Runtime, getClient client.Client) error { +func saveRuntime(ctx context.Context, cfg migrator.Config, runtime v1.Runtime, getClient client.Client) error { if !cfg.IsDryRun { - err := getClient.Create(context.Background(), &runtime) + err := getClient.Create(ctx, &runtime) if err != nil { return err @@ -160,12 +163,12 @@ func saveRuntime(cfg migrator.Config, runtime v1.Runtime, getClient client.Clien return nil } -func createRuntime(shoot v1beta1.Shoot, cfg migrator.Config, provider kubeconfig.Provider) (v1.Runtime, error) { - var subjects = getAdministratorsList(provider, shoot.Name) +func createRuntime(ctx context.Context, shoot v1beta1.Shoot, cfg migrator.Config, provider kubeconfig.Provider) (v1.Runtime, error) { + var subjects = getAdministratorsList(ctx, provider, shoot.Name) var oidcConfig = getOidcConfig(shoot) var hAFailureToleranceType = getFailureToleranceType(shoot) var licenceType = shoot.Annotations["kcp.provisioner.kyma-project.io/licence-type"] - labels, err := getAllRuntimeLabels(shoot, cfg.Client) + labels, err := getAllRuntimeLabels(ctx, shoot, cfg.Client) if err != nil { return v1.Runtime{}, err } @@ -267,11 +270,11 @@ func checkIfShootNetworkFilteringEnabled(shoot v1beta1.Shoot) bool { return false } -func getShootList(cfg migrator.Config, gardenerNamespace string) *v1beta1.ShootList { +func getShootList(ctx context.Context, cfg migrator.Config, gardenerNamespace string) *v1beta1.ShootList { gardenerShootClient := setupGardenerShootClient(cfg.GardenerKubeconfigPath, gardenerNamespace) - list, err := gardenerShootClient.List(context.Background(), metav1.ListOptions{}) + list, err := gardenerShootClient.List(ctx, metav1.ListOptions{}) if err != nil { - log.Fatal("Failed to retrieve shoots from Gardener - ", err) + log.Print("Failed to retrieve shoots from Gardener - ", err) } return list @@ -286,8 +289,8 @@ func getFailureToleranceType(shoot v1beta1.Shoot) v1beta1.FailureToleranceType { return "" } -func getAdministratorsList(provider kubeconfig.Provider, shootName string) []string { - var kubeconfig, err = provider.Fetch(context.Background(), shootName) +func getAdministratorsList(ctx context.Context, provider kubeconfig.Provider, shootName string) []string { + var kubeconfig, err = provider.Fetch(ctx, shootName) if kubeconfig == "" { log.Printf("Failed to get dynamic kubeconfig for shoot %s, %s\n", shootName, err.Error()) return []string{} @@ -304,7 +307,7 @@ func getAdministratorsList(provider kubeconfig.Provider, shootName string) []str log.Printf("Failed to create clientset from restconfig - %s\n", err) } - var clusterRoleBindings, _ = clientset.RbacV1().ClusterRoleBindings().List(context.Background(), metav1.ListOptions{ + var clusterRoleBindings, _ = clientset.RbacV1().ClusterRoleBindings().List(ctx, metav1.ListOptions{ LabelSelector: "reconciler.kyma-project.io/managed-by=reconciler,app=kyma", }) @@ -381,7 +384,7 @@ func setupKubernetesKubeconfigProvider(kubeconfigPath string, namespace string, int64(expirationTime.Seconds())), nil } -func getAllRuntimeLabels(shoot v1beta1.Shoot, getClient migrator.GetClient) (map[string]string, error) { +func getAllRuntimeLabels(ctx context.Context, shoot v1beta1.Shoot, getClient migrator.GetClient) (map[string]string, error) { enrichedRuntimeLabels := map[string]string{} var err error @@ -393,7 +396,7 @@ func getAllRuntimeLabels(shoot v1beta1.Shoot, getClient migrator.GetClient) (map } gardenerCluster := v1.GardenerCluster{} shootKey := types.NamespacedName{Name: shoot.Name, Namespace: "kcp-system"} - getGardenerCRerr := k8sClient.Get(context.Background(), shootKey, &gardenerCluster) + getGardenerCRerr := k8sClient.Get(ctx, shootKey, &gardenerCluster) if getGardenerCRerr != nil { var errMsg = fmt.Sprintf("Failed to retrieve GardenerCluster CR for shoot %s\n", shoot.Name) return map[string]string{}, errors.Wrap(getGardenerCRerr, errMsg) diff --git a/internal/controller/kubeconfig/gardener_cluster_controller.go b/internal/controller/kubeconfig/gardener_cluster_controller.go index b3322cb0..c532e203 100644 --- a/internal/controller/kubeconfig/gardener_cluster_controller.go +++ b/internal/controller/kubeconfig/gardener_cluster_controller.go @@ -53,10 +53,11 @@ type GardenerClusterController struct { log logr.Logger rotationPeriod time.Duration minimalRotationTimeRatio float64 + gardenerRequestTimeout time.Duration metrics metrics.Metrics } -func NewGardenerClusterController(mgr ctrl.Manager, kubeconfigProvider KubeconfigProvider, logger logr.Logger, rotationPeriod time.Duration, minimalRotationTimeRatio float64, metrics metrics.Metrics) *GardenerClusterController { +func NewGardenerClusterController(mgr ctrl.Manager, kubeconfigProvider KubeconfigProvider, logger logr.Logger, rotationPeriod time.Duration, minimalRotationTimeRatio float64, gardenerRequestTimeout time.Duration, metrics metrics.Metrics) *GardenerClusterController { return &GardenerClusterController{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), @@ -64,6 +65,7 @@ func NewGardenerClusterController(mgr ctrl.Manager, kubeconfigProvider Kubeconfi log: logger, rotationPeriod: rotationPeriod, minimalRotationTimeRatio: minimalRotationTimeRatio, + gardenerRequestTimeout: gardenerRequestTimeout, metrics: metrics, } } @@ -90,15 +92,17 @@ type KubeconfigProvider interface { // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.15.0/pkg/reconcile func (controller *GardenerClusterController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { //nolint:revive controller.log.Info("Starting reconciliation.", loggingContext(req)...) + reconciliationContext, cancel := context.WithTimeout(ctx, controller.gardenerRequestTimeout) + defer cancel() var cluster imv1.GardenerCluster - err := controller.Get(ctx, req.NamespacedName, &cluster) + err := controller.Get(reconciliationContext, req.NamespacedName, &cluster) if err != nil { if k8serrors.IsNotFound(err) { controller.unsetMetrics(req) - err = controller.deleteKubeconfigSecret(ctx, req.Name) + err = controller.deleteKubeconfigSecret(reconciliationContext, req.Name) } if err == nil { @@ -108,10 +112,10 @@ func (controller *GardenerClusterController) Reconcile(ctx context.Context, req return controller.resultWithoutRequeue(&cluster), err } - secret, err := controller.getSecret(cluster.Spec.Shoot.Name) + secret, err := controller.getSecret(reconciliationContext, cluster.Spec.Shoot.Name) if err != nil && !k8serrors.IsNotFound(err) { cluster.UpdateConditionForErrorState(imv1.ConditionTypeKubeconfigManagement, imv1.ConditionReasonFailedToGetSecret, err) - _ = controller.persistStatusChange(ctx, &cluster) + _ = controller.persistStatusChange(reconciliationContext, &cluster) return controller.resultWithoutRequeue(&cluster), err } @@ -127,11 +131,12 @@ func (controller *GardenerClusterController) Reconcile(ctx context.Context, req controller.log.WithValues(loggingContextFromCluster(&cluster)...).Info("rotation params", "lastSync", lastSyncTime.Format("2006-01-02 15:04:05"), "requeueAfter", requeueAfter.String(), + "gardenerRequestTimeout", controller.gardenerRequestTimeout.String(), ) - kubeconfigStatus, err := controller.handleKubeconfig(ctx, secret, &cluster, now) + kubeconfigStatus, err := controller.handleKubeconfig(reconciliationContext, secret, &cluster, now) if err != nil { - _ = controller.persistStatusChange(ctx, &cluster) + _ = controller.persistStatusChange(reconciliationContext, &cluster) // if a claster was not found in gardener, // CRD should not be rereconciled if k8serrors.IsNotFound(err) { @@ -142,13 +147,13 @@ func (controller *GardenerClusterController) Reconcile(ctx context.Context, req // there was a request to rotate the kubeconfig if kubeconfigStatus == ksRotated { - err = controller.removeForceRotationAnnotation(ctx, &cluster) + err = controller.removeForceRotationAnnotation(reconciliationContext, &cluster) if err != nil { return controller.resultWithoutRequeue(&cluster), err } } - if err := controller.persistStatusChange(ctx, &cluster); err != nil { + if err := controller.persistStatusChange(reconciliationContext, &cluster); err != nil { return controller.resultWithoutRequeue(&cluster), err } @@ -185,21 +190,21 @@ func (controller *GardenerClusterController) resultWithoutRequeue(cluster *imv1. return ctrl.Result{} } -func (controller *GardenerClusterController) persistStatusChange(ctx context.Context, cluster *imv1.GardenerCluster) error { - err := controller.Status().Update(ctx, cluster) +func (controller *GardenerClusterController) persistStatusChange(reconciliationContext context.Context, cluster *imv1.GardenerCluster) error { + err := controller.Status().Update(reconciliationContext, cluster) if err != nil { controller.log.Error(err, "status update failed") } return err } -func (controller *GardenerClusterController) deleteKubeconfigSecret(ctx context.Context, clusterCRName string) error { +func (controller *GardenerClusterController) deleteKubeconfigSecret(reconciliationContext context.Context, clusterCRName string) error { selector := client.MatchingLabels(map[string]string{ clusterCRNameLabel: clusterCRName, }) var secretList corev1.SecretList - err := controller.List(ctx, &secretList, selector) + err := controller.List(reconciliationContext, &secretList, selector) if err != nil && !k8serrors.IsNotFound(err) { return err @@ -214,17 +219,17 @@ func (controller *GardenerClusterController) deleteKubeconfigSecret(ctx context. return errors.Errorf("unexpected numer of secrets found for cluster CR `%s`", clusterCRName) } - return controller.Delete(ctx, &secretList.Items[0]) + return controller.Delete(reconciliationContext, &secretList.Items[0]) } -func (controller *GardenerClusterController) getSecret(shootName string) (*corev1.Secret, error) { +func (controller *GardenerClusterController) getSecret(ctx context.Context, shootName string) (*corev1.Secret, error) { var secretList corev1.SecretList shootNameSelector := client.MatchingLabels(map[string]string{ "kyma-project.io/shoot-name": shootName, }) - err := controller.List(context.Background(), &secretList, shootNameSelector) + err := controller.List(ctx, &secretList, shootNameSelector) if err != nil { return nil, err } diff --git a/internal/controller/kubeconfig/gardener_cluster_controller_test.go b/internal/controller/kubeconfig/gardener_cluster_controller_test.go index ca0dcdf1..3b725798 100644 --- a/internal/controller/kubeconfig/gardener_cluster_controller_test.go +++ b/internal/controller/kubeconfig/gardener_cluster_controller_test.go @@ -122,7 +122,10 @@ var _ = Describe("Gardener Cluster controller", func() { return false } - return newGardenerCluster.Status.State == imv1.ErrorState + return newGardenerCluster.Status.State == imv1.ErrorState && + len(newGardenerCluster.Status.Conditions) > 0 && + newGardenerCluster.Status.Conditions[0].Reason == string(imv1.ConditionReasonFailedToGetKubeconfig) && + newGardenerCluster.Status.Conditions[0].Message == "Failed to get kubeconfig. Error: this could be context deadline exceeded" }, time.Second*30, time.Second*3).Should(BeTrue()) By("Metrics should contain error label") @@ -404,7 +407,7 @@ func newTestGardenerClusterCR(name, namespace, shootName, secretName string) *Te Secret: imv1.Secret{ Name: secretName, Namespace: namespace, - Key: "config", //nolint:all TODO: fill it up with the actual data + Key: "config", //nolint:godox TODO: fill it up with the actual data }, }, }, diff --git a/internal/controller/kubeconfig/suite_test.go b/internal/controller/kubeconfig/suite_test.go index 84191573..bff77022 100644 --- a/internal/controller/kubeconfig/suite_test.go +++ b/internal/controller/kubeconfig/suite_test.go @@ -53,6 +53,7 @@ var ( const TestMinimalRotationTimeRatio = 0.5 const TestKubeconfigValidityTime = 24 * time.Hour const TestKubeconfigRotationPeriod = time.Duration(float64(TestKubeconfigValidityTime) * TestMinimalRotationTimeRatio) +const TestGardenerRequestTimeout = 60 * time.Second func TestControllers(t *testing.T) { RegisterFailHandler(Fail) @@ -84,9 +85,10 @@ var _ = BeforeSuite(func() { kubeconfigProviderMock := &kubeconfig_mocks.KubeconfigProvider{} setupKubeconfigProviderMock(kubeconfigProviderMock) + metrics := metrics.NewMetrics() - gardenerClusterController := NewGardenerClusterController(mgr, kubeconfigProviderMock, logger, TestKubeconfigRotationPeriod, TestMinimalRotationTimeRatio, metrics) + gardenerClusterController := NewGardenerClusterController(mgr, kubeconfigProviderMock, logger, TestKubeconfigRotationPeriod, TestMinimalRotationTimeRatio, TestGardenerRequestTimeout, metrics) Expect(gardenerClusterController).NotTo(BeNil()) @@ -113,7 +115,7 @@ var _ = BeforeSuite(func() { func setupKubeconfigProviderMock(kpMock *kubeconfig_mocks.KubeconfigProvider) { kpMock.On("Fetch", anyContext, "shootName1").Return("kubeconfig1", nil) kpMock.On("Fetch", anyContext, "shootName2").Return("kubeconfig2", nil) - kpMock.On("Fetch", anyContext, "shootName3").Return("", errors.New("failed to get kubeconfig")) + kpMock.On("Fetch", anyContext, "shootName3").Return("", errors.New("this could be context deadline exceeded")) kpMock.On("Fetch", anyContext, "shootName6").Return("kubeconfig6", nil) kpMock.On("Fetch", anyContext, "shootName4").Return("kubeconfig4", nil) kpMock.On("Fetch", anyContext, "shootName5").Return("kubeconfig5", nil) diff --git a/internal/gardener/kubeconfig/Provider.go b/internal/gardener/kubeconfig/Provider.go index c1a569ad..c7d06fa6 100644 --- a/internal/gardener/kubeconfig/Provider.go +++ b/internal/gardener/kubeconfig/Provider.go @@ -50,7 +50,7 @@ func (kp Provider) Fetch(ctx context.Context, shootName string) (string, error) }, } - err = kp.dynamicKubeconfigAPI.Create(context.Background(), shoot, &adminKubeconfigRequest) + err = kp.dynamicKubeconfigAPI.Create(ctx, shoot, &adminKubeconfigRequest) if err != nil { return "", errors.Wrap(err, "failed to create AdminKubeconfigRequest") }