Skip to content

Commit

Permalink
Merge pull request #271 from Disper/gardenercluster_controller_timeout
Browse files Browse the repository at this point in the history
gardenercluster controller timeout
  • Loading branch information
kyma-bot authored Jul 11, 2024
2 parents d357738 + 2b958b2 commit 91e79cc
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 39 deletions.
4 changes: 4 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func init() {
const defaultMinimalRotationTimeRatio = 0.6
const defaultExpirationTime = 24 * time.Hour
const defaultRuntimeReconcilerEnabled = false
const defaultGardenerRequestTimeout = 60 * time.Second

func main() {
var metricsAddr string
Expand All @@ -66,6 +67,7 @@ func main() {
var gardenerProjectName string
var minimalRotationTimeRatio float64
var expirationTime time.Duration
var gardenerRequestTimeout time.Duration
var enableRuntimeReconciler bool
var persistShoot bool

Expand All @@ -78,6 +80,7 @@ func main() {
flag.StringVar(&gardenerProjectName, "gardener-project-name", "gardener-project", "Name of the Gardener project")
flag.Float64Var(&minimalRotationTimeRatio, "minimal-rotation-time", defaultMinimalRotationTimeRatio, "The ratio determines what is the minimal time that needs to pass to rotate certificate.")
flag.DurationVar(&expirationTime, "kubeconfig-expiration-time", defaultExpirationTime, "Dynamic kubeconfig expiration time")
flag.DurationVar(&gardenerRequestTimeout, "gardener-request-timeout", defaultGardenerRequestTimeout, "Timeout duration for requests to Gardener")
flag.BoolVar(&enableRuntimeReconciler, "runtime-reconciler-enabled", defaultRuntimeReconcilerEnabled, "Feature flag for all runtime reconciler functionalities")
flag.BoolVar(&persistShoot, "persist-shoot", false, "Feature flag to allow persisting created shoots")

Expand Down Expand Up @@ -139,6 +142,7 @@ func main() {
logger,
rotationPeriod,
minimalRotationTimeRatio,
gardenerRequestTimeout,
metrics,
).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "GardenerCluster")
Expand Down
1 change: 1 addition & 0 deletions config/default/manager_gardener_secret_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ spec:
- --gardener-project-name=kyma-dev
- --kubeconfig-expiration-time=24h
- --minimal-rotation-time=0.6
- --gardener-request-timeout=60s
- --runtime-reconciler-enabled=false
volumeMounts:
- name: gardener-kubeconfig
Expand Down
1 change: 1 addition & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ You can configure the Infrastructure Manager deployment with the following argum
2. `gardener-project-name` - the name of the Gardener project where the infrastructure operations are performed
3. `minimal-rotation-time` - the ratio of `kubeconfig-expiration-time` that must pass before the certificate can be rotated
4. `kubeconfig-expiration-time` - maximum time after which kubeconfig is rotated. The rotation happens between (`minimal-rotation-time` * `kubeconfig-expiration-time`) and `kubeconfig-expiration-time`.
5. `gardener-request-timeout` - specifies the timeout for requests to Gardener. Default value is `60s`.
6. `runtime-reconciler-enabled` - feature flag responsible for enabling the runtime reconciler. Default value is `false`.


Expand Down
39 changes: 21 additions & 18 deletions hack/runtime-migrator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,25 @@ const (
ShootNetworkingFilterExtensionType = "shoot-networking-filter"
runtimeCrFullPath = "%sshoot-%s.yaml"
runtimeIDAnnotation = "kcp.provisioner.kyma-project.io/runtime-id"
contextTimeout = 5 * time.Minute
)

func main() {
cfg := migrator.NewConfig()
migratorContext, cancel := context.WithTimeout(context.Background(), contextTimeout)
defer cancel()

runtimeIDs := getRuntimeIDsFromStdin(cfg)
gardenerNamespace := fmt.Sprintf("garden-%s", cfg.GardenerProjectName)
list := getShootList(cfg, gardenerNamespace)
list := getShootList(migratorContext, cfg, gardenerNamespace)
provider, err := setupKubernetesKubeconfigProvider(cfg.GardenerKubeconfigPath, gardenerNamespace, expirationTime)
if err != nil {
log.Fatal("failed to create kubeconfig provider - ", err)
log.Print("failed to create kubeconfig provider - ", err)
}

kcpClient, err := migrator.CreateKcpClient(&cfg)
if err != nil {
log.Fatal("failed to create kcp client - ", kcpClient)
log.Print("failed to create kcp client - ", kcpClient)
}

results := make([]migrator.MigrationResult, 0)
Expand All @@ -72,13 +75,13 @@ func main() {
continue
}

runtime, runtimeCrErr := createRuntime(shoot, cfg, provider)
runtime, runtimeCrErr := createRuntime(migratorContext, shoot, cfg, provider)
if runtimeCrErr != nil {
results = appendResult(results, shoot, migrator.StatusFailedToCreateRuntimeCR, runtimeCrErr)
continue
}

err := saveRuntime(cfg, runtime, kcpClient)
err := saveRuntime(migratorContext, cfg, runtime, kcpClient)
if err != nil {
log.Printf("Failed to apply runtime CR, %s\n", err)

Expand Down Expand Up @@ -147,9 +150,9 @@ func getRuntimeIDsFromStdin(cfg migrator.Config) []string {
return runtimeIDs
}

func saveRuntime(cfg migrator.Config, runtime v1.Runtime, getClient client.Client) error {
func saveRuntime(ctx context.Context, cfg migrator.Config, runtime v1.Runtime, getClient client.Client) error {
if !cfg.IsDryRun {
err := getClient.Create(context.Background(), &runtime)
err := getClient.Create(ctx, &runtime)

if err != nil {
return err
Expand All @@ -160,12 +163,12 @@ func saveRuntime(cfg migrator.Config, runtime v1.Runtime, getClient client.Clien
return nil
}

func createRuntime(shoot v1beta1.Shoot, cfg migrator.Config, provider kubeconfig.Provider) (v1.Runtime, error) {
var subjects = getAdministratorsList(provider, shoot.Name)
func createRuntime(ctx context.Context, shoot v1beta1.Shoot, cfg migrator.Config, provider kubeconfig.Provider) (v1.Runtime, error) {
var subjects = getAdministratorsList(ctx, provider, shoot.Name)
var oidcConfig = getOidcConfig(shoot)
var hAFailureToleranceType = getFailureToleranceType(shoot)
var licenceType = shoot.Annotations["kcp.provisioner.kyma-project.io/licence-type"]
labels, err := getAllRuntimeLabels(shoot, cfg.Client)
labels, err := getAllRuntimeLabels(ctx, shoot, cfg.Client)
if err != nil {
return v1.Runtime{}, err
}
Expand Down Expand Up @@ -267,11 +270,11 @@ func checkIfShootNetworkFilteringEnabled(shoot v1beta1.Shoot) bool {
return false
}

func getShootList(cfg migrator.Config, gardenerNamespace string) *v1beta1.ShootList {
func getShootList(ctx context.Context, cfg migrator.Config, gardenerNamespace string) *v1beta1.ShootList {
gardenerShootClient := setupGardenerShootClient(cfg.GardenerKubeconfigPath, gardenerNamespace)
list, err := gardenerShootClient.List(context.Background(), metav1.ListOptions{})
list, err := gardenerShootClient.List(ctx, metav1.ListOptions{})
if err != nil {
log.Fatal("Failed to retrieve shoots from Gardener - ", err)
log.Print("Failed to retrieve shoots from Gardener - ", err)
}

return list
Expand All @@ -286,8 +289,8 @@ func getFailureToleranceType(shoot v1beta1.Shoot) v1beta1.FailureToleranceType {
return ""
}

func getAdministratorsList(provider kubeconfig.Provider, shootName string) []string {
var kubeconfig, err = provider.Fetch(context.Background(), shootName)
func getAdministratorsList(ctx context.Context, provider kubeconfig.Provider, shootName string) []string {
var kubeconfig, err = provider.Fetch(ctx, shootName)
if kubeconfig == "" {
log.Printf("Failed to get dynamic kubeconfig for shoot %s, %s\n", shootName, err.Error())
return []string{}
Expand All @@ -304,7 +307,7 @@ func getAdministratorsList(provider kubeconfig.Provider, shootName string) []str
log.Printf("Failed to create clientset from restconfig - %s\n", err)
}

var clusterRoleBindings, _ = clientset.RbacV1().ClusterRoleBindings().List(context.Background(), metav1.ListOptions{
var clusterRoleBindings, _ = clientset.RbacV1().ClusterRoleBindings().List(ctx, metav1.ListOptions{
LabelSelector: "reconciler.kyma-project.io/managed-by=reconciler,app=kyma",
})

Expand Down Expand Up @@ -381,7 +384,7 @@ func setupKubernetesKubeconfigProvider(kubeconfigPath string, namespace string,
int64(expirationTime.Seconds())), nil
}

func getAllRuntimeLabels(shoot v1beta1.Shoot, getClient migrator.GetClient) (map[string]string, error) {
func getAllRuntimeLabels(ctx context.Context, shoot v1beta1.Shoot, getClient migrator.GetClient) (map[string]string, error) {
enrichedRuntimeLabels := map[string]string{}
var err error

Expand All @@ -393,7 +396,7 @@ func getAllRuntimeLabels(shoot v1beta1.Shoot, getClient migrator.GetClient) (map
}
gardenerCluster := v1.GardenerCluster{}
shootKey := types.NamespacedName{Name: shoot.Name, Namespace: "kcp-system"}
getGardenerCRerr := k8sClient.Get(context.Background(), shootKey, &gardenerCluster)
getGardenerCRerr := k8sClient.Get(ctx, shootKey, &gardenerCluster)
if getGardenerCRerr != nil {
var errMsg = fmt.Sprintf("Failed to retrieve GardenerCluster CR for shoot %s\n", shoot.Name)
return map[string]string{}, errors.Wrap(getGardenerCRerr, errMsg)
Expand Down
37 changes: 21 additions & 16 deletions internal/controller/kubeconfig/gardener_cluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,19 @@ type GardenerClusterController struct {
log logr.Logger
rotationPeriod time.Duration
minimalRotationTimeRatio float64
gardenerRequestTimeout time.Duration
metrics metrics.Metrics
}

func NewGardenerClusterController(mgr ctrl.Manager, kubeconfigProvider KubeconfigProvider, logger logr.Logger, rotationPeriod time.Duration, minimalRotationTimeRatio float64, metrics metrics.Metrics) *GardenerClusterController {
func NewGardenerClusterController(mgr ctrl.Manager, kubeconfigProvider KubeconfigProvider, logger logr.Logger, rotationPeriod time.Duration, minimalRotationTimeRatio float64, gardenerRequestTimeout time.Duration, metrics metrics.Metrics) *GardenerClusterController {
return &GardenerClusterController{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
KubeconfigProvider: kubeconfigProvider,
log: logger,
rotationPeriod: rotationPeriod,
minimalRotationTimeRatio: minimalRotationTimeRatio,
gardenerRequestTimeout: gardenerRequestTimeout,
metrics: metrics,
}
}
Expand All @@ -90,15 +92,17 @@ type KubeconfigProvider interface {
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.15.0/pkg/reconcile
func (controller *GardenerClusterController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { //nolint:revive
controller.log.Info("Starting reconciliation.", loggingContext(req)...)
reconciliationContext, cancel := context.WithTimeout(ctx, controller.gardenerRequestTimeout)
defer cancel()

var cluster imv1.GardenerCluster

err := controller.Get(ctx, req.NamespacedName, &cluster)
err := controller.Get(reconciliationContext, req.NamespacedName, &cluster)

if err != nil {
if k8serrors.IsNotFound(err) {
controller.unsetMetrics(req)
err = controller.deleteKubeconfigSecret(ctx, req.Name)
err = controller.deleteKubeconfigSecret(reconciliationContext, req.Name)
}

if err == nil {
Expand All @@ -108,10 +112,10 @@ func (controller *GardenerClusterController) Reconcile(ctx context.Context, req
return controller.resultWithoutRequeue(&cluster), err
}

secret, err := controller.getSecret(cluster.Spec.Shoot.Name)
secret, err := controller.getSecret(reconciliationContext, cluster.Spec.Shoot.Name)
if err != nil && !k8serrors.IsNotFound(err) {
cluster.UpdateConditionForErrorState(imv1.ConditionTypeKubeconfigManagement, imv1.ConditionReasonFailedToGetSecret, err)
_ = controller.persistStatusChange(ctx, &cluster)
_ = controller.persistStatusChange(reconciliationContext, &cluster)
return controller.resultWithoutRequeue(&cluster), err
}

Expand All @@ -127,11 +131,12 @@ func (controller *GardenerClusterController) Reconcile(ctx context.Context, req
controller.log.WithValues(loggingContextFromCluster(&cluster)...).Info("rotation params",
"lastSync", lastSyncTime.Format("2006-01-02 15:04:05"),
"requeueAfter", requeueAfter.String(),
"gardenerRequestTimeout", controller.gardenerRequestTimeout.String(),
)

kubeconfigStatus, err := controller.handleKubeconfig(ctx, secret, &cluster, now)
kubeconfigStatus, err := controller.handleKubeconfig(reconciliationContext, secret, &cluster, now)
if err != nil {
_ = controller.persistStatusChange(ctx, &cluster)
_ = controller.persistStatusChange(reconciliationContext, &cluster)
// if a cluster was not found in Gardener,
// CRD should not be re-reconciled
if k8serrors.IsNotFound(err) {
Expand All @@ -142,13 +147,13 @@ func (controller *GardenerClusterController) Reconcile(ctx context.Context, req

// there was a request to rotate the kubeconfig
if kubeconfigStatus == ksRotated {
err = controller.removeForceRotationAnnotation(ctx, &cluster)
err = controller.removeForceRotationAnnotation(reconciliationContext, &cluster)
if err != nil {
return controller.resultWithoutRequeue(&cluster), err
}
}

if err := controller.persistStatusChange(ctx, &cluster); err != nil {
if err := controller.persistStatusChange(reconciliationContext, &cluster); err != nil {
return controller.resultWithoutRequeue(&cluster), err
}

Expand Down Expand Up @@ -185,21 +190,21 @@ func (controller *GardenerClusterController) resultWithoutRequeue(cluster *imv1.
return ctrl.Result{}
}

func (controller *GardenerClusterController) persistStatusChange(ctx context.Context, cluster *imv1.GardenerCluster) error {
err := controller.Status().Update(ctx, cluster)
func (controller *GardenerClusterController) persistStatusChange(reconciliationContext context.Context, cluster *imv1.GardenerCluster) error {
err := controller.Status().Update(reconciliationContext, cluster)
if err != nil {
controller.log.Error(err, "status update failed")
}
return err
}

func (controller *GardenerClusterController) deleteKubeconfigSecret(ctx context.Context, clusterCRName string) error {
func (controller *GardenerClusterController) deleteKubeconfigSecret(reconciliationContext context.Context, clusterCRName string) error {
selector := client.MatchingLabels(map[string]string{
clusterCRNameLabel: clusterCRName,
})

var secretList corev1.SecretList
err := controller.List(ctx, &secretList, selector)
err := controller.List(reconciliationContext, &secretList, selector)

if err != nil && !k8serrors.IsNotFound(err) {
return err
Expand All @@ -214,17 +219,17 @@ func (controller *GardenerClusterController) deleteKubeconfigSecret(ctx context.
return errors.Errorf("unexpected numer of secrets found for cluster CR `%s`", clusterCRName)
}

return controller.Delete(ctx, &secretList.Items[0])
return controller.Delete(reconciliationContext, &secretList.Items[0])
}

func (controller *GardenerClusterController) getSecret(shootName string) (*corev1.Secret, error) {
func (controller *GardenerClusterController) getSecret(ctx context.Context, shootName string) (*corev1.Secret, error) {
var secretList corev1.SecretList

shootNameSelector := client.MatchingLabels(map[string]string{
"kyma-project.io/shoot-name": shootName,
})

err := controller.List(context.Background(), &secretList, shootNameSelector)
err := controller.List(ctx, &secretList, shootNameSelector)
if err != nil {
return nil, err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,10 @@ var _ = Describe("Gardener Cluster controller", func() {
return false
}

return newGardenerCluster.Status.State == imv1.ErrorState
return newGardenerCluster.Status.State == imv1.ErrorState &&
len(newGardenerCluster.Status.Conditions) > 0 &&
newGardenerCluster.Status.Conditions[0].Reason == string(imv1.ConditionReasonFailedToGetKubeconfig) &&
newGardenerCluster.Status.Conditions[0].Message == "Failed to get kubeconfig. Error: this could be context deadline exceeded"
}, time.Second*30, time.Second*3).Should(BeTrue())

By("Metrics should contain error label")
Expand Down Expand Up @@ -404,7 +407,7 @@ func newTestGardenerClusterCR(name, namespace, shootName, secretName string) *Te
Secret: imv1.Secret{
Name: secretName,
Namespace: namespace,
Key: "config", //nolint:all TODO: fill it up with the actual data
Key: "config", //nolint:godox TODO: fill it up with the actual data
},
},
},
Expand Down
6 changes: 4 additions & 2 deletions internal/controller/kubeconfig/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ var (
const TestMinimalRotationTimeRatio = 0.5
const TestKubeconfigValidityTime = 24 * time.Hour
const TestKubeconfigRotationPeriod = time.Duration(float64(TestKubeconfigValidityTime) * TestMinimalRotationTimeRatio)
const TestGardenerRequestTimeout = 60 * time.Second

func TestControllers(t *testing.T) {
RegisterFailHandler(Fail)
Expand Down Expand Up @@ -84,9 +85,10 @@ var _ = BeforeSuite(func() {

kubeconfigProviderMock := &kubeconfig_mocks.KubeconfigProvider{}
setupKubeconfigProviderMock(kubeconfigProviderMock)

metrics := metrics.NewMetrics()

gardenerClusterController := NewGardenerClusterController(mgr, kubeconfigProviderMock, logger, TestKubeconfigRotationPeriod, TestMinimalRotationTimeRatio, metrics)
gardenerClusterController := NewGardenerClusterController(mgr, kubeconfigProviderMock, logger, TestKubeconfigRotationPeriod, TestMinimalRotationTimeRatio, TestGardenerRequestTimeout, metrics)

Expect(gardenerClusterController).NotTo(BeNil())

Expand All @@ -113,7 +115,7 @@ var _ = BeforeSuite(func() {
func setupKubeconfigProviderMock(kpMock *kubeconfig_mocks.KubeconfigProvider) {
kpMock.On("Fetch", anyContext, "shootName1").Return("kubeconfig1", nil)
kpMock.On("Fetch", anyContext, "shootName2").Return("kubeconfig2", nil)
kpMock.On("Fetch", anyContext, "shootName3").Return("", errors.New("failed to get kubeconfig"))
kpMock.On("Fetch", anyContext, "shootName3").Return("", errors.New("this could be context deadline exceeded"))
kpMock.On("Fetch", anyContext, "shootName6").Return("kubeconfig6", nil)
kpMock.On("Fetch", anyContext, "shootName4").Return("kubeconfig4", nil)
kpMock.On("Fetch", anyContext, "shootName5").Return("kubeconfig5", nil)
Expand Down
2 changes: 1 addition & 1 deletion internal/gardener/kubeconfig/Provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func (kp Provider) Fetch(ctx context.Context, shootName string) (string, error)
},
}

err = kp.dynamicKubeconfigAPI.Create(context.Background(), shoot, &adminKubeconfigRequest)
err = kp.dynamicKubeconfigAPI.Create(ctx, shoot, &adminKubeconfigRequest)
if err != nil {
return "", errors.Wrap(err, "failed to create AdminKubeconfigRequest")
}
Expand Down

0 comments on commit 91e79cc

Please sign in to comment.