From 85c958851408d99913c19925b4d2f1220ddc9213 Mon Sep 17 00:00:00 2001 From: Saurabh Parekh Date: Fri, 31 May 2024 12:36:14 -0700 Subject: [PATCH] Handle cluster status reconciliation with autoscaling configured --- pkg/api/v1alpha1/condition_consts.go | 3 +++ pkg/clusterapi/autoscaler.go | 9 ++++---- pkg/controller/clusters/status.go | 32 ++++++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/pkg/api/v1alpha1/condition_consts.go b/pkg/api/v1alpha1/condition_consts.go index 85fe677c2e65..93c2c9c16400 100644 --- a/pkg/api/v1alpha1/condition_consts.go +++ b/pkg/api/v1alpha1/condition_consts.go @@ -55,6 +55,9 @@ const ( // ExternalEtcdNotAvailable reports the Cluster status is waiting for Etcd to be available. ExternalEtcdNotAvailable = "ExternalEtcdNotAvailable" + + // AutoscalerConstraintNotMetReason reports the Cluster status is waiting for autoscaler constraint to be met. + AutoscalerConstraintNotMetReason = "AutoscalerConstraintNotMet" ) const ( diff --git a/pkg/clusterapi/autoscaler.go b/pkg/clusterapi/autoscaler.go index 9790b3600cc8..8a51fc24f496 100644 --- a/pkg/clusterapi/autoscaler.go +++ b/pkg/clusterapi/autoscaler.go @@ -8,9 +8,10 @@ import ( anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1" ) +// Autoscaler annotation constants. 
const ( - nodeGroupMinSizeAnnotation = "cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size" - nodeGroupMaxSizeAnnotation = "cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size" + NodeGroupMinSizeAnnotation = "cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size" + NodeGroupMaxSizeAnnotation = "cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size" ) func ConfigureAutoscalingInMachineDeployment(md *clusterv1.MachineDeployment, autoscalingConfig *anywherev1.AutoScalingConfiguration) { @@ -22,6 +23,6 @@ func ConfigureAutoscalingInMachineDeployment(md *clusterv1.MachineDeployment, au md.ObjectMeta.Annotations = map[string]string{} } - md.ObjectMeta.Annotations[nodeGroupMinSizeAnnotation] = strconv.Itoa(autoscalingConfig.MinCount) - md.ObjectMeta.Annotations[nodeGroupMaxSizeAnnotation] = strconv.Itoa(autoscalingConfig.MaxCount) + md.ObjectMeta.Annotations[NodeGroupMinSizeAnnotation] = strconv.Itoa(autoscalingConfig.MinCount) + md.ObjectMeta.Annotations[NodeGroupMaxSizeAnnotation] = strconv.Itoa(autoscalingConfig.MaxCount) } diff --git a/pkg/controller/clusters/status.go b/pkg/controller/clusters/status.go index 53456ae1057b..af406be58037 100644 --- a/pkg/controller/clusters/status.go +++ b/pkg/controller/clusters/status.go @@ -12,6 +12,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1" + "github.com/aws/eks-anywhere/pkg/clusterapi" "github.com/aws/eks-anywhere/pkg/controller" ) @@ -198,11 +199,17 @@ func updateWorkersReadyCondition(cluster *anywherev1.Cluster, machineDeployments } totalExpected := 0 + wngWithAutoScalingConfigurationMap := make(map[string]anywherev1.AutoScalingConfiguration) for _, wng := range cluster.Spec.WorkerNodeGroupConfigurations { - totalExpected += *wng.Count + // Only worker node groups without an autoscaling configuration contribute to the expected worker node count; the others are recorded by name. 
+ if wng.AutoScalingConfiguration == nil { + totalExpected += *wng.Count + } else { + wngWithAutoScalingConfigurationMap[wng.Name] = *wng.AutoScalingConfiguration + } } - // First, we need aggregate the number of nodes across worker node groups to be able to assess the condition of the workers + // First, we need to aggregate the number of nodes across worker node groups to be able to assess the condition of the workers // as a whole. totalReadyReplicas := 0 totalUpdatedReplicas := 0 @@ -215,6 +222,13 @@ func updateWorkersReadyCondition(cluster *anywherev1.Cluster, machineDeployments return } + // Skip aggregating the replicas of machine deployments that carry the autoscaler min-size annotation. + if md.ObjectMeta.Annotations != nil { + if _, ok := md.ObjectMeta.Annotations[clusterapi.NodeGroupMinSizeAnnotation]; ok { + continue + } + } + totalReadyReplicas += int(md.Status.ReadyReplicas) totalUpdatedReplicas += int(md.Status.UpdatedReplicas) totalReplicas += int(md.Status.Replicas) @@ -253,6 +267,20 @@ func updateWorkersReadyCondition(cluster *anywherev1.Cluster, machineDeployments return } + // Iterate through the machine deployments which have autoscaling configured to check that the number of worker node replicas + // is between the min count and max count specified in the cluster spec. NOTE(review): the map is keyed by worker node group name but looked up by machine deployment name - confirm the two are equal. + for _, md := range machineDeployments { + if wng, exists := wngWithAutoScalingConfigurationMap[md.ObjectMeta.Name]; exists { + minCount := wng.MinCount + maxCount := wng.MaxCount + replicas := int(md.Status.Replicas) + if replicas < minCount || replicas > maxCount { + conditions.MarkFalse(cluster, anywherev1.WorkersReadyCondition, anywherev1.AutoscalerConstraintNotMetReason, clusterv1.ConditionSeverityInfo, "Worker nodes count for %s not between %d and %d yet (%d actual)", md.ObjectMeta.Name, minCount, maxCount, replicas) + return + } + } + } + conditions.MarkTrue(cluster, anywherev1.WorkersReadyCondition) }