From 0021b45a7a19bb58fdf529c969e2fdb18278a49f Mon Sep 17 00:00:00 2001 From: Saurabh Parekh Date: Mon, 8 Jul 2024 22:36:47 -0700 Subject: [PATCH] Update cluster status (#8455) * Update cluster status ready condition for control plane and worker nodes * Fix typos in the comments * Add unit tests for updating cluster status --- pkg/api/v1alpha1/condition_consts.go | 6 ++ pkg/controller/clusters/status.go | 37 +++++++-- pkg/controller/clusters/status_test.go | 100 +++++++++++++++++++++---- 3 files changed, 123 insertions(+), 20 deletions(-) diff --git a/pkg/api/v1alpha1/condition_consts.go b/pkg/api/v1alpha1/condition_consts.go index 93c2c9c16400..f14b655f44d2 100644 --- a/pkg/api/v1alpha1/condition_consts.go +++ b/pkg/api/v1alpha1/condition_consts.go @@ -58,6 +58,12 @@ const ( // AutoscalerConstraintNotMetReason reports the Cluster status is waiting for autoscaler constraint to be met. AutoscalerConstraintNotMetReason = "AutoscalerConstraintNotMet" + + // KubeadmControlPlaneNotReadyReason reports that the kubeadm control plane is not ready. + KubeadmControlPlaneNotReadyReason = "KubeadmControlPlaneNotReady" + + // MachineDeploymentNotReadyReason reports that the machine deployment is not ready. + MachineDeploymentNotReadyReason = "MachineDeploymentNotReady" ) const ( diff --git a/pkg/controller/clusters/status.go b/pkg/controller/clusters/status.go index af406be58037..d5cf73c06b47 100644 --- a/pkg/controller/clusters/status.go +++ b/pkg/controller/clusters/status.go @@ -18,8 +18,8 @@ import ( // UpdateClusterStatusForControlPlane checks the current state of the Cluster's control plane and updates the // Cluster status information. -// There is a posibility that UpdateClusterStatusForControlPlane does not update the -// controleplane status specially in case where it still waiting for cluster objects to be created. 
+// There is a possibility that UpdateClusterStatusForControlPlane does not update the +// control plane status, especially in cases where it is still waiting for cluster objects to be created. func UpdateClusterStatusForControlPlane(ctx context.Context, client client.Client, cluster *anywherev1.Cluster) error { kcp, err := controller.GetKubeadmControlPlane(ctx, client, cluster) if err != nil { return err } @@ -114,8 +114,8 @@ func updateControlPlaneReadyCondition(cluster *anywherev1.Cluster, kcp *controlp totalReplicas := int(kcp.Status.Replicas) // First, in the case of a rolling upgrade, we get the number of outdated nodes, and as long as there are some, - // we want to reflect in the message that the Cluster is in progres upgdating the old nodes with the - // the new machine spec. + // we want to reflect in the message that the Cluster is in progress updating the old nodes with the + // new machine spec. updatedReplicas := int(kcp.Status.UpdatedReplicas) totalOutdated := totalReplicas - updatedReplicas @@ -156,6 +156,14 @@ func updateControlPlaneReadyCondition(cluster *anywherev1.Cluster, kcp *controlp return } + // We check for the Ready condition on the kubeadm control plane as a final validation. Usually, the kcp objects + // should be ready at this point but if that is not the case, we report it as an error.
+ kubeadmControlPlaneReadyCondition := conditions.Get(kcp, clusterv1.ReadyCondition) + if kubeadmControlPlaneReadyCondition != nil && kubeadmControlPlaneReadyCondition.Status == v1.ConditionFalse { + conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, anywherev1.KubeadmControlPlaneNotReadyReason, clusterv1.ConditionSeverityError, "Kubeadm control plane %s not ready yet", kcp.ObjectMeta.Name) + return + } + conditions.MarkTrue(cluster, anywherev1.ControlPlaneReadyCondition) } @@ -235,7 +243,7 @@ func updateWorkersReadyCondition(cluster *anywherev1.Cluster, machineDeployments } // There may be worker nodes that are not up to date yet in the case of a rolling upgrade, - // so reflect that on the conditon with an appropriate message. + // so reflect that on the condition with an appropriate message. totalOutdated := totalReplicas - totalUpdatedReplicas if totalOutdated > 0 { upgradeReason := anywherev1.RollingUpgradeInProgress @@ -281,6 +289,25 @@ func updateWorkersReadyCondition(cluster *anywherev1.Cluster, machineDeployments } } + // We check for the Ready condition on the machine deployments as a final validation. Usually, the md objects + // should be ready at this point but if that is not the case, we report it as an error. 
+ for _, md := range machineDeployments { + mdConditions := md.GetConditions() + if mdConditions == nil { + continue + } + var machineDeploymentReadyCondition *clusterv1.Condition + for _, condition := range mdConditions { + if condition.Type == clusterv1.ReadyCondition { + machineDeploymentReadyCondition = &condition + } + } + if machineDeploymentReadyCondition != nil && machineDeploymentReadyCondition.Status == v1.ConditionFalse { + conditions.MarkFalse(cluster, anywherev1.WorkersReadyCondition, anywherev1.MachineDeploymentNotReadyReason, clusterv1.ConditionSeverityError, "Machine deployment %s not ready yet", md.ObjectMeta.Name) + return + } + } + conditions.MarkTrue(cluster, anywherev1.WorkersReadyCondition) } diff --git a/pkg/controller/clusters/status_test.go b/pkg/controller/clusters/status_test.go index 122d0a4ade7b..c935d179e625 100644 --- a/pkg/controller/clusters/status_test.go +++ b/pkg/controller/clusters/status_test.go @@ -400,6 +400,37 @@ func TestUpdateClusterStatusForControlPlane(t *testing.T) { Status: "False", }, }, + { + name: "kcp not ready yet", + kcp: test.KubeadmControlPlane(func(kcp *controlplanev1.KubeadmControlPlane) { + kcp.Status.Replicas = 3 + kcp.Status.ReadyReplicas = 3 + kcp.Status.UpdatedReplicas = 3 + + kcp.Status.Conditions = []clusterv1.Condition{ + { + Type: clusterv1.ReadyCondition, + Status: "False", + }, + } + }), + controlPlaneCount: 3, + conditions: []anywherev1.Condition{ + { + Type: anywherev1.ControlPlaneInitializedCondition, + Status: "True", + }, + }, + externalEtcdCount: 0, + externalEtcdCluster: nil, + wantCondition: &anywherev1.Condition{ + Type: anywherev1.ControlPlaneReadyCondition, + Status: "False", + Reason: anywherev1.KubeadmControlPlaneNotReadyReason, + Severity: clusterv1.ConditionSeverityError, + Message: "Kubeadm control plane test-cluster not ready yet", + }, + }, { name: "control plane ready", kcp: test.KubeadmControlPlane(func(kcp *controlplanev1.KubeadmControlPlane) { @@ -1011,14 +1042,11 @@ func 
TestUpdateClusterStatusForWorkers(t *testing.T) { }, }, { - name: "workers ready", + name: "workers not ready, md not ready yet", workerNodeGroupConfigurations: []anywherev1.WorkerNodeGroupConfiguration{ { Count: ptr.Int(1), }, - { - Count: ptr.Int(2), - }, }, machineDeployments: []clusterv1.MachineDeployment{ *test.MachineDeployment(func(md *clusterv1.MachineDeployment) { @@ -1026,18 +1054,16 @@ func TestUpdateClusterStatusForWorkers(t *testing.T) { md.ObjectMeta.Labels = map[string]string{ clusterv1.ClusterNameLabel: clusterName, } - md.Status.Replicas = 1 md.Status.ReadyReplicas = 1 + md.Status.Replicas = 1 md.Status.UpdatedReplicas = 1 - }), - *test.MachineDeployment(func(md *clusterv1.MachineDeployment) { - md.ObjectMeta.Name = "md-1" - md.ObjectMeta.Labels = map[string]string{ - clusterv1.ClusterNameLabel: clusterName, + + md.Status.Conditions = []clusterv1.Condition{ + { + Type: clusterv1.ReadyCondition, + Status: "False", + }, } - md.Status.Replicas = 2 - md.Status.ReadyReplicas = 2 - md.Status.UpdatedReplicas = 2 }), }, conditions: []anywherev1.Condition{ @@ -1047,8 +1073,11 @@ func TestUpdateClusterStatusForWorkers(t *testing.T) { }, }, wantCondition: &anywherev1.Condition{ - Type: anywherev1.WorkersReadyCondition, - Status: "True", + Type: anywherev1.WorkersReadyCondition, + Status: "False", + Reason: anywherev1.MachineDeploymentNotReadyReason, + Severity: clusterv1.ConditionSeverityError, + Message: "Machine deployment md-0 not ready yet", }, }, { @@ -1152,6 +1181,47 @@ func TestUpdateClusterStatusForWorkers(t *testing.T) { Status: "True", }, }, + { + name: "workers ready", + workerNodeGroupConfigurations: []anywherev1.WorkerNodeGroupConfiguration{ + { + Count: ptr.Int(1), + }, + { + Count: ptr.Int(2), + }, + }, + machineDeployments: []clusterv1.MachineDeployment{ + *test.MachineDeployment(func(md *clusterv1.MachineDeployment) { + md.ObjectMeta.Name = "md-0" + md.ObjectMeta.Labels = map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + } + 
md.Status.Replicas = 1 + md.Status.ReadyReplicas = 1 + md.Status.UpdatedReplicas = 1 + }), + *test.MachineDeployment(func(md *clusterv1.MachineDeployment) { + md.ObjectMeta.Name = "md-1" + md.ObjectMeta.Labels = map[string]string{ + clusterv1.ClusterNameLabel: clusterName, + } + md.Status.Replicas = 2 + md.Status.ReadyReplicas = 2 + md.Status.UpdatedReplicas = 2 + }), + }, + conditions: []anywherev1.Condition{ + { + Type: anywherev1.ControlPlaneInitializedCondition, + Status: "True", + }, + }, + wantCondition: &anywherev1.Condition{ + Type: anywherev1.WorkersReadyCondition, + Status: "True", + }, + }, } for _, tt := range tests {