From 73209f6d8d2f5e921fd22ec5b90495b8a1a87b82 Mon Sep 17 00:00:00 2001
From: Taylor Neyland <57606775+taneyland@users.noreply.github.com>
Date: Wed, 14 Feb 2024 17:34:13 -0600
Subject: [PATCH] Pause machine health checks during inplace upgrades (#7539)

---
 controllers/kubeadmcontrolplane_controller.go |  42 +++++++++
 .../kubeadmcontrolplane_controller_test.go    |  76 ++++++++++++---
 controllers/machinedeployment_controller.go   |  31 ++++++
 .../machinedeployment_controller_test.go      |  81 +++++++++++++---
 4 files changed, 216 insertions(+), 14 deletions(-)

diff --git a/controllers/kubeadmcontrolplane_controller.go b/controllers/kubeadmcontrolplane_controller.go
index 920afc6046ce..9ec5d2daecfa 100644
--- a/controllers/kubeadmcontrolplane_controller.go
+++ b/controllers/kubeadmcontrolplane_controller.go
@@ -31,6 +31,7 @@ import (
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
+	"sigs.k8s.io/cluster-api/util/annotations"
 	"sigs.k8s.io/cluster-api/util/collections"
 	"sigs.k8s.io/cluster-api/util/patch"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -118,12 +119,28 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, log logr.
 	cpUpgrade := &anywherev1.ControlPlaneUpgrade{}
 	cpuGetErr := r.client.Get(ctx, GetNamespacedNameType(cpUpgradeName(kcp.Name), constants.EksaSystemNamespace), cpUpgrade)
 
+	mhc := &clusterv1.MachineHealthCheck{}
+	if err := r.client.Get(ctx, GetNamespacedNameType(cpMachineHealthCheckName(kcp.Name), constants.EksaSystemNamespace), mhc); err != nil {
+		if apierrors.IsNotFound(err) {
+			return reconcile.Result{}, err
+		}
+		return ctrl.Result{}, fmt.Errorf("getting MachineHealthCheck %s: %v", cpMachineHealthCheckName(kcp.Name), err)
+	}
+	mhcPatchHelper, err := patch.NewHelper(mhc, r.client)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
 	if kcp.Spec.Replicas != nil && (*kcp.Spec.Replicas == kcp.Status.UpdatedReplicas) {
 		if cpuGetErr == nil && cpUpgrade.Status.Ready {
 			log.Info("Control plane upgrade complete, deleting object", "ControlPlaneUpgrade", cpUpgrade.Name)
 			if err := r.client.Delete(ctx, cpUpgrade); err != nil {
 				return ctrl.Result{}, fmt.Errorf("deleting ControlPlaneUpgrade object: %v", err)
 			}
+			log.Info("Resuming control plane machine health check", "MachineHealthCheck", cpMachineHealthCheckName(kcp.Name))
+			if err := resumeMachineHealthCheck(ctx, mhc, mhcPatchHelper); err != nil {
+				return ctrl.Result{}, fmt.Errorf("updating annotations for machine health check: %v", err)
+			}
 		} else if !apierrors.IsNotFound(cpuGetErr) {
 			return ctrl.Result{}, fmt.Errorf("getting ControlPlaneUpgrade for KubeadmControlPlane %s: %v", kcp.Name, cpuGetErr)
 		}
@@ -145,6 +162,12 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, log logr.
 	if err != nil {
 		return ctrl.Result{}, fmt.Errorf("generating ControlPlaneUpgrade: %v", err)
 	}
+
+	log.Info("Pausing control plane machine health check", "MachineHealthCheck", cpMachineHealthCheckName(kcp.Name))
+	if err := pauseMachineHealthCheck(ctx, mhc, mhcPatchHelper); err != nil {
+		return ctrl.Result{}, fmt.Errorf("updating annotations for machine health check: %v", err)
+	}
+
 	if err := r.client.Create(ctx, cpUpgrade); client.IgnoreAlreadyExists(err) != nil {
 		return ctrl.Result{}, fmt.Errorf("failed to create ControlPlaneUpgrade for KubeadmControlPlane %s: %v", kcp.Name, err)
 	}
@@ -161,6 +184,11 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, log logr.
 		return ctrl.Result{}, fmt.Errorf("deleting ControlPlaneUpgrade object: %v", err)
 	}
 
+	log.Info("Resuming control plane machine health check", "MachineHealthCheck", cpMachineHealthCheckName(kcp.Name))
+	if err := resumeMachineHealthCheck(ctx, mhc, mhcPatchHelper); err != nil {
+		return ctrl.Result{}, fmt.Errorf("updating annotations for machine health check: %v", err)
+	}
+
 	return ctrl.Result{}, nil
 }
 
@@ -201,6 +229,16 @@ func (r *KubeadmControlPlaneReconciler) validateStackedEtcd(kcp *controlplanev1.
 	return nil
 }
 
+func pauseMachineHealthCheck(ctx context.Context, mhc *clusterv1.MachineHealthCheck, mhcPatchHelper *patch.Helper) error {
+	annotations.AddAnnotations(mhc, map[string]string{clusterv1.PausedAnnotation: "true"})
+	return mhcPatchHelper.Patch(ctx, mhc)
+}
+
+func resumeMachineHealthCheck(ctx context.Context, mhc *clusterv1.MachineHealthCheck, mhcPatchHelper *patch.Helper) error {
+	delete(mhc.Annotations, clusterv1.PausedAnnotation)
+	return mhcPatchHelper.Patch(ctx, mhc)
+}
+
 func controlPlaneUpgrade(kcp *controlplanev1.KubeadmControlPlane, machines []corev1.ObjectReference) (*anywherev1.ControlPlaneUpgrade, error) {
 	kcpSpec, err := json.Marshal(kcp.Spec)
 	if err != nil {
@@ -235,3 +273,7 @@ func controlPlaneUpgrade(kcp *controlplanev1.KubeadmControlPlane, machines []cor
 func cpUpgradeName(kcpName string) string {
 	return kcpName + "-cp-upgrade"
 }
+
+func cpMachineHealthCheckName(kcpName string) string {
+	return fmt.Sprintf("%s-kcp-unhealthy", kcpName)
+}
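Why a bare annotation is sufficient here: Cluster API controllers, including the MachineHealthCheck controller, skip objects that carry the `cluster.x-k8s.io/paused` annotation, so pausing remediation during an in-place upgrade is just an annotation write followed by a patch. A minimal standalone sketch of the round-trip the two helpers above perform, using the same cluster-api utilities the controller imports (illustration only, not part of the patch):

```go
package main

import (
	"fmt"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/annotations"
)

func main() {
	mhc := &clusterv1.MachineHealthCheck{}

	// Pause: what pauseMachineHealthCheck does before patching.
	annotations.AddAnnotations(mhc, map[string]string{clusterv1.PausedAnnotation: "true"})
	fmt.Println(annotations.HasPaused(mhc)) // true: remediation is skipped

	// Resume: what resumeMachineHealthCheck does before patching.
	delete(mhc.Annotations, clusterv1.PausedAnnotation)
	fmt.Println(annotations.HasPaused(mhc)) // false: remediation is active again
}
```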
"github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -26,6 +28,7 @@ type kcpObjects struct { machines []*clusterv1.Machine cpUpgrade *anywherev1.ControlPlaneUpgrade kcp *controlplanev1.KubeadmControlPlane + mhc *clusterv1.MachineHealthCheck } func TestKCPSetupWithManager(t *testing.T) { @@ -41,7 +44,7 @@ func TestKCPReconcile(t *testing.T) { ctx := context.Background() kcpObjs := getObjectsForKCP() - runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.cpUpgrade, kcpObjs.kcp} + runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.cpUpgrade, kcpObjs.kcp, kcpObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewKubeadmControlPlaneReconciler(client) req := kcpRequest(kcpObjs.kcp) @@ -62,7 +65,7 @@ func TestKCPReconcileComplete(t *testing.T) { kcpObjs.kcp.Spec.Replicas = pointer.Int32(count) kcpObjs.kcp.Status.UpdatedReplicas = count - runtimeObjs := []runtime.Object{kcpObjs.kcp} + runtimeObjs := []runtime.Object{kcpObjs.kcp, kcpObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewKubeadmControlPlaneReconciler(client) req := kcpRequest(kcpObjs.kcp) @@ -73,6 +76,18 @@ func TestKCPReconcileComplete(t *testing.T) { err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.kcp.Name, Namespace: constants.EksaSystemNamespace}, kcp) g.Expect(err).ToNot(HaveOccurred()) g.Expect(kcp.Annotations).ToNot(HaveKey("controlplane.clusters.x-k8s.io/in-place-upgrade-needed")) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Eventually(func(g Gomega) error { + func(g Gomega) { + g.Expect(mhc.Annotations).To(HaveKey("cluster.x-k8s.io/paused")) + }(g) + + return nil + }) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestKCPReconcileNotNeeded(t *testing.T) { @@ -82,12 +97,17 @@ func TestKCPReconcileNotNeeded(t *testing.T) { delete(kcpObjs.kcp.Annotations, "controlplane.clusters.x-k8s.io/in-place-upgrade-needed") - runtimeObjs := []runtime.Object{kcpObjs.kcp} + runtimeObjs := []runtime.Object{kcpObjs.kcp, kcpObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewKubeadmControlPlaneReconciler(client) req := kcpRequest(kcpObjs.kcp) _, err := r.Reconcile(ctx, req) g.Expect(err).ToNot(HaveOccurred()) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestKCPReconcileCreateControlPlaneUpgrade(t *testing.T) { @@ -95,7 +115,7 @@ func TestKCPReconcileCreateControlPlaneUpgrade(t *testing.T) { ctx := context.Background() kcpObjs := getObjectsForKCP() - runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.kcp} + runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.kcp, kcpObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewKubeadmControlPlaneReconciler(client) req := kcpRequest(kcpObjs.kcp) @@ -112,6 +132,11 @@ func TestKCPReconcileCreateControlPlaneUpgrade(t *testing.T) { kcpSpec, err := json.Marshal(kcpObjs.kcp.Spec) g.Expect(err).ToNot(HaveOccurred()) 
g.Expect(cpu.Spec.ControlPlaneSpecData).To(BeEquivalentTo(base64.StdEncoding.EncodeToString(kcpSpec))) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).To(HaveKey("cluster.x-k8s.io/paused")) } func TestKCPReconcileControlPlaneUpgradeReady(t *testing.T) { @@ -121,7 +146,7 @@ func TestKCPReconcileControlPlaneUpgradeReady(t *testing.T) { kcpObjs.cpUpgrade.Status.Ready = true - runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.cpUpgrade, kcpObjs.kcp} + runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.cpUpgrade, kcpObjs.kcp, kcpObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewKubeadmControlPlaneReconciler(client) req := kcpRequest(kcpObjs.kcp) @@ -131,6 +156,11 @@ func TestKCPReconcileControlPlaneUpgradeReady(t *testing.T) { cpu := &anywherev1.ControlPlaneUpgrade{} err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.cpUpgrade.Name, Namespace: constants.EksaSystemNamespace}, cpu) g.Expect(err).To(HaveOccurred()) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestKCPReconcileKCPAndControlPlaneUpgradeReady(t *testing.T) { @@ -141,7 +171,7 @@ func TestKCPReconcileKCPAndControlPlaneUpgradeReady(t *testing.T) { kcpObjs.kcp.Status.UpdatedReplicas = *kcpObjs.kcp.Spec.Replicas kcpObjs.cpUpgrade.Status.Ready = true - runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.cpUpgrade, kcpObjs.kcp} + runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.cpUpgrade, kcpObjs.kcp, kcpObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewKubeadmControlPlaneReconciler(client) req := kcpRequest(kcpObjs.kcp) @@ -157,6 +187,11 @@ func TestKCPReconcileKCPAndControlPlaneUpgradeReady(t *testing.T) { err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.kcp.Name, Namespace: constants.EksaSystemNamespace}, kcp) g.Expect(err).ToNot(HaveOccurred()) g.Expect(kcp.Annotations).ToNot(HaveKey("controlplane.clusters.x-k8s.io/in-place-upgrade-needed")) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestKCPReconcileKCPReadyAndCPUpgradeAlreadyDeleted(t *testing.T) { @@ -166,7 +201,7 @@ func TestKCPReconcileKCPReadyAndCPUpgradeAlreadyDeleted(t *testing.T) { kcpObjs.kcp.Status.UpdatedReplicas = *kcpObjs.kcp.Spec.Replicas - runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.kcp} + runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.kcp, kcpObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewKubeadmControlPlaneReconciler(client) req := kcpRequest(kcpObjs.kcp) @@ -178,6 +213,11 @@ func TestKCPReconcileKCPReadyAndCPUpgradeAlreadyDeleted(t *testing.T) { err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.kcp.Name, Namespace: constants.EksaSystemNamespace}, kcp) 
g.Expect(err).ToNot(HaveOccurred()) g.Expect(kcp.Annotations).ToNot(HaveKey("controlplane.clusters.x-k8s.io/in-place-upgrade-needed")) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: kcpObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestKCPReconcileNotFound(t *testing.T) { @@ -192,6 +232,19 @@ func TestKCPReconcileNotFound(t *testing.T) { g.Expect(err).To(MatchError("kubeadmcontrolplanes.controlplane.cluster.x-k8s.io \"my-cluster\" not found")) } +func TestKCPReconcileMHCNotFound(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + kcpObjs := getObjectsForKCP() + + runtimeObjs := []runtime.Object{kcpObjs.machines[0], kcpObjs.machines[1], kcpObjs.kcp} + client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() + r := controllers.NewKubeadmControlPlaneReconciler(client) + req := kcpRequest(kcpObjs.kcp) + _, err := r.Reconcile(ctx, req) + g.Expect(err).To(MatchError("machinehealthchecks.cluster.x-k8s.io \"my-cluster-kcp-unhealthy\" not found")) +} + func TestKCPReconcileClusterConfigurationMissing(t *testing.T) { g := NewWithT(t) ctx := context.Background() @@ -253,11 +306,13 @@ func getObjectsForKCP() kcpObjects { Name: kcp.Name, UID: kcp.UID, }} + mhc := generateMHCforKCP(kcp.Name) return kcpObjects{ machines: machines, cpUpgrade: cpUpgrade, kcp: kcp, + mhc: mhc, } } @@ -297,3 +352,17 @@ func generateKCP(name string) *controlplanev1.KubeadmControlPlane { }, } } + +func generateMHCforKCP(kcpName string) *clusterv1.MachineHealthCheck { + return &clusterv1.MachineHealthCheck{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-kcp-unhealthy", kcpName), + Namespace: "eksa-system", + }, + Spec: clusterv1.MachineHealthCheckSpec{ + NodeStartupTimeout: &metav1.Duration{ + Duration: 20 * time.Minute, + }, + }, + } +} diff --git a/controllers/machinedeployment_controller.go b/controllers/machinedeployment_controller.go index 936e497f38c3..3b12b45cc48d 100644 --- a/controllers/machinedeployment_controller.go +++ b/controllers/machinedeployment_controller.go @@ -117,12 +117,28 @@ func (r *MachineDeploymentReconciler) reconcile(ctx context.Context, log logr.Lo mdUpgrade := &anywherev1.MachineDeploymentUpgrade{} mduGetErr := r.client.Get(ctx, GetNamespacedNameType(mdUpgradeName(md.Name), constants.EksaSystemNamespace), mdUpgrade) + mhc := &clusterv1.MachineHealthCheck{} + if err := r.client.Get(ctx, GetNamespacedNameType(mdMachineHealthCheckName(md.Name), constants.EksaSystemNamespace), mhc); err != nil { + if apierrors.IsNotFound(err) { + return reconcile.Result{}, err + } + return ctrl.Result{}, fmt.Errorf("getting MachineHealthCheck %s: %v", mdMachineHealthCheckName(md.Name), err) + } + mhcPatchHelper, err := patch.NewHelper(mhc, r.client) + if err != nil { + return ctrl.Result{}, err + } + if md.Spec.Replicas != nil && (*md.Spec.Replicas == md.Status.UpdatedReplicas) { if mduGetErr == nil && mdUpgrade.Status.Ready { log.Info("Machine deployment upgrade complete, deleting object", "MachineDeploymentUpgrade", mdUpgrade.Name) if err := r.client.Delete(ctx, mdUpgrade); err != nil { return ctrl.Result{}, fmt.Errorf("deleting MachineDeploymentUpgrade object: %v", err) } + log.Info("Resuming machine deployment machine health check", "MachineHealthCheck", mdMachineHealthCheckName(md.Name)) + if err := resumeMachineHealthCheck(ctx, mhc, mhcPatchHelper); err != nil { + return ctrl.Result{}, 
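A Gomega detail relevant to the assertions in these tests: `Eventually` is lazy. Its polling function runs only once the assertion is terminated with `Should`/`ShouldNot`; an unterminated `g.Eventually(...)` never polls and therefore asserts nothing, and with the fake client a single `Reconcile` is synchronous anyway, so plain `Expect` checks are sufficient above. A minimal, self-contained illustration (not from this patch) of the terminating call doing the work:

```go
// Illustration only: Eventually polls only because Should is invoked.
package controllers_test

import (
	"testing"

	. "github.com/onsi/gomega"
)

func TestEventuallyNeedsShould(t *testing.T) {
	g := NewWithT(t)

	calls := 0
	g.Eventually(func() int {
		calls++ // runs repeatedly, but only once Should triggers polling
		return calls
	}).Should(BeNumerically(">=", 3))
}
```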
fmt.Errorf("updating annotations for machine health check: %v", err) + } } else if !apierrors.IsNotFound(mduGetErr) { return ctrl.Result{}, fmt.Errorf("getting MachineDeploymentUpgrade for MachineDeployment %s: %v", md.Name, mduGetErr) } @@ -143,6 +159,12 @@ func (r *MachineDeploymentReconciler) reconcile(ctx context.Context, log logr.Lo if err != nil { return ctrl.Result{}, fmt.Errorf("generating MachineDeploymentUpgrade: %v", err) } + + log.Info("Pausing machine deployment machine health check", "MachineHealthCheck", mdMachineHealthCheckName(md.Name)) + if err := pauseMachineHealthCheck(ctx, mhc, mhcPatchHelper); err != nil { + return ctrl.Result{}, fmt.Errorf("updating annotations for machine health check: %v", err) + } + if err := r.client.Create(ctx, mdUpgrade); client.IgnoreAlreadyExists(err) != nil { return ctrl.Result{}, fmt.Errorf("failed to create MachineDeploymentUpgrade for MachineDeployment %s: %v", md.Name, err) } @@ -158,6 +180,11 @@ func (r *MachineDeploymentReconciler) reconcile(ctx context.Context, log logr.Lo return ctrl.Result{}, fmt.Errorf("deleting MachineDeploymentUpgrade object: %v", err) } + log.Info("Resuming machine deployment machine health check", "MachineHealthCheck", mdMachineHealthCheckName(md.Name)) + if err := resumeMachineHealthCheck(ctx, mhc, mhcPatchHelper); err != nil { + return ctrl.Result{}, fmt.Errorf("updating annotations for machine health check: %v", err) + } + return ctrl.Result{}, nil } @@ -220,3 +247,7 @@ func machineDeploymentUpgrade(md *clusterv1.MachineDeployment, machines []corev1 func mdUpgradeName(mdName string) string { return mdName + "-md-upgrade" } + +func mdMachineHealthCheckName(mdName string) string { + return fmt.Sprintf("%s-worker-unhealthy", mdName) +} diff --git a/controllers/machinedeployment_controller_test.go b/controllers/machinedeployment_controller_test.go index 7015e5f5038a..465447b106e0 100644 --- a/controllers/machinedeployment_controller_test.go +++ b/controllers/machinedeployment_controller_test.go @@ -2,7 +2,9 @@ package controllers_test import ( "context" + "fmt" "testing" + "time" . 
"github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -22,6 +24,7 @@ type mdObjects struct { machine *clusterv1.Machine mdUpgrade *anywherev1.MachineDeploymentUpgrade md *clusterv1.MachineDeployment + mhc *clusterv1.MachineHealthCheck } func TestMDSetupWithManager(t *testing.T) { @@ -37,7 +40,7 @@ func TestMDReconcile(t *testing.T) { ctx := context.Background() mdObjs := getObjectsForMD() - runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.mdUpgrade, mdObjs.md} + runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.mdUpgrade, mdObjs.md, mdObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewMachineDeploymentReconciler(client) req := mdRequest(mdObjs.md) @@ -47,6 +50,11 @@ func TestMDReconcile(t *testing.T) { mdu := &anywherev1.MachineDeploymentUpgrade{} err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mdUpgrade.Name, Namespace: constants.EksaSystemNamespace}, mdu) g.Expect(err).ToNot(HaveOccurred()) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestMDReconcileComplete(t *testing.T) { @@ -57,7 +65,7 @@ func TestMDReconcileComplete(t *testing.T) { mdObjs.md.Spec.Replicas = pointer.Int32(1) mdObjs.md.Status.UpdatedReplicas = 1 - runtimeObjs := []runtime.Object{mdObjs.md} + runtimeObjs := []runtime.Object{mdObjs.md, mdObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewMachineDeploymentReconciler(client) req := mdRequest(mdObjs.md) @@ -68,6 +76,18 @@ func TestMDReconcileComplete(t *testing.T) { err = client.Get(ctx, types.NamespacedName{Name: mdObjs.md.Name, Namespace: constants.EksaSystemNamespace}, md) g.Expect(err).ToNot(HaveOccurred()) g.Expect(md.Annotations).ToNot(HaveKey("machinedeployment.clusters.x-k8s.io/in-place-upgrade-needed")) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Eventually(func(g Gomega) error { + func(g Gomega) { + g.Expect(mhc.Annotations).To(HaveKey("cluster.x-k8s.io/paused")) + }(g) + + return nil + }) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestMDReconcileNotNeeded(t *testing.T) { @@ -77,12 +97,17 @@ func TestMDReconcileNotNeeded(t *testing.T) { delete(mdObjs.md.Annotations, "machinedeployment.clusters.x-k8s.io/in-place-upgrade-needed") - runtimeObjs := []runtime.Object{mdObjs.md} + runtimeObjs := []runtime.Object{mdObjs.md, mdObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewMachineDeploymentReconciler(client) req := mdRequest(mdObjs.md) _, err := r.Reconcile(ctx, req) g.Expect(err).ToNot(HaveOccurred()) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestMDReconcileCreateMachineDeploymentUpgrade(t *testing.T) { @@ -90,7 +115,7 @@ func TestMDReconcileCreateMachineDeploymentUpgrade(t *testing.T) { ctx := context.Background() mdObjs := getObjectsForMD() - runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.md} + runtimeObjs := 
[]runtime.Object{mdObjs.machine, mdObjs.md, mdObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewMachineDeploymentReconciler(client) req := mdRequest(mdObjs.md) @@ -103,6 +128,11 @@ func TestMDReconcileCreateMachineDeploymentUpgrade(t *testing.T) { g.Expect(mdu.OwnerReferences).To(BeEquivalentTo(mdObjs.mdUpgrade.OwnerReferences)) g.Expect(len(mdu.Spec.MachinesRequireUpgrade)).To(BeEquivalentTo(1)) g.Expect(mdu.Spec.KubernetesVersion).To(BeEquivalentTo(mdObjs.mdUpgrade.Spec.KubernetesVersion)) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).To(HaveKey("cluster.x-k8s.io/paused")) } func TestMDReconcileMachineDeploymentUpgradeReady(t *testing.T) { @@ -112,7 +142,7 @@ func TestMDReconcileMachineDeploymentUpgradeReady(t *testing.T) { mdObjs.mdUpgrade.Status.Ready = true - runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.md, mdObjs.mdUpgrade} + runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.md, mdObjs.mdUpgrade, mdObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewMachineDeploymentReconciler(client) req := mdRequest(mdObjs.md) @@ -122,6 +152,11 @@ func TestMDReconcileMachineDeploymentUpgradeReady(t *testing.T) { mdu := &anywherev1.MachineDeploymentUpgrade{} err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mdUpgrade.Name, Namespace: constants.EksaSystemNamespace}, mdu) g.Expect(err).To(HaveOccurred()) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestMDReconcileMDAndMachineDeploymentUpgradeReady(t *testing.T) { @@ -132,7 +167,7 @@ func TestMDReconcileMDAndMachineDeploymentUpgradeReady(t *testing.T) { mdObjs.mdUpgrade.Status.Ready = true mdObjs.md.Status.UpdatedReplicas = *mdObjs.md.Spec.Replicas - runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.md, mdObjs.mdUpgrade} + runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.md, mdObjs.mdUpgrade, mdObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewMachineDeploymentReconciler(client) req := mdRequest(mdObjs.md) @@ -148,6 +183,11 @@ func TestMDReconcileMDAndMachineDeploymentUpgradeReady(t *testing.T) { err = client.Get(ctx, types.NamespacedName{Name: mdObjs.md.Name, Namespace: constants.EksaSystemNamespace}, md) g.Expect(err).ToNot(HaveOccurred()) g.Expect(md.Annotations).ToNot(HaveKey("machinedeployment.clusters.x-k8s.io/in-place-upgrade-needed")) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestMDReconcileMDReadyAndMachineDeploymentUpgradeAlreadyDeleted(t *testing.T) { @@ -157,7 +197,7 @@ func TestMDReconcileMDReadyAndMachineDeploymentUpgradeAlreadyDeleted(t *testing. 
mdObjs.md.Status.UpdatedReplicas = *mdObjs.md.Spec.Replicas - runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.md} + runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.md, mdObjs.mhc} client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() r := controllers.NewMachineDeploymentReconciler(client) req := mdRequest(mdObjs.md) @@ -169,6 +209,11 @@ func TestMDReconcileMDReadyAndMachineDeploymentUpgradeAlreadyDeleted(t *testing. err = client.Get(ctx, types.NamespacedName{Name: mdObjs.md.Name, Namespace: constants.EksaSystemNamespace}, md) g.Expect(err).ToNot(HaveOccurred()) g.Expect(md.Annotations).ToNot(HaveKey("machinedeployment.clusters.x-k8s.io/in-place-upgrade-needed")) + + mhc := &clusterv1.MachineHealthCheck{} + err = client.Get(ctx, types.NamespacedName{Name: mdObjs.mhc.Name, Namespace: constants.EksaSystemNamespace}, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(mhc.Annotations).ToNot(HaveKey("cluster.x-k8s.io/paused")) } func TestMDReconcileNotFound(t *testing.T) { @@ -183,6 +228,19 @@ func TestMDReconcileNotFound(t *testing.T) { g.Expect(err).To(MatchError("machinedeployments.cluster.x-k8s.io \"my-cluster\" not found")) } +func TestMDReconcileMHCNotFound(t *testing.T) { + g := NewWithT(t) + ctx := context.Background() + mdObjs := getObjectsForMD() + + runtimeObjs := []runtime.Object{mdObjs.machine, mdObjs.md} + client := fake.NewClientBuilder().WithRuntimeObjects(runtimeObjs...).Build() + r := controllers.NewMachineDeploymentReconciler(client) + req := mdRequest(mdObjs.md) + _, err := r.Reconcile(ctx, req) + g.Expect(err).To(MatchError("machinehealthchecks.cluster.x-k8s.io \"my-cluster-worker-unhealthy\" not found")) +} + func TestMDReconcileVersionMissing(t *testing.T) { g := NewWithT(t) ctx := context.Background() @@ -220,11 +278,13 @@ func getObjectsForMD() mdObjects { Name: md.Name, UID: md.UID, }} + mhc := generateMHCforMD(md.Name) return mdObjects{ machine: machine, mdUpgrade: mdUpgrade, md: md, + mhc: mhc, } } @@ -236,3 +296,17 @@ func mdRequest(md *clusterv1.MachineDeployment) reconcile.Request { }, } } + +func generateMHCforMD(mdName string) *clusterv1.MachineHealthCheck { + return &clusterv1.MachineHealthCheck{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-worker-unhealthy", mdName), + Namespace: "eksa-system", + }, + Spec: clusterv1.MachineHealthCheckSpec{ + NodeStartupTimeout: &metav1.Duration{ + Duration: 20 * time.Minute, + }, + }, + } +}
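The two test fixtures, generateMHCforKCP and generateMHCforMD, are identical except for the object name (`<kcp>-kcp-unhealthy` versus `<md>-worker-unhealthy`), so a single parameterized builder could serve both test files. A sketch (generateMHC is an assumed name, not part of the patch; constants.EksaSystemNamespace is the "eksa-system" namespace the controllers already use):

```go
// Sketch only: one MachineHealthCheck fixture for both test files.
func generateMHC(name string) *clusterv1.MachineHealthCheck {
	return &clusterv1.MachineHealthCheck{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: constants.EksaSystemNamespace,
		},
		Spec: clusterv1.MachineHealthCheckSpec{
			NodeStartupTimeout: &metav1.Duration{Duration: 20 * time.Minute},
		},
	}
}

// Usage: generateMHC(fmt.Sprintf("%s-kcp-unhealthy", kcp.Name)) or
// generateMHC(fmt.Sprintf("%s-worker-unhealthy", md.Name)).
```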