From 94b00f9d99f25197546cd8fc7568b3550d3aa146 Mon Sep 17 00:00:00 2001 From: Saurabh Parekh Date: Thu, 6 Jun 2024 11:53:01 -0700 Subject: [PATCH] Add E2E tests for autoscaler upgrade flow --- test/e2e/autoscaler.go | 20 ++++++ test/e2e/vsphere_test.go | 41 +++++++++++ test/framework/cluster.go | 71 ++++++++++++++----- .../autoscaler_package_workload_cluster.yaml | 15 ++++ 4 files changed, 128 insertions(+), 19 deletions(-) create mode 100644 test/framework/testdata/autoscaler_package_workload_cluster.yaml diff --git a/test/e2e/autoscaler.go b/test/e2e/autoscaler.go index f521c17431f0..eacdfe1960b6 100644 --- a/test/e2e/autoscaler.go +++ b/test/e2e/autoscaler.go @@ -29,3 +29,23 @@ func runAutoscalerWithMetricsServerTinkerbellSimpleFlow(test *framework.ClusterE test.DeleteCluster() test.ValidateHardwareDecommissioned() } + +func runAutoscalerUpgradeFlow(test *framework.MulticlusterE2ETest) { + test.CreateManagementClusterWithConfig() + test.RunInWorkloadClusters(func(e *framework.WorkloadCluster) { + e.GenerateClusterConfig() + e.CreateCluster() + autoscalerName := "cluster-autoscaler" + targetNamespace := "eksa-system" + mgmtCluster := withCluster(test.ManagementCluster) + workloadCluster := withCluster(e.ClusterE2ETest) + test.ManagementCluster.InstallAutoScaler(e.ClusterName, targetNamespace) + test.ManagementCluster.VerifyAutoScalerPackageInstalled(autoscalerName, targetNamespace, mgmtCluster) + e.T.Log("Cluster Autoscaler ready") + e.DeployTestWorkload(workloadCluster) + test.ManagementCluster.RestartClusterAutoscaler(targetNamespace) + e.VerifyWorkerNodesScaleUp(mgmtCluster) + e.DeleteCluster() + }) + test.DeleteManagementCluster() +} diff --git a/test/e2e/vsphere_test.go b/test/e2e/vsphere_test.go index 3944b669ddf2..3c8eb5c25bc0 100644 --- a/test/e2e/vsphere_test.go +++ b/test/e2e/vsphere_test.go @@ -862,6 +862,47 @@ func TestVSphereKubernetes130BottleRocketCuratedPackagesClusterAutoscalerSimpleF runAutoscalerWithMetricsServerSimpleFlow(test) } +func 
TestVSphereKubernetes129BottleRocketWorkloadClusterCuratedPackagesClusterAutoscalerUpgradeFlow(t *testing.T) { + minNodes := 1 + maxNodes := 2 + framework.CheckCuratedPackagesCredentials(t) + provider := framework.NewVSphere(t, framework.WithBottleRocket129()) + test := framework.NewMulticlusterE2ETest( + t, + framework.NewClusterE2ETest( + t, + provider, + framework.WithClusterFiller( + api.WithKubernetesVersion(v1alpha1.Kube129), + api.WithControlPlaneCount(1), + api.WithWorkerNodeCount(1), + api.WithExternalEtcdTopology(1), + ), + ), + framework.NewClusterE2ETest( + t, + provider, + framework.WithClusterFiller( + api.WithKubernetesVersion(v1alpha1.Kube129), + api.WithControlPlaneCount(1), + api.WithWorkerNodeCount(1), + api.WithExternalEtcdTopology(1), + api.WithWorkerNodeAutoScalingConfig(minNodes, maxNodes), + ), + framework.WithPackageConfig( + t, + packageBundleURI(v1alpha1.Kube129), + EksaPackageControllerHelmChartName, + EksaPackageControllerHelmURI, + EksaPackageControllerHelmVersion, + EksaPackageControllerHelmValues, + nil, + ), + ), + ) + runAutoscalerUpgradeFlow(test) +} + func TestVSphereKubernetes126UbuntuCuratedPackagesPrometheusSimpleFlow(t *testing.T) { framework.CheckCuratedPackagesCredentials(t) test := framework.NewClusterE2ETest(t, diff --git a/test/framework/cluster.go b/test/framework/cluster.go index 4f4ebbfcf66e..f9120f948537 100644 --- a/test/framework/cluster.go +++ b/test/framework/cluster.go @@ -1966,36 +1966,55 @@ func (e *ClusterE2ETest) InstallAutoScalerWithMetricServer(targetNamespace strin } } -// CombinedAutoScalerMetricServerTest verifies that new nodes are spun up after using a HPA to scale a deployment. 
-func (e *ClusterE2ETest) CombinedAutoScalerMetricServerTest(autoscalerName, metricServerName, targetNamespace string, mgmtCluster *types.Cluster) { +//go:embed testdata/autoscaler_package_workload_cluster.yaml +var autoscalerPackageWorkloadClusterDeploymentTemplate string + +// InstallAutoScaler installs autoscaler with a given target namespace. +func (e *ClusterE2ETest) InstallAutoScaler(workloadClusterName, targetNamespace string) { ctx := context.Background() - machineDeploymentName := e.ClusterName + "-" + "md-0" - autoscalerDeploymentName := "cluster-autoscaler-clusterapi-cluster-autoscaler" + packageMetadataNamespace := fmt.Sprintf("%s-%s", constants.EksaPackagesName, e.ClusterName) + data := map[string]interface{}{ + "targetNamespace": targetNamespace, + "workloadClusterName": workloadClusterName, + } + + autoscalerPackageWorkloadClusterDeployment, err := templater.Execute(autoscalerPackageWorkloadClusterDeploymentTemplate, data) + if err != nil { + e.T.Fatalf("Failed creating autoscaler Package Deployment: %s", err) + } + err = e.KubectlClient.ApplyKubeSpecFromBytesWithNamespace(ctx, e.Cluster(), autoscalerPackageWorkloadClusterDeployment, + packageMetadataNamespace) + if err != nil { + e.T.Fatalf("Error installing cluster autoscaler package: %s", err) + } +} + +// CombinedAutoScalerMetricServerTest verifies that new nodes are spun up after using a HPA to scale a deployment. +func (e *ClusterE2ETest) CombinedAutoScalerMetricServerTest(autoscalerName, metricServerName, targetNamespace string, mgmtCluster *types.Cluster) { e.VerifyMetricServerPackageInstalled(metricServerName, targetNamespace, mgmtCluster) e.VerifyAutoScalerPackageInstalled(autoscalerName, targetNamespace, mgmtCluster) e.T.Log("Metrics Server and Cluster Autoscaler ready") + e.DeployTestWorkload(mgmtCluster) + e.VerifyWorkerNodesScaleUp(mgmtCluster) +} +// DeployTestWorkload deploys the test workload on the cluster. 
+func (e *ClusterE2ETest) DeployTestWorkload(cluster *types.Cluster) { e.T.Log("Deploying test workload") - err := e.KubectlClient.ApplyKubeSpecFromBytes(ctx, mgmtCluster, autoscalerLoad) + err := e.KubectlClient.ApplyKubeSpecFromBytes(context.Background(), cluster, autoscalerLoad) if err != nil { e.T.Fatalf("Failed to apply autoscaler load %s", err) } +} - // There is a bug in cluster autoscaler currently where it's not able to autoscale the cluster - // because of missing permissions on infrastructure machine template. - // Cluster Autoscaler does restart after ~10 min after which it starts functioning normally. - // We are force triggering a restart so the e2e doesn't have to wait 10 min for the restart. - // This can be removed once the following issue is resolve upstream. - // https://github.com/kubernetes/autoscaler/issues/6490 - _, err = e.KubectlClient.ExecuteCommand(ctx, "rollout", "restart", "deployment", "-n", targetNamespace, autoscalerDeploymentName, "--kubeconfig", e.KubeconfigFilePath()) - if err != nil { - e.T.Fatalf("Failed to rollout cluster autoscaler %s", err) - } - e.VerifyAutoScalerPackageInstalled(autoscalerName, targetNamespace, mgmtCluster) +// VerifyWorkerNodesScaleUp verifies that the worker nodes are scaled up after a test workload is deployed on a cluster with Autoscaler installed. 
+func (e *ClusterE2ETest) VerifyWorkerNodesScaleUp(mgmtCluster *types.Cluster) { + ctx := context.Background() + machineDeploymentName := e.ClusterName + "-" + "md-0" e.T.Log("Waiting for machinedeployment to begin scaling up") - err = e.KubectlClient.WaitJSONPathLoop(ctx, mgmtCluster.KubeconfigFile, "10m", "status.phase", "ScalingUp", + err := e.KubectlClient.WaitJSONPathLoop(ctx, mgmtCluster.KubeconfigFile, "10m", "status.phase", "ScalingUp", fmt.Sprintf("machinedeployments.cluster.x-k8s.io/%s", machineDeploymentName), constants.EksaSystemNamespace) if err != nil { e.T.Fatalf("Failed to get ScalingUp phase for machinedeployment: %s", err) @@ -2008,8 +2027,7 @@ func (e *ClusterE2ETest) CombinedAutoScalerMetricServerTest(autoscalerName, metr e.T.Fatalf("Failed to get Running phase for machinedeployment: %s", err) } - err = e.KubectlClient.WaitForMachineDeploymentReady(ctx, mgmtCluster, "5m", - machineDeploymentName) + err = e.KubectlClient.WaitForMachineDeploymentReady(ctx, mgmtCluster, "5m", machineDeploymentName) if err != nil { e.T.Fatalf("Machine deployment stuck in scaling up: %s", err) } @@ -2017,6 +2035,21 @@ func (e *ClusterE2ETest) CombinedAutoScalerMetricServerTest(autoscalerName, metr e.T.Log("Finished scaling up machines") } +// RestartClusterAutoscaler restarts the cluster autoscaler deployment in the target namespace. +func (e *ClusterE2ETest) RestartClusterAutoscaler(targetNamespace string) { + // There is a bug in cluster autoscaler currently where it's not able to autoscale the cluster + // because of missing permissions on infrastructure machine template. + // Cluster Autoscaler does restart after ~10 min after which it starts functioning normally. + // We are force triggering a restart so the e2e doesn't have to wait 10 min for the restart. + // This can be removed once the following issue is resolved upstream. 
+ // https://github.com/kubernetes/autoscaler/issues/6490 + autoscalerDeploymentName := "cluster-autoscaler-clusterapi-cluster-autoscaler" + _, err := e.KubectlClient.ExecuteCommand(context.Background(), "rollout", "restart", "deployment", "-n", targetNamespace, autoscalerDeploymentName, "--kubeconfig", e.KubeconfigFilePath()) + if err != nil { + e.T.Fatalf("Failed to rollout cluster autoscaler %s", err) + } +} + // ValidateClusterState runs a set of validations against the cluster to identify an invalid cluster state. func (e *ClusterE2ETest) ValidateClusterState() { validateClusterState(e.T.(*testing.T), e) diff --git a/test/framework/testdata/autoscaler_package_workload_cluster.yaml b/test/framework/testdata/autoscaler_package_workload_cluster.yaml new file mode 100644 index 000000000000..f91b46ea29e0 --- /dev/null +++ b/test/framework/testdata/autoscaler_package_workload_cluster.yaml @@ -0,0 +1,15 @@ +apiVersion: packages.eks.amazonaws.com/v1alpha1 +kind: Package +metadata: + name: cluster-autoscaler +spec: + packageName: cluster-autoscaler + targetNamespace: {{.targetNamespace}} + config: |- + cloudProvider: "clusterapi" + clusterAPIMode: "kubeconfig-incluster" + clusterAPIKubeconfigSecret: "{{.workloadClusterName}}-kubeconfig" + autoDiscovery: + clusterName: {{.workloadClusterName}} + +---