From 249f20de206b4394c110c4d9bf8d0d26168959bd Mon Sep 17 00:00:00 2001 From: Cavaughn Browne <113555337+cxbrowne1207@users.noreply.github.com> Date: Tue, 28 May 2024 19:52:34 -0500 Subject: [PATCH] Add poweroff hardware cleanup step after Tinkerbell E2E tests (#8140) * power down hardware in clean up step after tests fix linting errors * address PR comments * change cleanup machines to cleanup resources * moved context timeout inside poweroff hardware; leftover machines -> resources --- .../buildspecs/cloudstack-test-eks-a-cli.yml | 2 +- .../buildspecs/conformance-eks-a-cli.yml | 2 +- .../buildspecs/nutanix-test-eks-a-cli.yml | 2 +- .../build/buildspecs/quick-test-eks-a-cli.yml | 2 +- .../build/buildspecs/snow-test-eks-a-cli.yml | 2 +- .../buildspecs/tinkerbell-test-eks-a-cli.yml | 2 +- .../buildspecs/vsphere-test-eks-a-cli.yml | 2 +- cmd/integration_test/cmd/run.go | 8 +- internal/test/cleanup/cleanup.go | 89 +++++++++++++++++++ internal/test/e2e/run.go | 8 +- internal/test/e2e/setup.go | 6 +- test/e2e/README.md | 2 +- test/e2e/cloudstack_test.go | 12 +-- test/e2e/vsphere_test.go | 4 +- test/framework/cloudstack.go | 3 +- test/framework/cluster.go | 77 ++++------------ test/framework/docker.go | 4 +- test/framework/etcdencryption.go | 5 +- test/framework/nutanix.go | 4 +- test/framework/snow.go | 4 +- test/framework/tinkerbell.go | 6 +- test/framework/vsphere.go | 4 +- 22 files changed, 149 insertions(+), 101 deletions(-) diff --git a/cmd/integration_test/build/buildspecs/cloudstack-test-eks-a-cli.yml b/cmd/integration_test/build/buildspecs/cloudstack-test-eks-a-cli.yml index 8e70a0250f07..50584ea748c2 100644 --- a/cmd/integration_test/build/buildspecs/cloudstack-test-eks-a-cli.yml +++ b/cmd/integration_test/build/buildspecs/cloudstack-test-eks-a-cli.yml @@ -107,7 +107,7 @@ phases: -v 4 --skip ${SKIPPED_TESTS} --bundles-override=${BUNDLES_OVERRIDE} - --cleanup-vms=true + --cleanup-resources=true --test-report-folder=reports --branch-name=${BRANCH_NAME} 
--baremetal-branch=${BAREMETAL_BRANCH} diff --git a/cmd/integration_test/build/buildspecs/conformance-eks-a-cli.yml b/cmd/integration_test/build/buildspecs/conformance-eks-a-cli.yml index 558aec8a6301..d9bb3ea7bbd5 100644 --- a/cmd/integration_test/build/buildspecs/conformance-eks-a-cli.yml +++ b/cmd/integration_test/build/buildspecs/conformance-eks-a-cli.yml @@ -163,7 +163,7 @@ phases: -v 4 --skip ${SKIPPED_TESTS} --bundles-override=${BUNDLES_OVERRIDE} - --cleanup-vms=true + --cleanup-resources=true --test-report-folder=reports reports: e2e-reports: diff --git a/cmd/integration_test/build/buildspecs/nutanix-test-eks-a-cli.yml b/cmd/integration_test/build/buildspecs/nutanix-test-eks-a-cli.yml index deff7c9d3483..58ab4254c7c0 100644 --- a/cmd/integration_test/build/buildspecs/nutanix-test-eks-a-cli.yml +++ b/cmd/integration_test/build/buildspecs/nutanix-test-eks-a-cli.yml @@ -90,7 +90,7 @@ phases: -v 4 --skip ${SKIPPED_TESTS} --bundles-override=${BUNDLES_OVERRIDE} - --cleanup-vms=true + --cleanup-resources=true --test-report-folder=reports --branch-name=${BRANCH_NAME} --baremetal-branch=${BAREMETAL_BRANCH} diff --git a/cmd/integration_test/build/buildspecs/quick-test-eks-a-cli.yml b/cmd/integration_test/build/buildspecs/quick-test-eks-a-cli.yml index c69cd496f74b..ee22ccc83ede 100644 --- a/cmd/integration_test/build/buildspecs/quick-test-eks-a-cli.yml +++ b/cmd/integration_test/build/buildspecs/quick-test-eks-a-cli.yml @@ -216,7 +216,7 @@ phases: -v 4 --skip ${SKIPPED_TESTS} --bundles-override=${BUNDLES_OVERRIDE} - --cleanup-vms=true + --cleanup-resources=true --test-report-folder=reports --branch-name=${BRANCH_NAME} --baremetal-branch=${BAREMETAL_BRANCH} diff --git a/cmd/integration_test/build/buildspecs/snow-test-eks-a-cli.yml b/cmd/integration_test/build/buildspecs/snow-test-eks-a-cli.yml index 124c1033913b..c7fe1db38e79 100644 --- a/cmd/integration_test/build/buildspecs/snow-test-eks-a-cli.yml +++ b/cmd/integration_test/build/buildspecs/snow-test-eks-a-cli.yml 
@@ -57,7 +57,7 @@ phases: -v 4 --skip ${SKIPPED_TESTS} --bundles-override=${BUNDLES_OVERRIDE} - --cleanup-vms=true + --cleanup-resources=true --test-report-folder=reports --branch-name=${BRANCH_NAME} --baremetal-branch=${BAREMETAL_BRANCH} diff --git a/cmd/integration_test/build/buildspecs/tinkerbell-test-eks-a-cli.yml b/cmd/integration_test/build/buildspecs/tinkerbell-test-eks-a-cli.yml index 67f9c5ffdddb..9759bb0c6b25 100644 --- a/cmd/integration_test/build/buildspecs/tinkerbell-test-eks-a-cli.yml +++ b/cmd/integration_test/build/buildspecs/tinkerbell-test-eks-a-cli.yml @@ -104,7 +104,7 @@ phases: -v 4 --skip ${SKIPPED_TESTS} --bundles-override=${BUNDLES_OVERRIDE} - --cleanup-vms=true + --cleanup-resources=true --test-report-folder=reports --branch-name=${BRANCH_NAME} --baremetal-branch=${BAREMETAL_BRANCH} diff --git a/cmd/integration_test/build/buildspecs/vsphere-test-eks-a-cli.yml b/cmd/integration_test/build/buildspecs/vsphere-test-eks-a-cli.yml index 0e6d327238bc..6758074837a0 100644 --- a/cmd/integration_test/build/buildspecs/vsphere-test-eks-a-cli.yml +++ b/cmd/integration_test/build/buildspecs/vsphere-test-eks-a-cli.yml @@ -130,7 +130,7 @@ phases: -v 4 --skip ${SKIPPED_TESTS} --bundles-override=${BUNDLES_OVERRIDE} - --cleanup-vms=true + --cleanup-resources=true --test-report-folder=reports --branch-name=${BRANCH_NAME} --baremetal-branch=${BAREMETAL_BRANCH} diff --git a/cmd/integration_test/cmd/run.go b/cmd/integration_test/cmd/run.go index 5e3aee6d0f85..fbb4580cc77f 100644 --- a/cmd/integration_test/cmd/run.go +++ b/cmd/integration_test/cmd/run.go @@ -22,7 +22,7 @@ const ( maxConcurrentTestsFlagName = "max-concurrent-tests" skipFlagName = "skip" bundlesOverrideFlagName = "bundles-override" - cleanupVmsFlagName = "cleanup-vms" + cleanupResourcesFlagName = "cleanup-resources" testReportFolderFlagName = "test-report-folder" branchNameFlagName = "branch-name" instanceConfigFlagName = "instance-config" @@ -66,7 +66,7 @@ func init() { 
runE2ECmd.Flags().IntP(maxConcurrentTestsFlagName, "p", 1, "Maximum number of parallel tests that can be run at a time") runE2ECmd.Flags().StringSlice(skipFlagName, nil, "List of tests to skip") runE2ECmd.Flags().Bool(bundlesOverrideFlagName, false, "Flag to indicate if the tests should run with a bundles override") - runE2ECmd.Flags().Bool(cleanupVmsFlagName, false, "Flag to indicate if VSphere VMs should be cleaned up automatically as tests complete") + runE2ECmd.Flags().Bool(cleanupResourcesFlagName, false, "Flag to indicate if test resources should be cleaned up automatically as tests complete") runE2ECmd.Flags().String(testReportFolderFlagName, "", "Folder destination for JUnit tests reports") runE2ECmd.Flags().String(branchNameFlagName, "main", "EKS-A origin branch from where the tests are being run") runE2ECmd.Flags().String(baremetalBranchFlagName, "main", "Branch for baremetal tests to run on") @@ -88,7 +88,7 @@ func runE2E(ctx context.Context) error { maxConcurrentTests := viper.GetInt(maxConcurrentTestsFlagName) testsToSkip := viper.GetStringSlice(skipFlagName) bundlesOverride := viper.GetBool(bundlesOverrideFlagName) - cleanupVms := viper.GetBool(cleanupVmsFlagName) + cleanupResources := viper.GetBool(cleanupResourcesFlagName) testReportFolder := viper.GetString(testReportFolderFlagName) branchName := viper.GetString(branchNameFlagName) baremetalBranchName := viper.GetString(baremetalBranchFlagName) @@ -102,7 +102,7 @@ func runE2E(ctx context.Context) error { Regex: testRegex, TestsToSkip: testsToSkip, BundlesOverride: bundlesOverride, - CleanupVms: cleanupVms, + CleanupResources: cleanupResources, TestReportFolder: testReportFolder, BranchName: branchName, TestInstanceConfigFile: instanceConfigFile, diff --git a/internal/test/cleanup/cleanup.go b/internal/test/cleanup/cleanup.go index 023d3e025094..67c7ec16e334 100644 --- a/internal/test/cleanup/cleanup.go +++ b/internal/test/cleanup/cleanup.go @@ -5,19 +5,25 @@ import ( "fmt" "os" "strconv" + 
"strings" "time" "github.com/aws/aws-sdk-go/aws/session" + "github.com/bmc-toolbox/bmclib/v2" + "github.com/go-logr/logr" prismgoclient "github.com/nutanix-cloud-native/prism-go-client" v3 "github.com/nutanix-cloud-native/prism-go-client/v3" + "github.com/aws/eks-anywhere/internal/pkg/api" "github.com/aws/eks-anywhere/internal/pkg/ec2" "github.com/aws/eks-anywhere/internal/pkg/s3" + "github.com/aws/eks-anywhere/pkg/errors" "github.com/aws/eks-anywhere/pkg/executables" "github.com/aws/eks-anywhere/pkg/filewriter" "github.com/aws/eks-anywhere/pkg/logger" "github.com/aws/eks-anywhere/pkg/providers/cloudstack/decoder" "github.com/aws/eks-anywhere/pkg/providers/nutanix" + "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware" "github.com/aws/eks-anywhere/pkg/retrier" "github.com/aws/eks-anywhere/pkg/validations" ) @@ -189,3 +195,86 @@ func NutanixTestResources(clusterName, endpoint, port string, insecure, ignoreEr } return nil } + +// TinkerbellTestResources cleans up machines by powering them down. 
+func TinkerbellTestResources(inventoryCSVFilePath string, ignoreErrors bool) error { + hardwarePool, err := api.NewHardwareMapFromFile(inventoryCSVFilePath) + if err != nil { + return fmt.Errorf("failed to create hardware map from inventory csv: %v", err) + } + + logger.Info("Powering off hardware: %+v", hardwarePool) + return powerOffHardwarePool(hardwarePool, ignoreErrors) +} + +func powerOffHardwarePool(hardware map[string]*hardware.Machine, ignoreErrors bool) error { + errList := []error{} + for _, h := range hardware { + if err := powerOffHardware(h, ignoreErrors); err != nil { + errList = append(errList, err) + } + } + + if len(errList) > 0 { + return fmt.Errorf("failed to power off %d hardware: %+v", len(errList), errors.NewAggregate(errList)) + } + + return nil +} + +func powerOffHardware(h *hardware.Machine, ignoreErrors bool) (reterror error) { + ctx, done := context.WithTimeout(context.Background(), 2*time.Minute) + defer done() + bmcClient := newBmclibClient(logr.Discard(), h.BMCIPAddress, h.BMCUsername, h.BMCPassword) + + if err := bmcClient.Open(ctx); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: Failed to open connection to BMC: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) + return handlePowerOffHardwareError(err, ignoreErrors) + } + + md := bmcClient.GetMetadata() + logger.Info("Connected to BMC: hardware: %v, providersAttempted: %v, successfulProvider: %v", h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) + + defer func() { + if err := bmcClient.Close(ctx); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: BMC close connection failed: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.FailedProviderDetail) + reterror = handlePowerOffHardwareError(err, ignoreErrors) + } + }() + + state, err := bmcClient.GetPowerState(ctx) + if err != nil { 
+ state = "unknown" + } + if strings.Contains(strings.ToLower(state), "off") { + return nil + } + + if _, err := bmcClient.SetPowerState(ctx, "off"); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: failed to power off hardware: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) + return handlePowerOffHardwareError(err, ignoreErrors) + } + + return nil +} + +func handlePowerOffHardwareError(err error, ignoreErrors bool) error { + if err != nil && !ignoreErrors { + return err + } + return nil +} + +// newBmclibClient creates a new BMClib client. +func newBmclibClient(log logr.Logger, hostIP, username, password string) *bmclib.Client { + o := []bmclib.Option{} + log = log.WithValues("host", hostIP, "username", username) + o = append(o, bmclib.WithLogger(log)) + client := bmclib.NewClient(hostIP, username, password, o...) + client.Registry.Drivers = client.Registry.PreferProtocol("redfish") + + return client +} diff --git a/internal/test/e2e/run.go b/internal/test/e2e/run.go index e5c787811b1a..e5b97abaa5b9 100644 --- a/internal/test/e2e/run.go +++ b/internal/test/e2e/run.go @@ -47,7 +47,7 @@ type ParallelRunConf struct { Regex string TestsToSkip []string BundlesOverride bool - CleanupVms bool + CleanupResources bool TestReportFolder string BranchName string BaremetalBranchName string @@ -199,7 +199,7 @@ type instanceRunConf struct { BundlesOverride bool TestRunnerType TestRunnerType TestRunnerConfig TestInfraConfig - CleanupVMs bool + CleanupResources bool Logger logr.Logger Session *session.Session } @@ -231,7 +231,7 @@ func RunTests(conf instanceRunConf, inventoryCatalogue map[string]*hardwareCatal "branch_name", conf.BranchName, "ip_pool", conf.IPPool.ToString(), "hardware_count", conf.HardwareCount, "tinkerbell_airgapped_test", conf.TinkerbellAirgappedTest, "bundles_override", conf.BundlesOverride, "test_runner_type", conf.TestRunnerType, - "cleanup_vms", 
conf.CleanupVMs) + "cleanup_resources", conf.CleanupResources) instanceId, err := testRunner.createInstance(conf) if err != nil { @@ -519,7 +519,7 @@ func newInstanceRunConf(awsSession *session.Session, conf ParallelRunConf, jobNu BundlesOverride: conf.BundlesOverride, TestReportFolder: conf.TestReportFolder, BranchName: conf.BranchName, - CleanupVMs: conf.CleanupVms, + CleanupResources: conf.CleanupResources, TestRunnerType: testRunnerType, TestRunnerConfig: *testRunnerConfig, Logger: conf.Logger.WithValues("jobID", jobID, "test", testRegex), diff --git a/internal/test/e2e/setup.go b/internal/test/e2e/setup.go index e6ea140a88ca..ee66c8bcd3c8 100644 --- a/internal/test/e2e/setup.go +++ b/internal/test/e2e/setup.go @@ -40,7 +40,7 @@ type E2ESession struct { ipPool networkutils.IPPool testEnvVars map[string]string bundlesOverride bool - cleanupVms bool + cleanup bool requiredFiles []string branchName string hardware []*api.Hardware @@ -57,7 +57,7 @@ func newE2ESession(instanceId string, conf instanceRunConf) (*E2ESession, error) ipPool: conf.IPPool, testEnvVars: make(map[string]string), bundlesOverride: conf.BundlesOverride, - cleanupVms: conf.CleanupVMs, + cleanup: conf.CleanupResources, requiredFiles: requiredFiles, branchName: conf.BranchName, hardware: conf.Hardware, @@ -187,7 +187,7 @@ func (e *E2ESession) setup(regex string) error { // Adding JobId to Test Env variables e.testEnvVars[e2etests.JobIdVar] = e.jobId e.testEnvVars[e2etests.BundlesOverrideVar] = strconv.FormatBool(e.bundlesOverride) - e.testEnvVars[e2etests.CleanupVmsVar] = strconv.FormatBool(e.cleanupVms) + e.testEnvVars[e2etests.CleanupResourcesVar] = strconv.FormatBool(e.cleanup) if e.branchName != "" { e.testEnvVars[e2etests.BranchNameEnvVar] = e.branchName diff --git a/test/e2e/README.md b/test/e2e/README.md index 41e3eb87fd20..fa0325a124c6 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -45,7 +45,7 @@ In order to use bundle overrides, take your bundle overrides yaml file and move 
You will also need to set the environment variable `T_BUNDLES_OVERRIDE=true` ### Cleaning up VM's after a test run -In order to clean up VM's after a test runs automatically, set `T_CLEANUP_VMS=true` +In order to clean up VM's after a test runs automatically, set `T_CLEANUP_RESOURCES=true` ## VSphere tests requisites The following env variables need to be set: diff --git a/test/e2e/cloudstack_test.go b/test/e2e/cloudstack_test.go index 02bc373b4d5d..4c0842220670 100644 --- a/test/e2e/cloudstack_test.go +++ b/test/e2e/cloudstack_test.go @@ -3292,12 +3292,12 @@ func TestCloudStackKubernetes126RedhatTo127UpgradeWithCheckpoint(t *testing.T) { ) clusterOpts = append(clusterOpts, framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube127)), framework.ExpectFailure(true), - provider.WithProviderUpgrade(provider.Redhat9Kubernetes126Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupVmsVar, "false")) + provider.WithProviderUpgrade(provider.Redhat9Kubernetes126Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupResourcesVar, "false")) commandOpts := []framework.CommandOpt{framework.WithExternalEtcdWaitTimeout("10m")} clusterOpts2 = append(clusterOpts, framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube127)), framework.ExpectFailure(false), - provider.WithProviderUpgrade(provider.Redhat9Kubernetes127Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupVmsVar, "true")) + provider.WithProviderUpgrade(provider.Redhat9Kubernetes127Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupResourcesVar, "true")) runUpgradeFlowWithCheckpoint( test, @@ -3322,12 +3322,12 @@ func TestCloudStackKubernetes127RedhatTo128UpgradeWithCheckpoint(t *testing.T) { ) clusterOpts = append(clusterOpts, 
framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube128)), framework.ExpectFailure(true), - provider.WithProviderUpgrade(provider.Redhat9Kubernetes127Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupVmsVar, "false")) + provider.WithProviderUpgrade(provider.Redhat9Kubernetes127Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupResourcesVar, "false")) commandOpts := []framework.CommandOpt{framework.WithExternalEtcdWaitTimeout("10m")} clusterOpts2 = append(clusterOpts, framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube128)), framework.ExpectFailure(false), - provider.WithProviderUpgrade(provider.Redhat9Kubernetes128Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupVmsVar, "true")) + provider.WithProviderUpgrade(provider.Redhat9Kubernetes128Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupResourcesVar, "true")) runUpgradeFlowWithCheckpoint( test, @@ -3352,12 +3352,12 @@ func TestCloudStackKubernetes129RedhatTo130UpgradeWithCheckpoint(t *testing.T) { ) clusterOpts = append(clusterOpts, framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube130)), framework.ExpectFailure(true), - provider.WithProviderUpgrade(provider.Redhat9Kubernetes129Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupVmsVar, "false")) + provider.WithProviderUpgrade(provider.Redhat9Kubernetes129Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupResourcesVar, "false")) commandOpts := []framework.CommandOpt{framework.WithExternalEtcdWaitTimeout("10m")} clusterOpts2 = append(clusterOpts, framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube130)), 
framework.ExpectFailure(false), - provider.WithProviderUpgrade(provider.Redhat9Kubernetes130Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupVmsVar, "true")) + provider.WithProviderUpgrade(provider.Redhat9Kubernetes130Template()), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupResourcesVar, "true")) runUpgradeFlowWithCheckpoint( test, diff --git a/test/e2e/vsphere_test.go b/test/e2e/vsphere_test.go index 9697473f23af..3944b669ddf2 100644 --- a/test/e2e/vsphere_test.go +++ b/test/e2e/vsphere_test.go @@ -3924,12 +3924,12 @@ func TestVSphereKubernetes127UbuntuTo128UpgradeWithCheckpoint(t *testing.T) { ) clusterOpts = append(clusterOpts, framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube128)), framework.ExpectFailure(true), - provider.WithProviderUpgrade(provider.Ubuntu128Template(), api.WithResourcePoolforCPMachines(vsphereInvalidResourcePoolUpdateVar)), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupVmsVar, "false")) + provider.WithProviderUpgrade(provider.Ubuntu128Template(), api.WithResourcePoolforCPMachines(vsphereInvalidResourcePoolUpdateVar)), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupResourcesVar, "false")) commandOpts := []framework.CommandOpt{framework.WithControlPlaneWaitTimeout("10m")} clusterOpts2 = append(clusterOpts, framework.WithClusterUpgrade(api.WithKubernetesVersion(v1alpha1.Kube128)), framework.ExpectFailure(false), - provider.WithProviderUpgrade(provider.Ubuntu128Template(), api.WithResourcePoolForAllMachines(os.Getenv(vsphereResourcePoolVar))), framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupVmsVar, "true")) + provider.WithProviderUpgrade(provider.Ubuntu128Template(), api.WithResourcePoolForAllMachines(os.Getenv(vsphereResourcePoolVar))), 
framework.WithEnvVar(features.CheckpointEnabledEnvVar, "true"), framework.WithEnvVar(framework.CleanupResourcesVar, "true")) runUpgradeFlowWithCheckpoint( test, diff --git a/test/framework/cloudstack.go b/test/framework/cloudstack.go index a676cf1b28dc..ee1040862b66 100644 --- a/test/framework/cloudstack.go +++ b/test/framework/cloudstack.go @@ -264,7 +264,8 @@ func (c *CloudStack) ClusterConfigUpdates() []api.ClusterConfigFiller { return []api.ClusterConfigFiller{api.ClusterToConfigFiller(f...), api.CloudStackToConfigFiller(c.fillers...)} } -func (c *CloudStack) CleanupVMs(clusterName string) error { +// CleanupResources satisfies the test framework Provider. +func (c *CloudStack) CleanupResources(clusterName string) error { return cleanup.CloudstackTestResources(context.Background(), clusterName, false, false) } diff --git a/test/framework/cluster.go b/test/framework/cluster.go index 4a179abb195a..333024bab648 100644 --- a/test/framework/cluster.go +++ b/test/framework/cluster.go @@ -61,7 +61,7 @@ const ( BundlesOverrideVar = "T_BUNDLES_OVERRIDE" ClusterIPPoolEnvVar = "T_CLUSTER_IP_POOL" ClusterIPEnvVar = "T_CLUSTER_IP" - CleanupVmsVar = "T_CLEANUP_VMS" + CleanupResourcesVar = "T_CLEANUP_RESOURCES" hardwareYamlPath = "hardware.yaml" hardwareCsvPath = "hardware.csv" EksaPackagesInstallation = "eks-anywhere-packages" @@ -148,7 +148,7 @@ func NewClusterE2ETest(t T, provider Provider, opts ...ClusterE2ETestOpt) *Clust provider.Setup() e.T.Cleanup(func() { - e.CleanupVms() + e.cleanupResources() tinkerbellCIEnvironment := os.Getenv(TinkerbellCIEnvironment) if e.Provider.Name() == tinkerbellProviderName && tinkerbellCIEnvironment == "true" { @@ -341,7 +341,7 @@ type Provider interface { // Prefer to call UpdateClusterConfig directly from the tests to make it more explicit. 
ClusterConfigUpdates() []api.ClusterConfigFiller Setup() - CleanupVMs(clusterName string) error + CleanupResources(clusterName string) error UpdateKubeConfig(content *[]byte, clusterName string) error ClusterStateValidations() []clusterf.StateValidation WithKubeVersionAndOS(kubeVersion v1alpha1.KubernetesVersion, os OS, release *releasev1.EksARelease) api.ClusterConfigFiller @@ -362,53 +362,8 @@ func newBmclibClient(log logr.Logger, hostIP, username, password string) *bmclib return client } -// powerOffHardware issues power off calls to all Hardware. This function does not fail the test if it encounters an error. -// This function is a helper and not part of the code path that we are testing. -// For this reason, we are only logging the errors and not failing the test. -// This function exists not because we need the hardware to be powered off before a test run, -// but because we want to make sure that no other Tinkerbell Boots DHCP server is running. -// Another Boots DHCP server running can cause netboot issues with hardware. 
-func (e *ClusterE2ETest) powerOffHardware() { - for _, h := range e.TestHardware { - ctx, done := context.WithTimeout(context.Background(), 2*time.Minute) - defer done() - bmcClient := newBmclibClient(logr.Discard(), h.BMCIPAddress, h.BMCUsername, h.BMCPassword) - - if err := bmcClient.Open(ctx); err != nil { - md := bmcClient.GetMetadata() - e.T.Logf("Failed to open connection to BMC: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) - - continue - } - md := bmcClient.GetMetadata() - e.T.Logf("Connected to BMC: hardware: %v, providersAttempted: %v, successfulProvider: %v", h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) - - defer func() { - if err := bmcClient.Close(ctx); err != nil { - md := bmcClient.GetMetadata() - e.T.Logf("BMC close connection failed: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.FailedProviderDetail) - } - }() - - state, err := bmcClient.GetPowerState(ctx) - if err != nil { - state = "unknown" - } - if strings.Contains(strings.ToLower(state), "off") { - return - } - - if _, err := bmcClient.SetPowerState(ctx, "off"); err != nil { - md := bmcClient.GetMetadata() - e.T.Logf("failed to power off hardware: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) - continue - } - } -} - // ValidateHardwareDecommissioned checks that the all hardware was powered off during the cluster deletion. -// This function tests that the hardware was powered off during the cluster deletion. If any hardware are not powered off -// this func calls powerOffHardware to power off the hardware and then fails this test. +// This function tests that the hardware was powered off during the cluster deletion. 
func (e *ClusterE2ETest) ValidateHardwareDecommissioned() { var failedToDecomm []*api.Hardware for _, h := range e.TestHardware { @@ -460,7 +415,6 @@ func (e *ClusterE2ETest) ValidateHardwareDecommissioned() { } if len(failedToDecomm) > 0 { - e.powerOffHardware() e.T.Fatalf("failed to decommission all hardware during cluster deletion") } } @@ -913,16 +867,17 @@ func (e *ClusterE2ETest) DeleteCluster(opts ...CommandOpt) { e.deleteCluster(opts...) } -// CleanupVms is a helper to clean up VMs. It is a noop if the T_CLEANUP_VMS environment variable +// cleanupResources is a helper to clean up test resources. It is a noop if the T_CLEANUP_RESOURCES environment variable // is false or unset. -func (e *ClusterE2ETest) CleanupVms() { - if !shouldCleanUpVms() { - e.T.Logf("Skipping VM cleanup") +func (e *ClusterE2ETest) cleanupResources() { + if !shouldCleanUpResources() { + e.T.Logf("Skipping provider resource cleanup") return } - if err := e.Provider.CleanupVMs(e.ClusterName); err != nil { - e.T.Logf("failed to clean up VMs: %v", err) + e.T.Logf("Cleaning up provider resources") + if err := e.Provider.CleanupResources(e.ClusterName); err != nil { + e.T.Logf("failed to clean up %s test resouces: %v", e.Provider.Name(), err) } } @@ -933,9 +888,9 @@ func (e *ClusterE2ETest) CleanupDockerEnvironment() { e.Run("docker", "rm", "-vf", "$(docker ps -a -q)", "||", "true") } -func shouldCleanUpVms() bool { - shouldCleanupVms, err := getCleanupVmsVar() - return err == nil && shouldCleanupVms +func shouldCleanUpResources() bool { + shouldCleanupResources, err := getCleanupResourcesVar() + return err == nil && shouldCleanupResources } func (e *ClusterE2ETest) deleteCluster(opts ...CommandOpt) { @@ -1118,8 +1073,8 @@ func getBundlesOverride() string { return os.Getenv(BundlesOverrideVar) } -func getCleanupVmsVar() (bool, error) { - return strconv.ParseBool(os.Getenv(CleanupVmsVar)) +func getCleanupResourcesVar() (bool, error) { + return strconv.ParseBool(os.Getenv(CleanupResourcesVar)) 
} func setEksctlVersionEnvVar() error { diff --git a/test/framework/docker.go b/test/framework/docker.go index 58842e1f5717..bac169e8ae1a 100644 --- a/test/framework/docker.go +++ b/test/framework/docker.go @@ -39,8 +39,8 @@ func (d *Docker) Name() string { // Setup implements the Provider interface. func (d *Docker) Setup() {} -// CleanupVMs implements the Provider interface. -func (d *Docker) CleanupVMs(_ string) error { +// CleanupResources implements the Provider interface. +func (d *Docker) CleanupResources(_ string) error { return nil } diff --git a/test/framework/etcdencryption.go b/test/framework/etcdencryption.go index 088dbb55f2a9..4d30ad31c550 100644 --- a/test/framework/etcdencryption.go +++ b/test/framework/etcdencryption.go @@ -192,7 +192,7 @@ func (e *ClusterE2ETest) PostClusterCreateEtcdEncryptionSetup() { } // register cleanup step to remove the keys from s3 after the test is done - e.T.Cleanup(e.cleanup) + e.T.Cleanup(e.cleanupKeysFromOIDCConfig) if err := e.deployPodIdentityWebhook(ctx, envVars); err != nil { e.T.Fatal(err) @@ -203,7 +203,8 @@ func (e *ClusterE2ETest) PostClusterCreateEtcdEncryptionSetup() { } } -func (e *ClusterE2ETest) cleanup() { +// cleanupKeysFromOIDCConfig removes the cluster's key from the IAM OIDC config. +func (e *ClusterE2ETest) cleanupKeysFromOIDCConfig() { e.T.Log("Removing cluster's key from the IAM OIDC config") data, err := os.ReadFile(fmt.Sprintf(keyIDFilenameFormat, e.ClusterName)) if err != nil { diff --git a/test/framework/nutanix.go b/test/framework/nutanix.go index ecde5892e811..0e3aa0929176 100644 --- a/test/framework/nutanix.go +++ b/test/framework/nutanix.go @@ -158,8 +158,8 @@ func (n *Nutanix) UpdateKubeConfig(content *[]byte, clusterName string) error { return nil } -// CleanupVMs satisfies the test framework Provider. -func (n *Nutanix) CleanupVMs(clustername string) error { +// CleanupResources satisfies the test framework Provider. 
+func (n *Nutanix) CleanupResources(clustername string) error { return cleanup.NutanixTestResources(clustername, os.Getenv(nutanixEndpoint), os.Getenv(nutanixPort), true, true) } diff --git a/test/framework/snow.go b/test/framework/snow.go index a326c5af1ab3..9e0d5cc67ea0 100644 --- a/test/framework/snow.go +++ b/test/framework/snow.go @@ -102,8 +102,8 @@ func (s *Snow) ClusterConfigUpdates() []api.ClusterConfigFiller { return []api.ClusterConfigFiller{api.ClusterToConfigFiller(f...), api.SnowToConfigFiller(s.fillers...)} } -// CleanupVMs satisfies the test framework Provider. -func (s *Snow) CleanupVMs(clusterName string) error { +// CleanupResources satisfies the test framework Provider. +func (s *Snow) CleanupResources(clusterName string) error { snowDeviceIPs := strings.Split(os.Getenv(snowDevices), ",") s.t.Logf("Cleaning ec2 instances of %s in snow devices: %v", clusterName, snowDeviceIPs) diff --git a/test/framework/tinkerbell.go b/test/framework/tinkerbell.go index 8c521240c979..1e6b89fc7525 100644 --- a/test/framework/tinkerbell.go +++ b/test/framework/tinkerbell.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/aws/eks-anywhere/internal/pkg/api" + "github.com/aws/eks-anywhere/internal/test/cleanup" anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1" releasev1 "github.com/aws/eks-anywhere/release/api/v1alpha1" clusterf "github.com/aws/eks-anywhere/test/framework/cluster" @@ -146,8 +147,9 @@ func (t *Tinkerbell) WithProviderUpgrade(fillers ...api.TinkerbellFiller) Cluste } } -func (t *Tinkerbell) CleanupVMs(_ string) error { - return nil +// CleanupResources cleans up the Tinkerbell machines by powering them down. 
+func (t *Tinkerbell) CleanupResources(_ string) error { + return cleanup.TinkerbellTestResources(t.inventoryCsvFilePath, true) } // WithKubeVersionAndOS returns a cluster config filler that sets the cluster kube version and the right image for all diff --git a/test/framework/vsphere.go b/test/framework/vsphere.go index 822b9f39c0ad..1fc0a138ffb3 100644 --- a/test/framework/vsphere.go +++ b/test/framework/vsphere.go @@ -441,8 +441,8 @@ func (v *VSphere) WithBottleRocket125() api.ClusterConfigFiller { return v.WithKubeVersionAndOS(anywherev1.Kube125, Bottlerocket1, nil) } -// CleanupVMs deletes all the VMs owned by the test EKS-A cluster. It satisfies the test framework Provider. -func (v *VSphere) CleanupVMs(clusterName string) error { +// CleanupResources deletes all the VMs owned by the test EKS-A cluster. It satisfies the test framework Provider. +func (v *VSphere) CleanupResources(clusterName string) error { return cleanup.CleanUpVsphereTestResources(context.Background(), clusterName) }