From ce65db3c698aa37c03ac68d400c12e226ec2f1f4 Mon Sep 17 00:00:00 2001 From: Guillermo Gaston Date: Fri, 15 Sep 2023 14:24:57 -0500 Subject: [PATCH] Validate Memory Availability for machine configs for Vsphere provider as a pre-flight check for create and upgrade operations. Signed-off-by: Rahul Ganesh --- pkg/executables/govc.go | 74 ++++++++++++ pkg/executables/govc_test.go | 79 +++++++++++++ pkg/providers/vsphere/mocks/client.go | 20 ++++ pkg/providers/vsphere/vsphere.go | 156 +++++++++++++++++++++++++- pkg/providers/vsphere/vsphere_test.go | 153 ++++++++++++++++++++++--- 5 files changed, 467 insertions(+), 15 deletions(-) diff --git a/pkg/executables/govc.go b/pkg/executables/govc.go index c60aff0d93803..96035be71a38b 100644 --- a/pkg/executables/govc.go +++ b/pkg/executables/govc.go @@ -11,6 +11,7 @@ import ( "net/http" "os" "path/filepath" + "regexp" "strconv" "strings" "time" @@ -39,6 +40,7 @@ const ( DeployOptsFile = "deploy-opts.json" disk1 = "Hard disk 1" disk2 = "Hard disk 2" + MemoryAvailable = "Memory_Available" ) var requiredEnvs = []string{govcUsernameKey, govcPasswordKey, govcURLKey, govcInsecure, govcDatacenterKey} @@ -1143,3 +1145,75 @@ func (g *Govc) SetGroupRoleOnObject(ctx context.Context, principal string, role return nil } + +type resourcePoolInfo struct { + ResourcePoolIdentifier *resourcePool +} + +type resourcePool struct { + memoryUsage string + memoryLimit string +} + +// GetResourcePoolInfo returns the pool info for the provided resource pool. +func (g *Govc) GetResourcePoolInfo(ctx context.Context, datacenter, resourcepool string, args ...string) (map[string]int, error) { + params := []string{"pool.info", "-dc", datacenter, resourcepool} + params = append(params, args...) + response, err := g.exec(ctx, params...) + if err != nil { + return nil, fmt.Errorf("getting resource pool information: %v", err) + } + + scanner := bufio.NewScanner(strings.NewReader(response.String())) + var resourcePoolInfoResponse resourcePoolInfo + resourcePoolInfoResponse.ResourcePoolIdentifier = new(resourcePool) + for scanner.Scan() { + metaData := scanner.Text() + if strings.Contains(metaData, "Mem Usage") { + resourcePoolInfoResponse.ResourcePoolIdentifier.memoryUsage = strings.Split(metaData, ":")[1] + } + if strings.Contains(metaData, "Mem Limit") { + resourcePoolInfoResponse.ResourcePoolIdentifier.memoryLimit = strings.Split(metaData, ":")[1] + } + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("failure reading memory allocation for resource pool") + } + + poolInfo, err := getPoolInfo(resourcePoolInfoResponse.ResourcePoolIdentifier) + if err != nil { + return nil, err + } + return poolInfo, nil +} + +// helper function that parses the resource pool response and returns memory requirements. +func getPoolInfo(rp *resourcePool) (map[string]int, error) { + memoryUsed, err := getValueFromString(rp.memoryUsage) + if err != nil { + return nil, fmt.Errorf("unable to obtain memory usage for resource pool %s: %v", rp.memoryUsage, err) + } + memoryLimit, err := getValueFromString(rp.memoryLimit) + if err != nil { + return nil, fmt.Errorf("unable to obtain memory limit for resource pool %s: %v", rp.memoryLimit, err) + } + poolInfo := make(map[string]int) + if memoryLimit != -1 { + poolInfo[MemoryAvailable] = memoryLimit - memoryUsed + } else { + poolInfo[MemoryAvailable] = memoryLimit + } + return poolInfo, nil +} + +func getValueFromString(str string) (int, error) { + splitResponse := strings.Split(strings.TrimSpace(str), " ") + nonNumericRegex := regexp.MustCompile(`[^0-9- ]+`) + cleanedString := nonNumericRegex.ReplaceAllString(splitResponse[0], "") + numValue, err := strconv.Atoi(cleanedString) + if err != nil { + return 0, err + } + return numValue, nil +} diff --git a/pkg/executables/govc_test.go b/pkg/executables/govc_test.go index 424fdde9834c6..f3c03109167e1 100644 --- a/pkg/executables/govc_test.go +++ b/pkg/executables/govc_test.go @@ -1643,3 +1643,82 @@ func TestGovcGetHardDiskSizeError(t *testing.T) { }) } } + +func TestGovcGetResourcePoolInfo(t *testing.T) { + datacenter := "SDDC-Datacenter" + resourcePool := "*/Resources/Test-ResourcePool" + govcErr := errors.New("error PoolInfo()") + ctx := context.Background() + _, g, executable, env := setup(t) + + tests := []struct { + testName string + response string + govcErr error + wantErr error + wantMemInfo map[string]int + }{ + { + testName: "pool_info_memory_limit_set", + response: `Name: Test-ResourcePool + Path: /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool + Mem Usage: 100MB (11.3%) + Mem Shares: normal + Mem Reservation: 0MB (expandable=true) + Mem Limit: 1000MB`, + govcErr: nil, + wantErr: nil, + wantMemInfo: map[string]int{executables.MemoryAvailable: 900}, + }, + { + testName: "pool_info_memory_limit_unset", + response: `Name: Test-ResourcePool + Path: /SDDC-Datacenter/host/Cluster-1/Resources/Test-ResourcePool + Mem Usage: 100MB (11.3%) + Mem Shares: normal + Mem Reservation: 0MB (expandable=true) + Mem Limit: -1MB`, + govcErr: nil, + wantErr: nil, + wantMemInfo: map[string]int{executables.MemoryAvailable: -1}, + }, + { + testName: "pool_info_memory_usage_corrupt", + response: `Name: Test-ResourcePool + Mem Usage:corrupt-val + Mem Limit:-1MB`, + govcErr: nil, + wantErr: fmt.Errorf("unable to obtain memory usage for resource pool corrupt-val: strconv.Atoi: parsing \"-\": invalid syntax"), + wantMemInfo: nil, + }, + { + testName: "pool_info_memory_limit_corrupt", + response: `Name: Test-ResourcePool + Mem Usage:100 + Mem Limit:corrupt-val`, + govcErr: nil, + wantErr: fmt.Errorf("unable to obtain memory limit for resource pool corrupt-val: strconv.Atoi: parsing \"-\": invalid syntax"), + wantMemInfo: nil, + }, + { + testName: "pool_info_error", + response: "", + govcErr: govcErr, + wantErr: fmt.Errorf("getting resource pool information: %v", govcErr), + wantMemInfo: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.testName, func(t *testing.T) { + gt := NewWithT(t) + responseBytes := bytes.NewBuffer([]byte(tt.response)) + executable.EXPECT().ExecuteWithEnv(ctx, env, "pool.info", "-dc", datacenter, resourcePool).Return(*responseBytes, tt.govcErr) + poolMemInfo, err := g.GetResourcePoolInfo(ctx, datacenter, resourcePool) + if tt.wantErr != nil { + gt.Expect(err.Error()).To(Equal(tt.wantErr.Error())) + } + gt.Expect(poolMemInfo).To(Equal(tt.wantMemInfo)) + }) + } +} diff --git a/pkg/providers/vsphere/mocks/client.go b/pkg/providers/vsphere/mocks/client.go index 886647f290023..ac5ff8d60e7ba 100644 --- a/pkg/providers/vsphere/mocks/client.go +++ b/pkg/providers/vsphere/mocks/client.go @@ -258,6 +258,26 @@ func (mr *MockProviderGovcClientMockRecorder) GetLibraryElementContentVersion(ar return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetLibraryElementContentVersion", reflect.TypeOf((*MockProviderGovcClient)(nil).GetLibraryElementContentVersion), arg0, arg1) } +// GetResourcePoolInfo mocks base method. +func (m *MockProviderGovcClient) GetResourcePoolInfo(arg0 context.Context, arg1, arg2 string, arg3 ...string) (map[string]int, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1, arg2} + for _, a := range arg3 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "GetResourcePoolInfo", varargs...) + ret0, _ := ret[0].(map[string]int) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetResourcePoolInfo indicates an expected call of GetResourcePoolInfo. +func (mr *MockProviderGovcClientMockRecorder) GetResourcePoolInfo(arg0, arg1, arg2 interface{}, arg3 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1, arg2}, arg3...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetResourcePoolInfo", reflect.TypeOf((*MockProviderGovcClient)(nil).GetResourcePoolInfo), varargs...) +} + // GetTags mocks base method. func (m *MockProviderGovcClient) GetTags(arg0 context.Context, arg1 string) ([]string, error) { m.ctrl.T.Helper() diff --git a/pkg/providers/vsphere/vsphere.go b/pkg/providers/vsphere/vsphere.go index 84b9fa342bfeb..b1ebb2e8043b7 100644 --- a/pkg/providers/vsphere/vsphere.go +++ b/pkg/providers/vsphere/vsphere.go @@ -52,6 +52,7 @@ const ( backOffPeriod = 5 * time.Second disk1 = "Hard disk 1" disk2 = "Hard disk 2" + MemoryAvailable = "Memory_Available" ethtoolDaemonSetName = "vsphere-disable-udp-offload" ) @@ -122,6 +123,7 @@ type ProviderGovcClient interface { CreateRole(ctx context.Context, name string, privileges []string) error SetGroupRoleOnObject(ctx context.Context, principal string, role string, object string, domain string) error GetHardDiskSize(ctx context.Context, vm, datacenter string) (map[string]float64, error) + GetResourcePoolInfo(ctx context.Context, datacenter, resourcepool string, args ...string) (map[string]int, error) } type ProviderKubectlClient interface { @@ -338,7 +340,9 @@ func (p *vsphereProvider) SetupAndValidateCreateCluster(ctx context.Context, clu if err := p.validateDatastoreUsageForCreate(ctx, vSphereClusterSpec); err != nil { return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err) } - + if err := p.validateMemoryUsageForCreate(ctx, vSphereClusterSpec); err != nil { + return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err) + } if err := p.generateSSHKeysIfNotSet(clusterSpec.VSphereMachineConfigs); err != nil { return fmt.Errorf("failed setup and validations: %v", err) } @@ -419,6 +423,10 @@ func (p *vsphereProvider) SetupAndValidateUpgradeCluster(ctx context.Context, cl return fmt.Errorf("validating vsphere machine configs datastore usage: %v", err) } + if err := p.validateMemoryUsageForUpgrade(ctx, vSphereClusterSpec, cluster); err != nil { + return fmt.Errorf("validating vsphere machine configs resource pool memory usage: %v", err) + } + if !p.skippedValidations[validations.VSphereUserPriv] { if err := p.validator.validateVsphereUserPrivs(ctx, vSphereClusterSpec); err != nil { return fmt.Errorf("validating vsphere user privileges: %v", err) @@ -592,6 +600,152 @@ func (p *vsphereProvider) validateDatastoreUsageForCreate(ctx context.Context, v return nil } +type memoryUsage struct { + availableMemoryMiB int + needMemoryMiB int +} + +func (p *vsphereProvider) getPrevMachineConfigMemoryUsage(ctx context.Context, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, count int) (memoryMiB int, err error) { + em, err := p.providerKubectlClient.GetEksaVSphereMachineConfig(ctx, mc.Name, cluster.KubeconfigFile, mc.GetNamespace()) + if err != nil { + return 0, err + } + if em != nil && em.Spec.ResourcePool == mc.Spec.ResourcePool { + return em.Spec.MemoryMiB * em.Spec.NumCPUs * count, nil + } + return 0, nil +} + +func (p *vsphereProvider) getMachineConfigMemoryRequirements(ctx context.Context, dc string, mc *v1alpha1.VSphereMachineConfig, count int) (available int, need int, err error) { + poolInfo, err := p.providerGovcClient.GetResourcePoolInfo(ctx, dc, mc.Spec.ResourcePool) + if err != nil { + return 0, 0, err + } + needMemoryMiB := mc.Spec.MemoryMiB * mc.Spec.NumCPUs * count + return poolInfo[MemoryAvailable], needMemoryMiB, nil +} + +func (p *vsphereProvider) calculateResourcePoolMemoryUsage(ctx context.Context, dc string, mc *v1alpha1.VSphereMachineConfig, cluster *types.Cluster, mu map[string]*memoryUsage, prevCount, newCount int) error { + availableMemoryMiB, needMemoryMiB, err := p.getMachineConfigMemoryRequirements(ctx, dc, mc, newCount) + if err != nil { + return err + } + + // the last old machine is deleted only after the desired number of new machines are rolled out, so reduce 1 count to accommodate for the last old machine which is not deleted until the end of rollout + prevUsage, err := p.getPrevMachineConfigMemoryUsage(ctx, mc, cluster, prevCount-1) + if err != nil { + return err + } + // update the available memory with previous usage only when memory limit is set + if availableMemoryMiB != -1 { + availableMemoryMiB += prevUsage + } + updateMemoryUsageMap(mc, needMemoryMiB, availableMemoryMiB, prevUsage, mu) + return nil +} + +func updateMemoryUsageMap(mc *v1alpha1.VSphereMachineConfig, needMiB, availableMiB, prevUsage int, mu map[string]*memoryUsage) { + if _, ok := mu[mc.Spec.ResourcePool]; ok && mu[mc.Spec.ResourcePool].availableMemoryMiB != -1 { + mu[mc.Spec.ResourcePool].needMemoryMiB += needMiB + mu[mc.Spec.ResourcePool].availableMemoryMiB += prevUsage + } else { + mu[mc.Spec.ResourcePool] = &memoryUsage{ + availableMemoryMiB: availableMiB, + needMemoryMiB: needMiB, + } + } +} + +func (p *vsphereProvider) validateMemoryUsageForCreate(ctx context.Context, clusterSpec *Spec) error { + memoryUsage := make(map[string]*memoryUsage) + datacenter := clusterSpec.VSphereDatacenter.Spec.Datacenter + cpMachineConfig := clusterSpec.controlPlaneMachineConfig() + controlPlaneAvailableMiB, controlPlaneNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, cpMachineConfig, clusterSpec.Cluster.Spec.ControlPlaneConfiguration.Count) + if err != nil { + return fmt.Errorf("calculating memory usage for control plane: %v", err) + } + updateMemoryUsageMap(cpMachineConfig, controlPlaneNeedMiB, controlPlaneAvailableMiB, 0, memoryUsage) + for _, workerNodeGroupConfiguration := range clusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { + workerMachineConfig := clusterSpec.workerMachineConfig(workerNodeGroupConfiguration) + workerAvailableMiB, workerNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, workerMachineConfig, *workerNodeGroupConfiguration.Count) + if err != nil { + return fmt.Errorf("calculating memory usage for worker node groups: %v", err) + } + updateMemoryUsageMap(workerMachineConfig, workerNeedMiB, workerAvailableMiB, 0, memoryUsage) + } + etcdMachineConfig := clusterSpec.etcdMachineConfig() + if etcdMachineConfig != nil { + etcdAvailableMiB, etcdNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, etcdMachineConfig, clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.Count) + if err != nil { + return err + } + updateMemoryUsageMap(etcdMachineConfig, etcdNeedMiB, etcdAvailableMiB, 0, memoryUsage) + } + for resourcePool, usage := range memoryUsage { + if usage.availableMemoryMiB != -1 { + if usage.needMemoryMiB > usage.availableMemoryMiB { + return fmt.Errorf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool) + } + } + } + logger.V(5).Info("Memory availability for machine configs in requested resource pool validated") + return nil +} + +func (p *vsphereProvider) validateMemoryUsageForUpgrade(ctx context.Context, currentClusterSpec *Spec, cluster *types.Cluster) error { + memoryUsage := make(map[string]*memoryUsage) + prevEksaCluster, err := p.providerKubectlClient.GetEksaCluster(ctx, cluster, currentClusterSpec.Cluster.GetName()) + if err != nil { + return err + } + + datacenter := currentClusterSpec.VSphereDatacenter.Spec.Datacenter + cpMachineConfig := currentClusterSpec.controlPlaneMachineConfig() + if err := p.calculateResourcePoolMemoryUsage(ctx, datacenter, cpMachineConfig, cluster, memoryUsage, prevEksaCluster.Spec.ControlPlaneConfiguration.Count, currentClusterSpec.Cluster.Spec.ControlPlaneConfiguration.Count); err != nil { + return fmt.Errorf("calculating memory usage for control plane: %v", err) + } + + prevMachineConfigRefs := machineRefSliceToMap(prevEksaCluster.MachineConfigRefs()) + if err := p.getWorkerNodeGroupMemoryUsage(ctx, datacenter, currentClusterSpec, cluster, memoryUsage, prevMachineConfigRefs); err != nil { + return fmt.Errorf("calculating memory usage for worker node groups: %v", err) + } + + etcdMachineConfig := currentClusterSpec.etcdMachineConfig() + if etcdMachineConfig != nil { + etcdAvailableMiB, etcdNeedMiB, err := p.getMachineConfigMemoryRequirements(ctx, datacenter, etcdMachineConfig, currentClusterSpec.Cluster.Spec.ExternalEtcdConfiguration.Count) + if err != nil { + return err + } + // older etcd machines are not deleted until the end of rollout so do not account for the previous usage + updateMemoryUsageMap(etcdMachineConfig, etcdNeedMiB, etcdAvailableMiB, 0, memoryUsage) + } + + for resourcePool, usage := range memoryUsage { + if usage.availableMemoryMiB != -1 && usage.needMemoryMiB > usage.availableMemoryMiB { + return fmt.Errorf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool) + } + } + logger.V(5).Info("Memory availability for machine configs in requested resource pool validated") + return nil +} + +func (p *vsphereProvider) getWorkerNodeGroupMemoryUsage(ctx context.Context, datacenter string, currentClusterSpec *Spec, cluster *types.Cluster, memoryUsage map[string]*memoryUsage, prevMachineConfigRefs map[string]v1alpha1.Ref) error { + for _, workerNodeGroupConfiguration := range currentClusterSpec.Cluster.Spec.WorkerNodeGroupConfigurations { + prevCount := 0 + workerMachineConfig := currentClusterSpec.workerMachineConfig(workerNodeGroupConfiguration) + if _, ok := prevMachineConfigRefs[workerNodeGroupConfiguration.MachineGroupRef.Name]; ok { + prevCount = *workerNodeGroupConfiguration.Count + } else { + // set count to 1 when no previous machine config was found for the current worker node group in the spec to avoid a negative count in calculation + prevCount = 1 + } + if err := p.calculateResourcePoolMemoryUsage(ctx, datacenter, workerMachineConfig, cluster, memoryUsage, prevCount, *workerNodeGroupConfiguration.Count); err != nil { + return fmt.Errorf("calculating memory usage: %v", err) + } + } + return nil +} + func (p *vsphereProvider) UpdateSecrets(ctx context.Context, cluster *types.Cluster, _ *cluster.Spec) error { var contents bytes.Buffer err := p.createSecret(ctx, cluster, &contents) diff --git a/pkg/providers/vsphere/vsphere_test.go b/pkg/providers/vsphere/vsphere_test.go index 2d0488e2b5e09..97d090f3c2e4b 100644 --- a/pkg/providers/vsphere/vsphere_test.go +++ b/pkg/providers/vsphere/vsphere_test.go @@ -155,6 +155,10 @@ func (pc *DummyProviderGovcClient) GetHardDiskSize(ctx context.Context, vm, data return map[string]float64{"Hard disk 1": 23068672}, nil } +func (pc *DummyProviderGovcClient) GetResourcePoolInfo(ctx context.Context, datacenter, resourcePool string, args ...string) (map[string]int, error) { + return map[string]int{"Memory_Available": -1}, nil +} + func (pc *DummyProviderGovcClient) GetTags(ctx context.Context, path string) (tags []string, err error) { return []string{eksd119ReleaseTag, eksd121ReleaseTag, pc.osTag}, nil } @@ -1469,8 +1473,8 @@ func TestSetupAndValidateUpgradeClusterMissingPrivError(t *testing.T) { provider.providerKubectlClient = kubectl setupContext(t) - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(1) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) vscb := mocks.NewMockVSphereClientBuilder(mockCtrl) vscb.EXPECT().Build(ctx, gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), clusterSpec.VSphereDatacenter.Spec.Datacenter).Return(nil, fmt.Errorf("error")) @@ -1735,8 +1739,8 @@ func TestSetupAndValidateUpgradeCluster(t *testing.T) { provider.providerKubectlClient = kubectl setupContext(t) - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { t.Fatalf("unexpected failure %v", err) @@ -1799,8 +1803,8 @@ func TestSetupAndValidateUpgradeClusterCPSshNotExists(t *testing.T) { provider.providerKubectlClient = kubectl cluster := &types.Cluster{} - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { t.Fatalf("unexpected failure %v", err) @@ -1820,8 +1824,8 @@ func TestSetupAndValidateUpgradeClusterWorkerSshNotExists(t *testing.T) { provider.providerKubectlClient = kubectl cluster := &types.Cluster{} - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { @@ -1842,8 +1846,8 @@ func TestSetupAndValidateUpgradeClusterEtcdSshNotExists(t *testing.T) { provider.providerKubectlClient = kubectl cluster := &types.Cluster{} - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(3) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Times(5) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { @@ -1862,13 +1866,15 @@ func TestSetupAndValidateUpgradeClusterSameMachineConfigforCPandEtcd(t *testing. mockCtrl := gomock.NewController(t) kubectl := mocks.NewMockProviderKubectlClient(mockCtrl) provider.providerKubectlClient = kubectl - cluster := &types.Cluster{} - kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(2) + kubectl.EXPECT().GetEksaCluster(ctx, cluster, clusterSpec.Cluster.GetName()).Return(clusterSpec.Cluster.DeepCopy(), nil).Times(3) for _, mc := range clusterSpec.VSphereMachineConfigs { - kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Return(mc, nil) + kubectl.EXPECT().GetEksaVSphereMachineConfig(ctx, gomock.Any(), cluster.KubeconfigFile, clusterSpec.Cluster.GetNamespace()).Return(mc, nil).AnyTimes() } - + // resourcePoolResponse := map[string]int{ + // "Memory_Available": -1, + // } + // provider.govc.EXPECT().GetResourcePoolInfo(tt.ctx, tt.clusterSpec.VSphereDatacenter.Spec.Datacenter, tt.clusterSpec.VSphereMachineConfigs[controlPlaneMachineConfigName].Spec.ResourcePool).Return(resourcePoolResponse, nil) err := provider.SetupAndValidateUpgradeCluster(ctx, cluster, clusterSpec, clusterSpec) if err != nil { t.Fatalf("unexpected failure %v", err) @@ -2338,6 +2344,10 @@ func TestSetupAndValidateCreateClusterFullCloneDiskGiBLessThan20TemplateDiskSize tt.govc.EXPECT().GetWorkloadAvailableSpace(tt.ctx, tt.clusterSpec.VSphereMachineConfigs[controlPlaneMachineConfigName].Spec.Datastore).Return(100.0, nil) tt.ipValidator.EXPECT().ValidateControlPlaneIPUniqueness(tt.cluster) + resourcePoolResponse := map[string]int{ + "Memory_Available": -1, + } + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, tt.clusterSpec.VSphereDatacenter.Spec.Datacenter, tt.clusterSpec.VSphereMachineConfigs[controlPlaneMachineConfigName].Spec.ResourcePool).Return(resourcePoolResponse, nil) err := tt.provider.SetupAndValidateCreateCluster(context.Background(), tt.clusterSpec) assert.NoError(t, err, "No error expected for provider.SetupAndValidateCreateCluster()") @@ -3472,6 +3482,121 @@ func TestValidateMachineConfigsDatastoreUsageUpgradeError(t *testing.T) { thenErrorExpected(t, fmt.Sprintf("not enough space in datastore %s for given diskGiB and count for respective machine groups", tt.clusterSpec.VSphereMachineConfigs[tt.clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name].Spec.Datastore), err) } +func TestValidateMachineConfigsMemoryUsageCreateSuccess(t *testing.T) { + tt := newProviderTest(t) + machineConfigs := tt.clusterSpec.VSphereMachineConfigs + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool = "test-resourcepool" + for _, config := range machineConfigs { + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: -1}, nil) + } + vSpec := NewSpec(tt.clusterSpec) + err := tt.provider.validateMemoryUsageForCreate(tt.ctx, vSpec) + if err != nil { + t.Fatalf("unexpected failure %v", err) + } +} + +func TestValidateMachineConfigsMemoryUsageCreateError(t *testing.T) { + tt := newProviderTest(t) + machineConfigs := tt.clusterSpec.VSphereMachineConfigs + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + for _, config := range machineConfigs { + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: 100000}, nil) + } + vSpec := NewSpec(tt.clusterSpec) + err := tt.provider.validateMemoryUsageForCreate(tt.ctx, vSpec) + resourcePool := machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool + thenErrorExpected(t, fmt.Sprintf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool), err) +} + +func TestSetupAndValidateCreateClusterMemoryUsageError(t *testing.T) { + tt := newProviderTest(t) + tt.setExpectationForSetup() + tt.setExpectationForVCenterValidation() + tt.setExpectationsForDefaultDiskAndCloneModeGovcCalls() + tt.setExpectationsForMachineConfigsVCenterValidation() + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + cpMachineConfig := tt.machineConfigs[tt.clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name] + for _, mc := range tt.machineConfigs { + tt.govc.EXPECT().SearchTemplate(tt.ctx, tt.datacenterConfig.Spec.Datacenter, mc.Spec.Template).Return(mc.Spec.Template, nil).AnyTimes() + } + tt.govc.EXPECT().GetTags(tt.ctx, cpMachineConfig.Spec.Template).Return([]string{eksd119ReleaseTag, ubuntuOSTag}, nil) + tt.govc.EXPECT().ListTags(tt.ctx) + tt.govc.EXPECT().GetWorkloadAvailableSpace(tt.ctx, cpMachineConfig.Spec.Datastore).Return(1000.0, nil).AnyTimes() + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, cpMachineConfig.Spec.ResourcePool).Return(nil, fmt.Errorf("error")) + err := tt.provider.SetupAndValidateCreateCluster(tt.ctx, tt.clusterSpec) + thenErrorExpected(t, "validating vsphere machine configs resource pool memory usage: calculating memory usage for control plane: error", err) +} + +func TestValidateMachineConfigsMemoryUsageUpgradeSuccess(t *testing.T) { + tt := newProviderTest(t) + cluster := &types.Cluster{ + Name: "test", + } + tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil) + vSpec := NewSpec(tt.clusterSpec) + vSpec.Cluster.Spec.ControlPlaneConfiguration.Count += 2 + // change the worker node group to test there is no negative count scenario + wnMachineConfig := getMachineConfig(vSpec.Spec, "test-wn") + newMachineConfigName := "new-test-wn" + newWorkerMachineConfig := wnMachineConfig.DeepCopy() + newWorkerMachineConfig.Name = newMachineConfigName + vSpec.VSphereMachineConfigs[newMachineConfigName] = newWorkerMachineConfig + vSpec.Cluster.Spec.WorkerNodeGroupConfigurations[0].MachineGroupRef.Name = newMachineConfigName + machineConfigs := tt.clusterSpec.VSphereMachineConfigs + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + for _, config := range machineConfigs { + tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, config.Name, cluster.KubeconfigFile, config.Namespace).Return(config, nil).AnyTimes() + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: -1}, nil).AnyTimes() + } + err := tt.provider.validateMemoryUsageForUpgrade(tt.ctx, vSpec, cluster) + if err != nil { + t.Fatalf("unexpected failure %v", err) + } +} + +func TestValidateMachineConfigsMemoryUsageUpgradeError(t *testing.T) { + tt := newProviderTest(t) + cluster := &types.Cluster{ + Name: "test", + } + tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil) + machineConfigs := tt.clusterSpec.VSphereMachineConfigs + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + for _, config := range machineConfigs { + tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, config.Name, cluster.KubeconfigFile, config.Namespace).AnyTimes() + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, config.Spec.ResourcePool).Return(map[string]int{MemoryAvailable: 10000}, nil) + } + vSpec := NewSpec(tt.clusterSpec) + vSpec.Cluster.Spec.ControlPlaneConfiguration.Count += 2 + err := tt.provider.validateMemoryUsageForUpgrade(tt.ctx, vSpec, cluster) + resourcePool := machineConfigs[tt.clusterSpec.Cluster.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec.ResourcePool + thenErrorExpected(t, fmt.Sprintf("not enough memory avaialable in resource pool %v for given memoryMiB and count for respective machine groups", resourcePool), err) +} + +func TestSetupAndValidateUpgradeClusterMemoryUsageError(t *testing.T) { + tt := newProviderTest(t) + cluster := &types.Cluster{ + Name: "test", + } + tt.setExpectationForSetup() + tt.setExpectationForVCenterValidation() + tt.setExpectationsForDefaultDiskAndCloneModeGovcCalls() + tt.setExpectationsForMachineConfigsVCenterValidation() + tt.kubectl.EXPECT().GetEksaCluster(tt.ctx, cluster, tt.clusterSpec.Cluster.GetName()).Return(tt.clusterSpec.Cluster.DeepCopy(), nil).Times(2) + tt.kubectl.EXPECT().GetEksaVSphereMachineConfig(tt.ctx, gomock.Any(), cluster.KubeconfigFile, tt.clusterSpec.Cluster.GetNamespace()).AnyTimes() + cpMachineConfig := tt.machineConfigs[tt.clusterSpec.Cluster.Spec.ControlPlaneConfiguration.MachineGroupRef.Name] + tt.govc.EXPECT().SearchTemplate(tt.ctx, tt.datacenterConfig.Spec.Datacenter, cpMachineConfig.Spec.Template).Return(cpMachineConfig.Spec.Template, nil).AnyTimes() + tt.govc.EXPECT().GetTags(tt.ctx, cpMachineConfig.Spec.Template).Return([]string{eksd119ReleaseTag, ubuntuOSTag}, nil) + tt.govc.EXPECT().ListTags(tt.ctx) + tt.govc.EXPECT().GetWorkloadAvailableSpace(tt.ctx, cpMachineConfig.Spec.Datastore).Return(1000.0, nil).AnyTimes() + datacenter := tt.clusterSpec.VSphereDatacenter.Spec.Datacenter + tt.govc.EXPECT().GetResourcePoolInfo(tt.ctx, datacenter, cpMachineConfig.Spec.ResourcePool).Return(nil, fmt.Errorf("error")) + err := tt.provider.SetupAndValidateUpgradeCluster(tt.ctx, cluster, tt.clusterSpec, tt.clusterSpec) + thenErrorExpected(t, "validating vsphere machine configs resource pool memory usage: calculating memory usage for control plane: error", err) +} + func TestValidateMachineConfigsNameUniquenessSuccess(t *testing.T) { tt := newProviderTest(t) cluster := &types.Cluster{