From f471099a58e97e8e473e02c40aa54a9edd6f397c Mon Sep 17 00:00:00 2001 From: Yannick Struyf Date: Mon, 27 Nov 2023 14:11:00 +0100 Subject: [PATCH 1/5] first version of failure domain support for CAPX --- api/v1alpha4/nutanixcluster_types.go | 41 +++++ api/v1alpha4/nutanixmachine_types.go | 5 +- api/v1alpha4/zz_generated.conversion.go | 42 +++++ api/v1alpha4/zz_generated.deepcopy.go | 30 +++ api/v1beta1/conditions.go | 10 + api/v1beta1/nutanixcluster_types.go | 41 +++++ api/v1beta1/nutanixmachine_types.go | 5 +- api/v1beta1/zz_generated.deepcopy.go | 30 +++ ...ture.cluster.x-k8s.io_nutanixclusters.yaml | 174 ++++++++++++++++++ ...ture.cluster.x-k8s.io_nutanixmachines.yaml | 6 - ...ster.x-k8s.io_nutanixmachinetemplates.yaml | 6 - controllers/helpers.go | 15 ++ controllers/nutanixcluster_controller.go | 25 +++ controllers/nutanixmachine_controller.go | 62 +++++-- test/e2e/config/nutanix.yaml | 11 ++ .../failure-domain-nmt.yaml | 18 ++ .../failure-domain-patch.yaml | 53 ++++++ .../kustomization.yaml | 12 ++ test/e2e/failure_domains_test.go | 109 +++++++++++ test/e2e/test_helpers.go | 47 +++++ 20 files changed, 710 insertions(+), 32 deletions(-) create mode 100644 test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-nmt.yaml create mode 100644 test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-patch.yaml create mode 100644 test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/kustomization.yaml create mode 100644 test/e2e/failure_domains_test.go diff --git a/api/v1alpha4/nutanixcluster_types.go b/api/v1alpha4/nutanixcluster_types.go index 00fe8b408b..db621f296b 100644 --- a/api/v1alpha4/nutanixcluster_types.go +++ b/api/v1alpha4/nutanixcluster_types.go @@ -50,6 +50,14 @@ type NutanixClusterSpec struct { // proxy spec.noProxy list. // +optional PrismCentral *credentialTypes.NutanixPrismEndpoint `json:"prismCentral"` + + // failureDomains configures failure domains information for the Nutanix platform. + // When set, the failure domains defined here may be used to spread Machines across + // prism element clusters to improve fault tolerance of the cluster. + // +listType=map + // +listMapKey=name + // +optional + FailureDomains []NutanixFailureDomain `json:"failureDomains"` } // NutanixClusterStatus defines the observed state of NutanixCluster @@ -90,6 +98,39 @@ type NutanixCluster struct { Status NutanixClusterStatus `json:"status,omitempty"` } +// NutanixFailureDomain configures failure domain information for Nutanix. +type NutanixFailureDomain struct { + // name defines the unique name of a failure domain. + // Name is required and must be at most 64 characters in length. + // It must consist of only lower case alphanumeric characters and hyphens (-). + // It must start and end with an alphanumeric character. + // This value is arbitrary and is used to identify the failure domain within the platform. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=64 + // +kubebuilder:validation:Pattern=`[a-z0-9]([-a-z0-9]*[a-z0-9])?` + Name string `json:"name"` + + // cluster is to identify the cluster (the Prism Element under management of the Prism Central), + // in which the Machine's VM will be created. The cluster identifier (uuid or name) can be obtained + // from the Prism Central console or using the prism_central API. 
+ // +kubebuilder:validation:Required + Cluster NutanixResourceIdentifier `json:"cluster"` + + // subnets holds a list of identifiers (one or more) of the cluster's network subnets + // for the Machine's VM to connect to. The subnet identifiers (uuid or name) can be + // obtained from the Prism Central console or using the prism_central API. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinItems=1 + // +listType=map + // +listMapKey=type + Subnets []NutanixResourceIdentifier `json:"subnets"` + + // indicates if a failure domain is suited for control plane nodes + // +kubebuilder:validation:Required + ControlPlane bool `json:"controlPlane,omitempty"` +} + // GetConditions returns the set of conditions for this object. func (ncl *NutanixCluster) GetConditions() capiv1.Conditions { return ncl.Status.Conditions diff --git a/api/v1alpha4/nutanixmachine_types.go b/api/v1alpha4/nutanixmachine_types.go index bf289ee0d9..d2d6cbe974 100644 --- a/api/v1alpha4/nutanixmachine_types.go +++ b/api/v1alpha4/nutanixmachine_types.go @@ -61,13 +61,12 @@ type NutanixMachineSpec struct { // of the Prism Central), in which the Machine's VM will be created. // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. - // +kubebuilder:validation:Required + // +kubebuilder:validation:Optional Cluster NutanixResourceIdentifier `json:"cluster"` // subnet is to identify the cluster's network subnet to use for the Machine's VM // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. - // +kubebuilder:validation:Required - // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:Optional Subnets []NutanixResourceIdentifier `json:"subnet"` // List of categories that need to be added to the machines. 
Categories must already exist in Prism Central // +kubebuilder:validation:Optional diff --git a/api/v1alpha4/zz_generated.conversion.go b/api/v1alpha4/zz_generated.conversion.go index 3f8c1ad47b..ce0280392e 100644 --- a/api/v1alpha4/zz_generated.conversion.go +++ b/api/v1alpha4/zz_generated.conversion.go @@ -75,6 +75,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*NutanixFailureDomain)(nil), (*v1beta1.NutanixFailureDomain)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain(a.(*NutanixFailureDomain), b.(*v1beta1.NutanixFailureDomain), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*v1beta1.NutanixFailureDomain)(nil), (*NutanixFailureDomain)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain(a.(*v1beta1.NutanixFailureDomain), b.(*NutanixFailureDomain), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*NutanixMachine)(nil), (*v1beta1.NutanixMachine)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_NutanixMachine_To_v1beta1_NutanixMachine(a.(*NutanixMachine), b.(*v1beta1.NutanixMachine), scope) }); err != nil { @@ -304,6 +314,7 @@ func autoConvert_v1alpha4_NutanixClusterSpec_To_v1beta1_NutanixClusterSpec(in *N return err } out.PrismCentral = (*credentials.NutanixPrismEndpoint)(unsafe.Pointer(in.PrismCentral)) + out.FailureDomains = *(*[]v1beta1.NutanixFailureDomain)(unsafe.Pointer(&in.FailureDomains)) return nil } @@ -312,6 +323,7 @@ func autoConvert_v1beta1_NutanixClusterSpec_To_v1alpha4_NutanixClusterSpec(in *v return err } out.PrismCentral = (*credentials.NutanixPrismEndpoint)(unsafe.Pointer(in.PrismCentral)) + out.FailureDomains = *(*[]NutanixFailureDomain)(unsafe.Pointer(&in.FailureDomains)) return nil } @@ -338,6 +350,36 @@ func Convert_v1beta1_NutanixClusterStatus_To_v1alpha4_NutanixClusterStatus(in *v return autoConvert_v1beta1_NutanixClusterStatus_To_v1alpha4_NutanixClusterStatus(in, out, s) } +func autoConvert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain(in *NutanixFailureDomain, out *v1beta1.NutanixFailureDomain, s conversion.Scope) error { + out.Name = in.Name + if err := Convert_v1alpha4_NutanixResourceIdentifier_To_v1beta1_NutanixResourceIdentifier(&in.Cluster, &out.Cluster, s); err != nil { + return err + } + out.Subnets = *(*[]v1beta1.NutanixResourceIdentifier)(unsafe.Pointer(&in.Subnets)) + out.ControlPlane = in.ControlPlane + return nil +} + +// Convert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain is an autogenerated conversion function. 
+func Convert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain(in *NutanixFailureDomain, out *v1beta1.NutanixFailureDomain, s conversion.Scope) error { + return autoConvert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain(in, out, s) +} + +func autoConvert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain(in *v1beta1.NutanixFailureDomain, out *NutanixFailureDomain, s conversion.Scope) error { + out.Name = in.Name + if err := Convert_v1beta1_NutanixResourceIdentifier_To_v1alpha4_NutanixResourceIdentifier(&in.Cluster, &out.Cluster, s); err != nil { + return err + } + out.Subnets = *(*[]NutanixResourceIdentifier)(unsafe.Pointer(&in.Subnets)) + out.ControlPlane = in.ControlPlane + return nil +} + +// Convert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain is an autogenerated conversion function. +func Convert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain(in *v1beta1.NutanixFailureDomain, out *NutanixFailureDomain, s conversion.Scope) error { + return autoConvert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain(in, out, s) +} + func autoConvert_v1alpha4_NutanixMachine_To_v1beta1_NutanixMachine(in *NutanixMachine, out *v1beta1.NutanixMachine, s conversion.Scope) error { out.ObjectMeta = in.ObjectMeta if err := Convert_v1alpha4_NutanixMachineSpec_To_v1beta1_NutanixMachineSpec(&in.Spec, &out.Spec, s); err != nil { diff --git a/api/v1alpha4/zz_generated.deepcopy.go b/api/v1alpha4/zz_generated.deepcopy.go index df1ac46cff..afc6c94204 100644 --- a/api/v1alpha4/zz_generated.deepcopy.go +++ b/api/v1alpha4/zz_generated.deepcopy.go @@ -112,6 +112,13 @@ func (in *NutanixClusterSpec) DeepCopyInto(out *NutanixClusterSpec) { *out = new(credentials.NutanixPrismEndpoint) (*in).DeepCopyInto(*out) } + if in.FailureDomains != nil { + in, out := &in.FailureDomains, &out.FailureDomains + *out = make([]NutanixFailureDomain, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixClusterSpec. @@ -163,6 +170,29 @@ func (in *NutanixClusterStatus) DeepCopy() *NutanixClusterStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NutanixFailureDomain) DeepCopyInto(out *NutanixFailureDomain) { + *out = *in + in.Cluster.DeepCopyInto(&out.Cluster) + if in.Subnets != nil { + in, out := &in.Subnets, &out.Subnets + *out = make([]NutanixResourceIdentifier, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixFailureDomain. +func (in *NutanixFailureDomain) DeepCopy() *NutanixFailureDomain { + if in == nil { + return nil + } + out := new(NutanixFailureDomain) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *NutanixMachine) DeepCopyInto(out *NutanixMachine) { *out = *in diff --git a/api/v1beta1/conditions.go b/api/v1beta1/conditions.go index fbefdca9e5..54ab2183db 100644 --- a/api/v1beta1/conditions.go +++ b/api/v1beta1/conditions.go @@ -22,6 +22,16 @@ const ( DeletionFailed = "DeletionFailed" ) +const ( + // FailureDomainsReconciled indicates the status of the failure domain reconciliation + FailureDomainsReconciled capiv1.ConditionType = "FailureDomainsReconciled" + + // NoFailureDomainsReconciled indicates no failure domains have been defined + NoFailureDomainsReconciled capiv1.ConditionType = "NoFailureDomainsReconciled" + + FailureDomainsReconciliationFailed = "FailureDomainsReconciliationFailed" +) + const ( // ClusterCategoryCreatedCondition indicates the status of the category linked to the NutanixCluster ClusterCategoryCreatedCondition capiv1.ConditionType = "ClusterCategoryCreated" diff --git a/api/v1beta1/nutanixcluster_types.go b/api/v1beta1/nutanixcluster_types.go index fe92422ff2..b19dd24fb5 100644 --- a/api/v1beta1/nutanixcluster_types.go +++ b/api/v1beta1/nutanixcluster_types.go @@ -50,6 +50,14 @@ type NutanixClusterSpec struct { // proxy spec.noProxy list. // +optional PrismCentral *credentialTypes.NutanixPrismEndpoint `json:"prismCentral"` + + // failureDomains configures failure domains information for the Nutanix platform. + // When set, the failure domains defined here may be used to spread Machines across + // prism element clusters to improve fault tolerance of the cluster. + // +listType=map + // +listMapKey=name + // +optional + FailureDomains []NutanixFailureDomain `json:"failureDomains"` } // NutanixClusterStatus defines the observed state of NutanixCluster @@ -91,6 +99,39 @@ type NutanixCluster struct { Status NutanixClusterStatus `json:"status,omitempty"` } +// NutanixFailureDomain configures failure domain information for Nutanix. +type NutanixFailureDomain struct { + // name defines the unique name of a failure domain. + // Name is required and must be at most 64 characters in length. + // It must consist of only lower case alphanumeric characters and hyphens (-). + // It must start and end with an alphanumeric character. + // This value is arbitrary and is used to identify the failure domain within the platform. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=64 + // +kubebuilder:validation:Pattern=`[a-z0-9]([-a-z0-9]*[a-z0-9])?` + Name string `json:"name"` + + // cluster is to identify the cluster (the Prism Element under management of the Prism Central), + // in which the Machine's VM will be created. The cluster identifier (uuid or name) can be obtained + // from the Prism Central console or using the prism_central API. + // +kubebuilder:validation:Required + Cluster NutanixResourceIdentifier `json:"cluster"` + + // subnets holds a list of identifiers (one or more) of the cluster's network subnets + // for the Machine's VM to connect to. The subnet identifiers (uuid or name) can be + // obtained from the Prism Central console or using the prism_central API. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinItems=1 + // +listType=map + // +listMapKey=type + Subnets []NutanixResourceIdentifier `json:"subnets"` + + // indicates if a failure domain is suited for control plane nodes + // +kubebuilder:validation:Required + ControlPlane bool `json:"controlPlane,omitempty"` +} + // GetConditions returns the set of conditions for this object. 
func (ncl *NutanixCluster) GetConditions() capiv1.Conditions { return ncl.Status.Conditions diff --git a/api/v1beta1/nutanixmachine_types.go b/api/v1beta1/nutanixmachine_types.go index 821edb6e89..ff7d1523ef 100644 --- a/api/v1beta1/nutanixmachine_types.go +++ b/api/v1beta1/nutanixmachine_types.go @@ -61,13 +61,12 @@ type NutanixMachineSpec struct { // of the Prism Central), in which the Machine's VM will be created. // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. - // +kubebuilder:validation:Required + // +kubebuilder:validation:Optional Cluster NutanixResourceIdentifier `json:"cluster"` // subnet is to identify the cluster's network subnet to use for the Machine's VM // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. - // +kubebuilder:validation:Required - // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:Optional Subnets []NutanixResourceIdentifier `json:"subnet"` // List of categories that need to be added to the machines. Categories must already exist in Prism Central // +kubebuilder:validation:Optional diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 682b6617c8..ceeef4c9c5 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -112,6 +112,13 @@ func (in *NutanixClusterSpec) DeepCopyInto(out *NutanixClusterSpec) { *out = new(credentials.NutanixPrismEndpoint) (*in).DeepCopyInto(*out) } + if in.FailureDomains != nil { + in, out := &in.FailureDomains, &out.FailureDomains + *out = make([]NutanixFailureDomain, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixClusterSpec. @@ -163,6 +170,29 @@ func (in *NutanixClusterStatus) DeepCopy() *NutanixClusterStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NutanixFailureDomain) DeepCopyInto(out *NutanixFailureDomain) { + *out = *in + in.Cluster.DeepCopyInto(&out.Cluster) + if in.Subnets != nil { + in, out := &in.Subnets, &out.Subnets + *out = make([]NutanixResourceIdentifier, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixFailureDomain. +func (in *NutanixFailureDomain) DeepCopy() *NutanixFailureDomain { + if in == nil { + return nil + } + out := new(NutanixFailureDomain) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NutanixGPU) DeepCopyInto(out *NutanixGPU) { *out = *in diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixclusters.yaml index 0d477c34c2..a4cee98f35 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixclusters.yaml @@ -64,6 +64,93 @@ spec: - host - port type: object + failureDomains: + description: failureDomains configures failure domains information + for the Nutanix platform. When set, the failure domains defined + here may be used to spread Machines across prism element clusters + to improve fault tolerance of the cluster. 
+ items: + description: NutanixFailureDomain configures failure domain information + for Nutanix. + properties: + cluster: + description: cluster is to identify the cluster (the Prism Element + under management of the Prism Central), in which the Machine's + VM will be created. The cluster identifier (uuid or name) + can be obtained from the Prism Central console or using the + prism_central API. + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this + resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. + type: string + required: + - type + type: object + controlPlane: + description: indicates if a failure domain is suited for control + plane nodes + type: boolean + name: + description: name defines the unique name of a failure domain. + Name is required and must be at most 64 characters in length. + It must consist of only lower case alphanumeric characters + and hyphens (-). It must start and end with an alphanumeric + character. This value is arbitrary and is used to identify + the failure domain within the platform. + maxLength: 64 + minLength: 1 + pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?' + type: string + subnets: + description: subnets holds a list of identifiers (one or more) + of the cluster's network subnets for the Machine's VM to connect + to. The subnet identifiers (uuid or name) can be obtained + from the Prism Central console or using the prism_central + API. + items: + description: NutanixResourceIdentifier holds the identity + of a Nutanix PC resource (cluster, image, subnet, etc.) + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this + resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. + type: string + required: + - type + type: object + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + required: + - cluster + - name + - subnets + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map prismCentral: description: prismCentral holds the endpoint address and port to access the Nutanix Prism Central. When a cluster-wide proxy is installed, @@ -263,6 +350,93 @@ spec: - host - port type: object + failureDomains: + description: failureDomains configures failure domains information + for the Nutanix platform. When set, the failure domains defined + here may be used to spread Machines across prism element clusters + to improve fault tolerance of the cluster. + items: + description: NutanixFailureDomain configures failure domain information + for Nutanix. + properties: + cluster: + description: cluster is to identify the cluster (the Prism Element + under management of the Prism Central), in which the Machine's + VM will be created. The cluster identifier (uuid or name) + can be obtained from the Prism Central console or using the + prism_central API. + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this + resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. 
+ type: string + required: + - type + type: object + controlPlane: + description: indicates if a failure domain is suited for control + plane nodes + type: boolean + name: + description: name defines the unique name of a failure domain. + Name is required and must be at most 64 characters in length. + It must consist of only lower case alphanumeric characters + and hyphens (-). It must start and end with an alphanumeric + character. This value is arbitrary and is used to identify + the failure domain within the platform. + maxLength: 64 + minLength: 1 + pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?' + type: string + subnets: + description: subnets holds a list of identifiers (one or more) + of the cluster's network subnets for the Machine's VM to connect + to. The subnet identifiers (uuid or name) can be obtained + from the Prism Central console or using the prism_central + API. + items: + description: NutanixResourceIdentifier holds the identity + of a Nutanix PC resource (cluster, image, subnet, etc.) + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this + resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. + type: string + required: + - type + type: object + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + required: + - cluster + - name + - subnets + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map prismCentral: description: prismCentral holds the endpoint address and port to access the Nutanix Prism Central. When a cluster-wide proxy is installed, diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml index e5af3bc341..c4ca53d223 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml @@ -207,7 +207,6 @@ spec: required: - type type: object - minItems: 1 type: array systemDiskSize: anyOf: @@ -229,11 +228,9 @@ spec: minimum: 1 type: integer required: - - cluster - image - memorySize - providerID - - subnet - systemDiskSize - vcpuSockets - vcpusPerSocket @@ -569,7 +566,6 @@ spec: required: - type type: object - minItems: 1 type: array systemDiskSize: anyOf: @@ -591,11 +587,9 @@ spec: minimum: 1 type: integer required: - - cluster - image - memorySize - providerID - - subnet - systemDiskSize - vcpuSockets - vcpusPerSocket diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml index 2768525a6d..4157c4b1c5 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml @@ -229,7 +229,6 @@ spec: required: - type type: object - minItems: 1 type: array systemDiskSize: anyOf: @@ -253,11 +252,9 @@ spec: minimum: 1 type: integer required: - - cluster - image - memorySize - providerID - - subnet - systemDiskSize - vcpuSockets - vcpusPerSocket @@ -504,7 +501,6 @@ spec: required: - type type: object - minItems: 1 type: array systemDiskSize: anyOf: @@ -528,11 +524,9 @@ spec: minimum: 1 type: integer required: - - cluster - image - memorySize - providerID - - subnet - systemDiskSize - vcpuSockets - vcpusPerSocket diff --git 
a/controllers/helpers.go b/controllers/helpers.go index ede1634f3a..f4a1cfaa70 100644 --- a/controllers/helpers.go +++ b/controllers/helpers.go @@ -744,3 +744,18 @@ func GetGPUsForPE(ctx context.Context, client *nutanixClientV3.Client, peUUID st } return gpus, nil } + +func GetFailureDomain(failureDomainName string, nutanixCluster *infrav1.NutanixCluster) (*infrav1.NutanixFailureDomain, error) { + if failureDomainName == "" { + return nil, fmt.Errorf("failure domain name must be set when searching for failure domains on a Nutanix cluster object") + } + if nutanixCluster == nil { + return nil, fmt.Errorf("nutanixCluster cannot be nil when searching for failure domains") + } + for _, fd := range nutanixCluster.Spec.FailureDomains { + if fd.Name == failureDomainName { + return &fd, nil + } + } + return nil, fmt.Errorf("failed to find failure domain %s on nutanix cluster object", failureDomainName) +} diff --git a/controllers/nutanixcluster_controller.go b/controllers/nutanixcluster_controller.go index e57be72bf5..78a578db43 100644 --- a/controllers/nutanixcluster_controller.go +++ b/controllers/nutanixcluster_controller.go @@ -256,6 +256,12 @@ func (r *NutanixClusterReconciler) reconcileNormal(rctx *nctx.ClusterContext) (r ctrlutil.AddFinalizer(rctx.NutanixCluster, infrav1.NutanixClusterFinalizer) } + // Reconciling failure domains before Ready check to allow failure domains to be modified + if err := r.reconcileFailureDomains(rctx); err != nil { + log.Error(err, "failed to reconcile failure domains for cluster") + return reconcile.Result{}, err + } + if rctx.NutanixCluster.Status.Ready { log.Info("NutanixCluster is already in ready status.") return reconcile.Result{}, nil @@ -272,6 +278,25 @@ func (r *NutanixClusterReconciler) reconcileNormal(rctx *nctx.ClusterContext) (r return reconcile.Result{}, nil } +func (r *NutanixClusterReconciler) reconcileFailureDomains(rctx *nctx.ClusterContext) error { + log := ctrl.LoggerFrom(rctx.Context) + if len(rctx.NutanixCluster.Spec.FailureDomains) == 0 { + log.V(1).Info("no failure domains defined on cluster") + conditions.MarkTrue(rctx.NutanixCluster, infrav1.NoFailureDomainsReconciled) + return nil + } + log.V(1).Info("Reconciling failure domains for cluster") + // If failure domains is nil on status object, first create empty slice + if rctx.NutanixCluster.Status.FailureDomains == nil { + rctx.NutanixCluster.Status.FailureDomains = make(capiv1.FailureDomains, 0) + } + for _, fd := range rctx.NutanixCluster.Spec.FailureDomains { + rctx.NutanixCluster.Status.FailureDomains[fd.Name] = capiv1.FailureDomainSpec{ControlPlane: fd.ControlPlane} + } + conditions.MarkTrue(rctx.NutanixCluster, infrav1.FailureDomainsReconciled) + return nil +} + func (r *NutanixClusterReconciler) reconcileCategories(rctx *nctx.ClusterContext) error { log := ctrl.LoggerFrom(rctx.Context) log.Info("Reconciling categories for cluster") diff --git a/controllers/nutanixmachine_controller.go b/controllers/nutanixmachine_controller.go index c27404711d..68c588ed07 100644 --- a/controllers/nutanixmachine_controller.go +++ b/controllers/nutanixmachine_controller.go @@ -513,8 +513,8 @@ func (r *NutanixMachineReconciler) reconcileNode(rctx *nctx.MachineContext) (rec } func (r *NutanixMachineReconciler) validateMachineConfig(rctx *nctx.MachineContext) error { - if len(rctx.NutanixMachine.Spec.Subnets) == 0 { - return fmt.Errorf("atleast one subnet is needed to create the VM %s", rctx.NutanixMachine.Name) + if rctx.Machine.Spec.FailureDomain == nil && len(rctx.NutanixMachine.Spec.Subnets) == 0 
{
+ return fmt.Errorf("at least one subnet is needed to create the VM %s if no failure domain is set", rctx.NutanixMachine.Name)
 }

 diskSize := rctx.NutanixMachine.Spec.SystemDiskSize
@@ -576,19 +576,10 @@ func (r *NutanixMachineReconciler) getOrCreateVM(rctx *nctx.MachineContext) (*nu
 return nil, err
 }

- // Get PE UUID
- peUUID, err := GetPEUUID(ctx, nc, rctx.NutanixMachine.Spec.Cluster.Name, rctx.NutanixMachine.Spec.Cluster.UUID)
+ peUUID, subnetUUIDs, err := r.GetSubnetAndPEUUIDs(rctx)
 if err != nil {
- errorMsg := fmt.Errorf("failed to get the Prism Element Cluster UUID to create the VM %s. %v", vmName, err)
- rctx.SetFailureStatus(capierrors.CreateMachineError, errorMsg)
- return nil, err
- }
-
- // Get Subnet UUIDs
- subnetUUIDs, err := GetSubnetUUIDList(ctx, nc, rctx.NutanixMachine.Spec.Subnets, peUUID)
- if err != nil {
- errorMsg := fmt.Errorf("failed to get the subnet UUIDs to create the VM %s. %v", vmName, err)
- rctx.SetFailureStatus(capierrors.CreateMachineError, errorMsg)
+ log.Error(err, fmt.Sprintf("failed to get the config for VM %s.", vmName))
+ rctx.SetFailureStatus(capierrors.CreateMachineError, err)
 return nil, err
 }

@@ -906,3 +897,46 @@ func (r *NutanixMachineReconciler) isGetRemoteClientConnectionError(err error) b
 const expectedErrString = "connect: connection refused"
 return strings.Contains(err.Error(), expectedErrString)
 }
+
+func (r *NutanixMachineReconciler) GetSubnetAndPEUUIDs(rctx *nctx.MachineContext) (string, []string, error) {
+ if rctx == nil {
+ return "", nil, fmt.Errorf("cannot create machine config if machine context is nil")
+ }
+ log := ctrl.LoggerFrom(rctx.Context)
+ if rctx.Machine.Spec.FailureDomain == nil || *rctx.Machine.Spec.FailureDomain == "" {
+ log.V(1).Info("no failure domain found on machine. Directly searching for Prism Element cluster")
+ if rctx.NutanixMachine.Spec.Cluster.Name == nil && rctx.NutanixMachine.Spec.Cluster.UUID == nil {
+ return "", nil, fmt.Errorf("cluster name or uuid must be passed if failure domain is not configured")
+ }
+ if len(rctx.NutanixMachine.Spec.Subnets) == 0 {
+ return "", nil, fmt.Errorf("subnets must be passed if failure domain is not configured")
+ }
+ peUUID, err := GetPEUUID(rctx.Context, rctx.NutanixClient, rctx.NutanixMachine.Spec.Cluster.Name, rctx.NutanixMachine.Spec.Cluster.UUID)
+ if err != nil {
+ return "", nil, err
+ }
+ subnetUUIDs, err := GetSubnetUUIDList(rctx.Context, rctx.NutanixClient, rctx.NutanixMachine.Spec.Subnets, peUUID)
+ if err != nil {
+ return "", nil, err
+ }
+ return peUUID, subnetUUIDs, nil
+ }
+
+ log.V(1).Info("failure domain config found.
Ignoring cluster config on machine object (if any)") + + failureDomainName := *rctx.Machine.Spec.FailureDomain + failureDomain, err := GetFailureDomain(failureDomainName, rctx.NutanixCluster) + if err != nil { + return "", nil, fmt.Errorf("failed to find failure domain %s", failureDomainName) + } + cUUID, err := GetPEUUID(rctx.Context, rctx.NutanixClient, failureDomain.Cluster.Name, failureDomain.Cluster.UUID) + if err != nil { + return "", nil, fmt.Errorf("failed to find prism element uuid for failure domain %s", failureDomainName) + } + subnetUUIDs, err := GetSubnetUUIDList(rctx.Context, rctx.NutanixClient, failureDomain.Subnets, cUUID) + if err != nil { + return "", nil, fmt.Errorf("failed to find subnet uuids for failure domain %s", failureDomainName) + } + + return cUUID, subnetUUIDs, nil +} diff --git a/test/e2e/config/nutanix.yaml b/test/e2e/config/nutanix.yaml index 5aa4a2b9be..d9ebf8dbe4 100644 --- a/test/e2e/config/nutanix.yaml +++ b/test/e2e/config/nutanix.yaml @@ -210,6 +210,7 @@ providers: - sourcePath: "../data/infrastructure-nutanix/v1beta1/cluster-template-kcp-remediation.yaml" - sourcePath: "../data/infrastructure-nutanix/v1beta1/cluster-template-kcp-scale-in.yaml" - sourcePath: "../data/infrastructure-nutanix/v1beta1/cluster-template-csi.yaml" + - sourcePath: "../data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains.yaml" variables: # Default variables for the e2e test; those values could be overridden via env variables, thus @@ -260,6 +261,16 @@ variables: NUTANIX_PRISM_ELEMENT_CLUSTER_IP: "" NUTANIX_PRISM_ELEMENT_CLUSTER_USERNAME: "" NUTANIX_PRISM_ELEMENT_CLUSTER_PASSWORD: "" + # Note: Following parameters are required for failure domain testing + NUTANIX_FAILURE_DOMAIN_1_NAME: "failuredomain-1" + NUTANIX_FAILURE_DOMAIN_1_PRISM_ELEMENT_NAME: "" + NUTANIX_FAILURE_DOMAIN_1_SUBNET_NAME: "" + NUTANIX_FAILURE_DOMAIN_2_NAME: "failuredomain-2" + NUTANIX_FAILURE_DOMAIN_2_PRISM_ELEMENT_NAME: "" + NUTANIX_FAILURE_DOMAIN_2_SUBNET_NAME: "" + NUTANIX_FAILURE_DOMAIN_3_NAME: "failuredomain-3" + NUTANIX_FAILURE_DOMAIN_3_PRISM_ELEMENT_NAME: "" + NUTANIX_FAILURE_DOMAIN_3_SUBNET_NAME: "" # NOTE: INIT_WITH_BINARY and INIT_WITH_KUBERNETES_VERSION are only used by the clusterctl upgrade test to initialize # the management cluster to be upgraded. # NOTE: We test the latest release with a previous contract. 
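
For reviewers, a minimal sketch of the failureDomains block these NUTANIX_FAILURE_DOMAIN_* variables render to once substituted; the Prism Element and subnet names below are illustrative placeholders, not values taken from this change:

apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: NutanixCluster
metadata:
  name: example-cluster
spec:
  failureDomains:
    - name: failuredomain-1
      controlPlane: true
      cluster:                 # the Prism Element backing this failure domain
        type: name
        name: pe-cluster-1     # placeholder
      subnets:
        - type: name
          name: pe-1-subnet    # placeholder
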
diff --git a/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-nmt.yaml b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-nmt.yaml new file mode 100644 index 0000000000..6780401ccd --- /dev/null +++ b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-nmt.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "${CLUSTER_NAME}-mt-0" + namespace: "${NAMESPACE}" +spec: + template: + spec: + providerID: "nutanix://${CLUSTER_NAME}-m1" + bootType: ${NUTANIX_MACHINE_BOOT_TYPE=legacy} + vcpusPerSocket: ${NUTANIX_MACHINE_VCPU_PER_SOCKET=1} + vcpuSockets: ${NUTANIX_MACHINE_VCPU_SOCKET=2} + memorySize: "${NUTANIX_MACHINE_MEMORY_SIZE=4Gi}" + systemDiskSize: "${NUTANIX_SYSTEMDISK_SIZE=40Gi}" + image: + type: name + name: "${NUTANIX_MACHINE_TEMPLATE_IMAGE_NAME}" diff --git a/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-patch.yaml b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-patch.yaml new file mode 100644 index 0000000000..3887db614e --- /dev/null +++ b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-patch.yaml @@ -0,0 +1,53 @@ +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixCluster +metadata: + name: ${CLUSTER_NAME} + namespace: ${NAMESPACE} +spec: + failureDomains: + - name: ${NUTANIX_FAILURE_DOMAIN_1_NAME} + controlPlane: true + cluster: + name: ${NUTANIX_FAILURE_DOMAIN_1_PRISM_ELEMENT_NAME} + type: name + subnets: + - name: ${NUTANIX_FAILURE_DOMAIN_1_SUBNET_NAME} + type: name + - name: ${NUTANIX_FAILURE_DOMAIN_2_NAME} + controlPlane: true + cluster: + name: ${NUTANIX_FAILURE_DOMAIN_2_PRISM_ELEMENT_NAME} + type: name + subnets: + - name: ${NUTANIX_FAILURE_DOMAIN_2_SUBNET_NAME} + type: name + - name: ${NUTANIX_FAILURE_DOMAIN_3_NAME} + controlPlane: true + cluster: + name: ${NUTANIX_FAILURE_DOMAIN_3_PRISM_ELEMENT_NAME} + type: name + subnets: + - name: ${NUTANIX_FAILURE_DOMAIN_3_SUBNET_NAME} + type: name +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: ${CLUSTER_NAME}-kcp + namespace: ${NAMESPACE} +spec: + replicas: 3 +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: ${CLUSTER_NAME} + name: ${CLUSTER_NAME}-wmd + namespace: ${NAMESPACE} +spec: + replicas: 0 + template: + spec: + failureDomain: ${NUTANIX_FAILURE_DOMAIN_1_NAME} diff --git a/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/kustomization.yaml b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/kustomization.yaml new file mode 100644 index 0000000000..2c20306fc0 --- /dev/null +++ b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/kustomization.yaml @@ -0,0 +1,12 @@ +bases: + - ../../../../../../templates/base/cluster-with-kcp.yaml + - ../../../../../../templates/base/secret.yaml + - ../../../../../../templates/base/cm.yaml + - ../../../../../../templates/base/md.yaml + - ../../../../../../templates/base/mhc.yaml + - ../base/crs.yaml + - failure-domain-nmt.yaml + +patchesStrategicMerge: + - ../base/cni-patch.yaml + - failure-domain-patch.yaml diff --git a/test/e2e/failure_domains_test.go b/test/e2e/failure_domains_test.go new file mode 100644 index 0000000000..b3803cb569 
--- /dev/null +++ b/test/e2e/failure_domains_test.go @@ -0,0 +1,109 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2023 Nutanix + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/test/framework/clusterctl" + + infrav1 "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/api/v1beta1" +) + +const ( + nutanixFailureDomain1NameEnv = "NUTANIX_FAILURE_DOMAIN_1_NAME" + nutanixFailureDomain2NameEnv = "NUTANIX_FAILURE_DOMAIN_2_NAME" + nutanixFailureDomain3NameEnv = "NUTANIX_FAILURE_DOMAIN_3_NAME" +) + +// Note: Still has "only-for-validation" label. +var _ = Describe("Nutanix failure domains", Label("capx-feature-test", "failure-domains", "only-for-validation", "slow", "network"), func() { + const specName = "failure-domains" + + var ( + namespace *corev1.Namespace + clusterName string + clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult + cancelWatches context.CancelFunc + failureDomainNames []string + testHelper testHelperInterface + ) + + BeforeEach(func() { + testHelper = newTestHelper(e2eConfig) + failureDomainNames = []string{ + testHelper.getVariableFromE2eConfig(nutanixFailureDomain1NameEnv), + testHelper.getVariableFromE2eConfig(nutanixFailureDomain2NameEnv), + testHelper.getVariableFromE2eConfig(nutanixFailureDomain3NameEnv), + } + clusterName = testHelper.generateTestClusterName(specName) + clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult) + Expect(bootstrapClusterProxy).NotTo(BeNil(), "BootstrapClusterProxy can't be nil") + namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder) + }) + + AfterEach(func() { + dumpSpecResourcesAndCleanup(ctx, specName, bootstrapClusterProxy, artifactFolder, namespace, cancelWatches, clusterResources.Cluster, e2eConfig.GetIntervals, skipCleanup) + }) + + It("Create a cluster with multiple failure domains", func() { + const flavor = "failure-domains" + + Expect(namespace).NotTo(BeNil()) + + By("Creating a workload cluster") + testHelper.deployClusterAndWait( + deployClusterParams{ + clusterName: clusterName, + namespace: namespace, + flavor: flavor, + clusterctlConfigPath: clusterctlConfigPath, + artifactFolder: artifactFolder, + bootstrapClusterProxy: bootstrapClusterProxy, + }, clusterResources) + + By("Checking failure domain condition is true", func() { + testHelper.verifyConditionOnNutanixCluster(verifyConditionParams{ + clusterName: clusterName, + namespace: namespace, + bootstrapClusterProxy: bootstrapClusterProxy, + expectedCondition: clusterv1.Condition{ + Type: infrav1.FailureDomainsReconciled, + Status: corev1.ConditionTrue, + }, + }) + }) + + By("Checking if machines are spread across failure domains", func() { + testHelper.verifyFailureDomainsOnClusterMachines(ctx, verifyFailureDomainsOnClusterMachinesParams{ + clusterName: clusterName, + namespace: namespace, + 
bootstrapClusterProxy: bootstrapClusterProxy,
+ failureDomainNames: failureDomainNames,
+ })
+ })
+
+ By("PASSED!")
+ })
+})
diff --git a/test/e2e/test_helpers.go b/test/e2e/test_helpers.go
index bf7eab13fa..6326b261a1 100644
--- a/test/e2e/test_helpers.go
+++ b/test/e2e/test_helpers.go
@@ -147,6 +147,7 @@ type testHelperInterface interface {
 verifyCategoriesNutanixMachines(ctx context.Context, clusterName, namespace string, expectedCategories map[string]string)
 verifyConditionOnNutanixCluster(params verifyConditionParams)
 verifyConditionOnNutanixMachines(params verifyConditionParams)
+ verifyFailureDomainsOnClusterMachines(ctx context.Context, params verifyFailureDomainsOnClusterMachinesParams)
 verifyFailureMessageOnClusterMachines(ctx context.Context, params verifyFailureMessageOnClusterMachinesParams)
 verifyGPUNutanixMachines(ctx context.Context, params verifyGPUNutanixMachinesParams)
 verifyProjectNutanixMachines(ctx context.Context, params verifyProjectNutanixMachinesParams)
@@ -648,6 +649,52 @@ func (t testHelper) verifyConditionOnNutanixMachines(params verifyConditionParam
 )
 }

+type verifyFailureDomainsOnClusterMachinesParams struct {
+ clusterName string
+ namespace *corev1.Namespace
+ failureDomainNames []string
+ bootstrapClusterProxy framework.ClusterProxy
+}
+
+func (t testHelper) verifyFailureDomainsOnClusterMachines(ctx context.Context, params verifyFailureDomainsOnClusterMachinesParams) {
+ Eventually(func() bool {
+ nutanixCluster := t.getNutanixClusterByName(ctx, getNutanixClusterByNameInput{
+ Getter: params.bootstrapClusterProxy.GetClient(),
+ Name: params.clusterName,
+ Namespace: params.namespace.Name,
+ })
+ Expect(nutanixCluster).ToNot(BeNil())
+ for _, fdName := range params.failureDomainNames {
+ // Reset the match flag for each failure domain so every domain is verified independently
+ match := false
+ nutanixMachines := t.getMachinesForCluster(ctx, params.clusterName, params.namespace.Name, params.bootstrapClusterProxy)
+ for _, m := range nutanixMachines.Items {
+ machineSpec := m.Spec
+ if machineSpec.FailureDomain != nil && *machineSpec.FailureDomain == fdName {
+ // failure domain had a match
+ match = true
+ // Search for failure domain
+ fd, err := controllers.GetFailureDomain(fdName, nutanixCluster)
+ Expect(err).ShouldNot(HaveOccurred())
+ Expect(fd).ToNot(BeNil())
+ // Search for VM
+ machineVmUUID := t.stripNutanixIDFromProviderID(*machineSpec.ProviderID)
+ vm, err := t.nutanixClient.V3.GetVM(ctx, machineVmUUID)
+ Expect(err).ShouldNot(HaveOccurred())
+ Expect(vm).ToNot(BeNil())
+ // Check if correct PE and subnet are used
+ Expect(*vm.Spec.ClusterReference.Name).To(Equal(*fd.Cluster.Name))
+ Expect(*vm.Spec.Resources.NicList[0].SubnetReference.Name).To(Equal(*fd.Subnets[0].Name))
+ break
+ }
+ }
+ if !match {
+ return false
+ }
+ }
+ return true
+ }, defaultTimeout, defaultInterval).Should(BeTrue())
+}
+
 type verifyFailureMessageOnClusterMachinesParams struct {
 clusterName string
 namespace *corev1.Namespace

From a2a011a3fc7993d1e058bbbaebbf773ed2572b6b Mon Sep 17 00:00:00 2001
From: Yannick Struyf
Date: Thu, 30 Nov 2023 09:37:46 +0100
Subject: [PATCH 2/5] Add unit tests for failure domains

---
 controllers/helpers.go | 5 +-
 controllers/helpers_test.go | 123 ++++++++++++
 controllers/nutanixcluster_controller_test.go | 182 ++++++++++++++++--
 controllers/nutanixmachine_controller_test.go | 136 ++++++++++++-
 4 files changed, 417 insertions(+), 29 deletions(-)
 create mode 100644 controllers/helpers_test.go

diff --git a/controllers/helpers.go b/controllers/helpers.go
index f4a1cfaa70..7f335ce82b 100644
--- a/controllers/helpers.go
+++ b/controllers/helpers.go
@@ -185,8 +185,11 @@ func
FindVMByName(ctx context.Context, client *nutanixClientV3.Client, vmName st // GetPEUUID returns the UUID of the Prism Element cluster with the given name func GetPEUUID(ctx context.Context, client *nutanixClientV3.Client, peName, peUUID *string) (string, error) { + if client == nil { + return "", fmt.Errorf("cannot retrieve Prism Element UUID if nutanix client is nil") + } if peUUID == nil && peName == nil { - return "", fmt.Errorf("cluster name or uuid must be passed in order to retrieve the pe") + return "", fmt.Errorf("cluster name or uuid must be passed in order to retrieve the Prism Element UUID") } if peUUID != nil && *peUUID != "" { peIntentResponse, err := client.V3.GetCluster(ctx, *peUUID) diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go new file mode 100644 index 0000000000..c4a12bf4d0 --- /dev/null +++ b/controllers/helpers_test.go @@ -0,0 +1,123 @@ +/* +Copyright 2023 Nutanix + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "testing" + + credentialTypes "github.com/nutanix-cloud-native/prism-go-client/environment/credentials" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/cluster-api/util" + + infrav1 "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/api/v1beta1" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestControllerHelpers(t *testing.T) { + g := NewWithT(t) + + _ = Describe("ControllerHelpers", func() { + const ( + fd1Name = "fd-1" + fd2Name = "fd-2" + ) + + var ( + ntnxCluster *infrav1.NutanixCluster + ctx context.Context + ) + + BeforeEach(func() { + ctx = context.Background() + ntnxCluster = &infrav1.NutanixCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }, + Spec: infrav1.NutanixClusterSpec{ + PrismCentral: &credentialTypes.NutanixPrismEndpoint{ + // Adding port info to override default value (0) + Port: 9440, + }, + }, + } + }) + + AfterEach(func() { + err := k8sClient.Delete(ctx, ntnxCluster) + Expect(err).NotTo(HaveOccurred()) + }) + + Context("Get failure domains", func() { + It("should error when passing empty failure domain name", func() { + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + _, err := GetFailureDomain("", ntnxCluster) + Expect(err).To(HaveOccurred()) + }) + It("should error when passing nil cluster", func() { + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + _, err := GetFailureDomain(fd1Name, nil) + Expect(err).To(HaveOccurred()) + }) + It("should error when no failure domain has been found", func() { + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + _, err := GetFailureDomain(fd1Name, ntnxCluster) + Expect(err).To(HaveOccurred()) + }) + It("should return the correct failuredomain", func() { + r := util.RandomString(10) + fd1 := infrav1.NutanixFailureDomain{ + Name: fd1Name, + Cluster: infrav1.NutanixResourceIdentifier{ + Type: infrav1.NutanixIdentifierName, + Name: &r, + }, + Subnets: []infrav1.NutanixResourceIdentifier{ + { + Type: infrav1.NutanixIdentifierName, + Name: &r, + }, + }, + } + fd2 := infrav1.NutanixFailureDomain{ + Name: fd2Name, + Cluster: infrav1.NutanixResourceIdentifier{ + Type: infrav1.NutanixIdentifierName, + Name: &r, + }, + Subnets: []infrav1.NutanixResourceIdentifier{ + { + Type: infrav1.NutanixIdentifierName, + Name: &r, + }, + }, + } + ntnxCluster.Spec.FailureDomains = []infrav1.NutanixFailureDomain{ + fd1, + fd2, + } + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + fd, err := GetFailureDomain(fd2Name, ntnxCluster) + Expect(err).ToNot(HaveOccurred()) + Expect(*fd).To(Equal(fd2)) + }) + }) + }) +} diff --git a/controllers/nutanixcluster_controller_test.go b/controllers/nutanixcluster_controller_test.go index dbfecd397c..266095d970 100644 --- a/controllers/nutanixcluster_controller_test.go +++ b/controllers/nutanixcluster_controller_test.go @@ -21,48 +21,81 @@ import ( "testing" credentialTypes "github.com/nutanix-cloud-native/prism-go-client/environment/credentials" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/cluster-api/util" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" infrav1 "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/api/v1beta1" + nctx "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/pkg/context" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + "github.com/onsi/gomega/gstruct" ) func TestNutanixClusterReconciler(t *testing.T) { g := NewWithT(t) _ = Describe("NutanixClusterReconciler", func() { - Context("Reconcile an NutanixCluster", func() { - It("should not error and not requeue the request", func() { - ctx := context.Background() - reconciler := &NutanixClusterReconciler{ - Client: k8sClient, - Scheme: runtime.NewScheme(), - } + const ( + fd1Name = "fd-1" + fd2Name = "fd-2" + ) + + var ( + ntnxCluster *infrav1.NutanixCluster + ctx context.Context + fd1 infrav1.NutanixFailureDomain + reconciler *NutanixClusterReconciler + r string + ) - ntnxCluster := &infrav1.NutanixCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test", - Namespace: "default", + BeforeEach(func() { + ctx = context.Background() + r := util.RandomString(10) + ntnxCluster = &infrav1.NutanixCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }, + Spec: infrav1.NutanixClusterSpec{ + PrismCentral: &credentialTypes.NutanixPrismEndpoint{ + // Adding port info to override default value (0) + Port: 9440, }, - Spec: infrav1.NutanixClusterSpec{ - PrismCentral: &credentialTypes.NutanixPrismEndpoint{ - // Adding port info to override default value (0) - Port: 9440, - }, + }, + } + fd1 = infrav1.NutanixFailureDomain{ + Name: fd1Name, + Cluster: infrav1.NutanixResourceIdentifier{ + Type: infrav1.NutanixIdentifierName, + Name: &r, + }, + Subnets: []infrav1.NutanixResourceIdentifier{ + { + Type: infrav1.NutanixIdentifierName, + Name: &r, }, - } + }, + } + reconciler = &NutanixClusterReconciler{ + Client: k8sClient, + Scheme: runtime.NewScheme(), + } + }) + AfterEach(func() { + err := k8sClient.Delete(ctx, ntnxCluster) + Expect(err).NotTo(HaveOccurred()) + }) + + Context("Reconcile an NutanixCluster", func() { + It("should not error and not requeue the request", func() { // Create the NutanixCluster object and expect the Reconcile to be created g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) - defer func() { - err := k8sClient.Delete(ctx, ntnxCluster) - Expect(err).NotTo(HaveOccurred()) - }() result, err := reconciler.Reconcile(ctx, ctrl.Request{ NamespacedName: client.ObjectKey{ @@ -75,5 +108,112 @@ func TestNutanixClusterReconciler(t *testing.T) { g.Expect(result.Requeue).To(BeFalse()) }) }) + + Context("ReconcileNormal for a NutanixCluster", func() { + It("should not requeue if failure message is set on nutanixCluster", func() { + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + ntnxCluster.Status.FailureMessage = &r + result, err := reconciler.reconcileNormal(&nctx.ClusterContext{ + Context: ctx, + NutanixCluster: ntnxCluster, + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(result.RequeueAfter).To(BeZero()) + g.Expect(result.Requeue).To(BeFalse()) + }) + It("should not error and not requeue if no failure domains are configured and cluster is Ready", func() { + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + ntnxCluster.Status.Ready = true + result, err := reconciler.reconcileNormal(&nctx.ClusterContext{ + Context: ctx, + NutanixCluster: ntnxCluster, + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(result.RequeueAfter).To(BeZero()) + g.Expect(result.Requeue).To(BeFalse()) + }) + It("should not error and not requeue if failure domains are configured and cluster is Ready", func() { + ntnxCluster.Spec.FailureDomains = []infrav1.NutanixFailureDomain{ + fd1, + } + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + ntnxCluster.Status.Ready = true + result, err := 
reconciler.reconcileNormal(&nctx.ClusterContext{ + Context: ctx, + NutanixCluster: ntnxCluster, + }) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(result.RequeueAfter).To(BeZero()) + g.Expect(result.Requeue).To(BeFalse()) + }) + }) + + Context("Reconcile failure domains", func() { + It("sets the failure domains in the nutanixcluster status and failure domain reconciled condition", func() { + ntnxCluster.Spec.FailureDomains = []infrav1.NutanixFailureDomain{ + fd1, + } + + // Create the NutanixCluster object and expect the Reconcile to be created + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + // Retrieve the applied nutanix cluster objects + appliedNtnxCluster := &infrav1.NutanixCluster{} + k8sClient.Get(ctx, client.ObjectKey{ + Namespace: ntnxCluster.Namespace, + Name: ntnxCluster.Name, + }, appliedNtnxCluster) + + err := reconciler.reconcileFailureDomains(&nctx.ClusterContext{ + Context: ctx, + NutanixCluster: appliedNtnxCluster, + }) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(appliedNtnxCluster.Status.Conditions).To(ContainElement( + gstruct.MatchFields( + gstruct.IgnoreExtras, + gstruct.Fields{ + "Type": Equal(infrav1.FailureDomainsReconciled), + "Status": Equal(corev1.ConditionTrue), + }, + ), + )) + g.Expect(appliedNtnxCluster.Status.FailureDomains).To(HaveKey(fd1Name)) + g.Expect(appliedNtnxCluster.Status.FailureDomains[fd1Name]).To(gstruct.MatchFields( + gstruct.IgnoreExtras, + gstruct.Fields{ + "ControlPlane": Equal(fd1.ControlPlane), + }, + )) + }) + + It("sets the NoFailureDomainsReconciled condition when no failure domains are set", func() { + // Create the NutanixCluster object and expect the Reconcile to be created + g.Expect(k8sClient.Create(ctx, ntnxCluster)).To(Succeed()) + // Retrieve the applied nutanix cluster objects + appliedNtnxCluster := &infrav1.NutanixCluster{} + k8sClient.Get(ctx, client.ObjectKey{ + Namespace: ntnxCluster.Namespace, + Name: ntnxCluster.Name, + }, appliedNtnxCluster) + + err := reconciler.reconcileFailureDomains(&nctx.ClusterContext{ + Context: ctx, + NutanixCluster: appliedNtnxCluster, + }) + g.Expect(err).NotTo(HaveOccurred()) + + g.Expect(appliedNtnxCluster.Status.Conditions).To(ContainElement( + gstruct.MatchFields( + gstruct.IgnoreExtras, + gstruct.Fields{ + "Type": Equal(infrav1.NoFailureDomainsReconciled), + "Status": Equal(corev1.ConditionTrue), + }, + ), + )) + g.Expect(appliedNtnxCluster.Status.FailureDomains).To(BeEmpty()) + }) + }) }) } diff --git a/controllers/nutanixmachine_controller_test.go b/controllers/nutanixmachine_controller_test.go index ed01a7b1dc..640a40364f 100644 --- a/controllers/nutanixmachine_controller_test.go +++ b/controllers/nutanixmachine_controller_test.go @@ -22,10 +22,14 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + capiv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" infrav1 "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/api/v1beta1" + nctx "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/pkg/context" + credentialTypes "github.com/nutanix-cloud-native/prism-go-client/environment/credentials" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" ) @@ -34,16 +38,49 @@ func TestNutanixMachineReconciler(t *testing.T) { g := NewWithT(t) _ = Describe("NutanixMachineReconciler", func() { + var ( + reconciler *NutanixMachineReconciler + ctx context.Context + ntnxMachine *infrav1.NutanixMachine + machine *capiv1.Machine + ntnxCluster *infrav1.NutanixCluster + r string + ) + + BeforeEach(func() { + ctx = context.Background() + r = util.RandomString(10) + reconciler = &NutanixMachineReconciler{ + Client: k8sClient, + Scheme: runtime.NewScheme(), + } + + ntnxMachine = &infrav1.NutanixMachine{ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }} + machine = &capiv1.Machine{ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }} + + ntnxCluster = &infrav1.NutanixCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }, + Spec: infrav1.NutanixClusterSpec{ + PrismCentral: &credentialTypes.NutanixPrismEndpoint{ + // Adding port info to override default value (0) + Port: 9440, + }, + }, + } + }) + Context("Reconcile an NutanixMachine", func() { It("should not error or requeue the request", func() { By("Calling reconcile") - reconciler := &NutanixMachineReconciler{ - Client: k8sClient, - Scheme: runtime.NewScheme(), - } - - ctx := context.Background() - ntnxMachine := &infrav1.NutanixMachine{ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}} result, err := reconciler.Reconcile(ctx, ctrl.Request{ NamespacedName: client.ObjectKey{ Namespace: ntnxMachine.Namespace, @@ -55,5 +92,90 @@ func TestNutanixMachineReconciler(t *testing.T) { g.Expect(result.Requeue).To(BeFalse()) }) }) + Context("Validates machine config", func() { + It("should error if no failure domain is present on machine and no subnets are passed", func() { + err := reconciler.validateMachineConfig(&nctx.MachineContext{ + Context: ctx, + NutanixMachine: ntnxMachine, + Machine: machine, + }) + g.Expect(err).To(HaveOccurred()) + }) + }) + + Context("Gets the subnet and PE UUIDs", func() { + It("should error if nil machine context is passed", func() { + _, _, err := reconciler.GetSubnetAndPEUUIDs(nil) + g.Expect(err).To(HaveOccurred()) + }) + + It("should error if machine has no failure domain and Prism Element info is missing on nutanix machine", func() { + _, _, err := reconciler.GetSubnetAndPEUUIDs(&nctx.MachineContext{ + Context: ctx, + NutanixMachine: ntnxMachine, + Machine: machine, + NutanixCluster: ntnxCluster, + }) + g.Expect(err).To(HaveOccurred()) + }) + It("should error if machine has no failure domain and subnet info is missing on nutanix machine", func() { + ntnxMachine.Spec.Cluster = infrav1.NutanixResourceIdentifier{ + Type: infrav1.NutanixIdentifierName, + Name: &r, + } + _, _, err := reconciler.GetSubnetAndPEUUIDs(&nctx.MachineContext{ + Context: ctx, + NutanixMachine: ntnxMachine, + Machine: machine, + NutanixCluster: ntnxCluster, + }) + g.Expect(err).To(HaveOccurred()) + }) + It("should error if machine has no failure domain and nutanixClient is nil", func() { + ntnxMachine.Spec.Cluster = infrav1.NutanixResourceIdentifier{ + Type: infrav1.NutanixIdentifierName, + Name: &r, + } + ntnxMachine.Spec.Subnets = []infrav1.NutanixResourceIdentifier{ + { + Type: infrav1.NutanixIdentifierName, + Name: &r, + }, + } + _, _, err := reconciler.GetSubnetAndPEUUIDs(&nctx.MachineContext{ + Context: ctx, + NutanixMachine: ntnxMachine, + Machine: machine, + NutanixCluster: ntnxCluster, + }) + g.Expect(err).To(HaveOccurred()) + }) + It("should error if machine has failure domain 
+			It("should error if machine has a failure domain but it is missing on the nutanixCluster object", func() {
+				machine.Spec.FailureDomain = &r
+
+				_, _, err := reconciler.GetSubnetAndPEUUIDs(&nctx.MachineContext{
+					Context:        ctx,
+					NutanixMachine: ntnxMachine,
+					Machine:        machine,
+					NutanixCluster: ntnxCluster,
+				})
+				g.Expect(err).To(HaveOccurred())
+			})
+			It("should error if machine and nutanixCluster have a failure domain but nutanixClient is nil", func() {
+				machine.Spec.FailureDomain = &r
+				ntnxCluster.Spec.FailureDomains = []infrav1.NutanixFailureDomain{
+					{
+						Name: r,
+					},
+				}
+				_, _, err := reconciler.GetSubnetAndPEUUIDs(&nctx.MachineContext{
+					Context:        ctx,
+					NutanixMachine: ntnxMachine,
+					Machine:        machine,
+					NutanixCluster: ntnxCluster,
+				})
+				g.Expect(err).To(HaveOccurred())
+			})
+		})
 	})
 }
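Before the e2e wiring in the next patches, a note on the API surface these tests exercise: a failure domain couples a unique name with a Prism Element cluster, one or more subnets, and a control plane eligibility flag. A minimal, fully populated value might look as follows (the identifiers are illustrative placeholders, not values from this series):

    // Illustrative sketch only: placeholder names, not values from the e2e templates.
    func exampleFailureDomain() infrav1.NutanixFailureDomain {
    	peName, subnetName := "pe-cluster-1", "subnet-1"
    	return infrav1.NutanixFailureDomain{
    		// Unique name, referenced from Machine.Spec.FailureDomain.
    		Name: "failure-domain-pe1",
    		// Prism Element cluster hosting the VMs placed in this domain.
    		Cluster: infrav1.NutanixResourceIdentifier{
    			Type: infrav1.NutanixIdentifierName,
    			Name: &peName,
    		},
    		// Subnets on that Prism Element for the VMs to attach to.
    		Subnets: []infrav1.NutanixResourceIdentifier{
    			{Type: infrav1.NutanixIdentifierName, Name: &subnetName},
    		},
    		// Whether control plane Machines may be scheduled into this domain.
    		ControlPlane: true,
    	}
    }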
From 77a2a36a4a1da96bbc077dbe2b24356a25fa1367 Mon Sep 17 00:00:00 2001
From: Yannick Struyf
Date: Thu, 30 Nov 2023 12:07:27 +0100
Subject: [PATCH 3/5] Add description to e2e failure domain test case

---
 Makefile                         | 2 ++
 test/e2e/failure_domains_test.go | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/Makefile b/Makefile
index 2ffe72323a..c74e0db6a7 100644
--- a/Makefile
+++ b/Makefile
@@ -303,6 +303,7 @@ cluster-e2e-templates-v1beta1: $(KUSTOMIZE) ## Generate cluster templates for v1
 	$(KUSTOMIZE) build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-remediation --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-remediation.yaml
 	$(KUSTOMIZE) build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-scale-in.yaml
 	$(KUSTOMIZE) build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-csi --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-csi.yaml
+	$(KUSTOMIZE) build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-failure-domains --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-failure-domains.yaml
 
 cluster-e2e-templates-no-kubeproxy: $(KUSTOMIZE) ##Generate cluster templates without kubeproxy
 	# v1alpha4
@@ -321,6 +322,7 @@ cluster-e2e-templates-no-kubeproxy: $(KUSTOMIZE) ##Generate cluster templates wi
 	$(KUSTOMIZE) build $(NUTANIX_E2E_TEMPLATES)/v1beta1/no-kubeproxy/cluster-template-kcp-remediation --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-remediation.yaml
 	$(KUSTOMIZE) build $(NUTANIX_E2E_TEMPLATES)/v1beta1/no-kubeproxy/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-scale-in.yaml
 	$(KUSTOMIZE) build $(NUTANIX_E2E_TEMPLATES)/v1beta1/no-kubeproxy/cluster-template-csi --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-csi.yaml
+	$(KUSTOMIZE) build $(NUTANIX_E2E_TEMPLATES)/v1beta1/no-kubeproxy/cluster-template-failure-domains --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-failure-domains.yaml
 
 cluster-templates: $(KUSTOMIZE) ## Generate cluster templates for all flavors
 	$(KUSTOMIZE) build $(TEMPLATES_DIR)/base > $(TEMPLATES_DIR)/cluster-template.yaml
diff --git a/test/e2e/failure_domains_test.go b/test/e2e/failure_domains_test.go
index b3803cb569..107d37676b 100644
--- a/test/e2e/failure_domains_test.go
+++ b/test/e2e/failure_domains_test.go
@@ -38,6 +38,10 @@ const (
 )
 
 // Note: Still has "only-for-validation" label.
+// Tests if:
+// - the control plane nodes are spread across the defined failure domains
+// - the VMs are deployed on the correct Prism Element cluster and subnet
+// - the correct failure domain conditions are applied to the nutanixCluster object
 var _ = Describe("Nutanix failure domains", Label("capx-feature-test", "failure-domains", "only-for-validation", "slow", "network"), func() {
 	const specName = "failure-domains"

From 1eff6cbe9259b84f172baa28b0571d5c1b3ed649 Mon Sep 17 00:00:00 2001
From: Yannick Struyf
Date: Fri, 1 Dec 2023 11:58:52 +0100
Subject: [PATCH 4/5] Add comments to the new function definitions and rename
 cUUID to peUUID parameter

---
 api/v1beta1/conditions.go                | 1 +
 controllers/helpers.go                   | 1 +
 controllers/nutanixmachine_controller.go | 6 +++---
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/api/v1beta1/conditions.go b/api/v1beta1/conditions.go
index 54ab2183db..a089099ffa 100644
--- a/api/v1beta1/conditions.go
+++ b/api/v1beta1/conditions.go
@@ -29,6 +29,7 @@ const (
 	// NoFailureDomainsReconciled indicates no failure domains have been defined
 	NoFailureDomainsReconciled capiv1.ConditionType = "NoFailureDomainsReconciled"
 
+	// FailureDomainsReconciliationFailed indicates the failure domain reconciliation failed
 	FailureDomainsReconciliationFailed = "FailureDomainsReconciliationFailed"
 )
diff --git a/controllers/helpers.go b/controllers/helpers.go
index 7f335ce82b..8ec4331f22 100644
--- a/controllers/helpers.go
+++ b/controllers/helpers.go
@@ -748,6 +748,7 @@ func GetGPUsForPE(ctx context.Context, client *nutanixClientV3.Client, peUUID st
 	return gpus, nil
 }
 
+// GetFailureDomain gets the failure domain with a given name from a NutanixCluster object.
 func GetFailureDomain(failureDomainName string, nutanixCluster *infrav1.NutanixCluster) (*infrav1.NutanixFailureDomain, error) {
 	if failureDomainName == "" {
 		return nil, fmt.Errorf("failure domain name must be set when searching for failure domains on a Nutanix cluster object")
diff --git a/controllers/nutanixmachine_controller.go b/controllers/nutanixmachine_controller.go
index 68c588ed07..7a17878dda 100644
--- a/controllers/nutanixmachine_controller.go
+++ b/controllers/nutanixmachine_controller.go
@@ -929,14 +929,14 @@ func (r *NutanixMachineReconciler) GetSubnetAndPEUUIDs(rctx *nctx.MachineContext
 		if err != nil {
 			return "", nil, fmt.Errorf("failed to find failure domain %s", failureDomainName)
 		}
-		cUUID, err := GetPEUUID(rctx.Context, rctx.NutanixClient, failureDomain.Cluster.Name, failureDomain.Cluster.UUID)
+		peUUID, err := GetPEUUID(rctx.Context, rctx.NutanixClient, failureDomain.Cluster.Name, failureDomain.Cluster.UUID)
 		if err != nil {
 			return "", nil, fmt.Errorf("failed to find prism element uuid for failure domain %s", failureDomainName)
 		}
-		subnetUUIDs, err := GetSubnetUUIDList(rctx.Context, rctx.NutanixClient, failureDomain.Subnets, cUUID)
+		subnetUUIDs, err := GetSubnetUUIDList(rctx.Context, rctx.NutanixClient, failureDomain.Subnets, peUUID)
 		if err != nil {
 			return "", nil, fmt.Errorf("failed to find subnet uuids for failure domain %s", failureDomainName)
 		}
-	return cUUID, subnetUUIDs, nil
+	return peUUID, subnetUUIDs, nil
 }
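To summarize the placement logic the last two patches touch: when a Machine names a failure domain, the domain supplies the Prism Element and subnets; otherwise the NutanixMachine spec must carry them itself, which is what the validateMachineConfig change below enforces. A condensed, hypothetical rendering of GetSubnetAndPEUUIDs follows; the fallback branch is inferred from the unit tests, not copied from the source:

    // Hypothetical condensation of GetSubnetAndPEUUIDs; not the actual source.
    func resolvePlacement(rctx *nctx.MachineContext) (string, []string, error) {
    	if fdName := rctx.Machine.Spec.FailureDomain; fdName != nil && *fdName != "" {
    		// A failure domain wins: it names the Prism Element and subnets.
    		fd, err := GetFailureDomain(*fdName, rctx.NutanixCluster)
    		if err != nil {
    			return "", nil, err
    		}
    		peUUID, err := GetPEUUID(rctx.Context, rctx.NutanixClient, fd.Cluster.Name, fd.Cluster.UUID)
    		if err != nil {
    			return "", nil, err
    		}
    		subnetUUIDs, err := GetSubnetUUIDList(rctx.Context, rctx.NutanixClient, fd.Subnets, peUUID)
    		if err != nil {
    			return "", nil, err
    		}
    		return peUUID, subnetUUIDs, nil
    	}
    	// No failure domain: fall back to the cluster and subnets set on the NutanixMachine spec.
    	peUUID, err := GetPEUUID(rctx.Context, rctx.NutanixClient, rctx.NutanixMachine.Spec.Cluster.Name, rctx.NutanixMachine.Spec.Cluster.UUID)
    	if err != nil {
    		return "", nil, err
    	}
    	subnetUUIDs, err := GetSubnetUUIDList(rctx.Context, rctx.NutanixClient, rctx.NutanixMachine.Spec.Subnets, peUUID)
    	if err != nil {
    		return "", nil, err
    	}
    	return peUUID, subnetUUIDs, nil
    }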
From a525379144169d4dedd8770e165ded42dae40dd3 Mon Sep 17 00:00:00 2001
From: Yannick Struyf
Date: Tue, 5 Dec 2023 14:33:02 +0100
Subject: [PATCH 5/5] Add additional check for cluster in validateMachineConfig

---
 controllers/nutanixmachine_controller.go      | 10 +++-
 controllers/nutanixmachine_controller_test.go | 57 +++++++++++++++++--
 2 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/controllers/nutanixmachine_controller.go b/controllers/nutanixmachine_controller.go
index 7a17878dda..3dd2bcadf7 100644
--- a/controllers/nutanixmachine_controller.go
+++ b/controllers/nutanixmachine_controller.go
@@ -513,8 +513,14 @@ func (r *NutanixMachineReconciler) reconcileNode(rctx *nctx.MachineContext) (rec
 }
 
 func (r *NutanixMachineReconciler) validateMachineConfig(rctx *nctx.MachineContext) error {
-	if rctx.Machine.Spec.FailureDomain == nil && len(rctx.NutanixMachine.Spec.Subnets) == 0 {
-		return fmt.Errorf("atleast one subnet is needed to create the VM %s if no failure domain is set", rctx.NutanixMachine.Name)
+	if rctx.Machine.Spec.FailureDomain == nil {
+		if len(rctx.NutanixMachine.Spec.Subnets) == 0 {
+			return fmt.Errorf("at least one subnet is needed to create the VM %s if no failure domain is set", rctx.NutanixMachine.Name)
+		}
+		if (rctx.NutanixMachine.Spec.Cluster.Name == nil || *rctx.NutanixMachine.Spec.Cluster.Name == "") &&
+			(rctx.NutanixMachine.Spec.Cluster.UUID == nil || *rctx.NutanixMachine.Spec.Cluster.UUID == "") {
+			return fmt.Errorf("cluster name or uuid is required to create the VM %s if no failure domain is set", rctx.NutanixMachine.Name)
+		}
 	}
 
 	diskSize := rctx.NutanixMachine.Spec.SystemDiskSize
diff --git a/controllers/nutanixmachine_controller_test.go b/controllers/nutanixmachine_controller_test.go
index 640a40364f..6fc270e9a0 100644
--- a/controllers/nutanixmachine_controller_test.go
+++ b/controllers/nutanixmachine_controller_test.go
@@ -55,10 +55,18 @@ func TestNutanixMachineReconciler(t *testing.T) {
 			Scheme: runtime.NewScheme(),
 		}
 
-		ntnxMachine = &infrav1.NutanixMachine{ObjectMeta: metav1.ObjectMeta{
-			Name:      "test",
-			Namespace: "default",
-		}}
+		ntnxMachine = &infrav1.NutanixMachine{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "test",
+				Namespace: "default",
+			},
+			Spec: infrav1.NutanixMachineSpec{
+				VCPUsPerSocket: int32(minVCPUsPerSocket),
+				MemorySize:     minMachineMemorySize,
+				SystemDiskSize: minMachineSystemDiskSize,
+				VCPUSockets:    int32(minVCPUSockets),
+			},
+		}
 		machine = &capiv1.Machine{ObjectMeta: metav1.ObjectMeta{
 			Name:      "test",
 			Namespace: "default",
@@ -101,6 +109,47 @@ func TestNutanixMachineReconciler(t *testing.T) {
 			})
 			g.Expect(err).To(HaveOccurred())
 		})
+		It("should error if no failure domain is present on machine and no cluster name is passed", func() {
+			ntnxMachine.Spec.Subnets = []infrav1.NutanixResourceIdentifier{
+				{
+					Type: infrav1.NutanixIdentifierName,
+					Name: &r,
+				},
+			}
+			err := reconciler.validateMachineConfig(&nctx.MachineContext{
+				Context:        ctx,
+				NutanixMachine: ntnxMachine,
+				Machine:        machine,
+			})
+			g.Expect(err).To(HaveOccurred())
+		})
+		It("returns no error if a valid machine config is passed without failure domain", func() {
+			ntnxMachine.Spec.Subnets = []infrav1.NutanixResourceIdentifier{
+				{
+					Type: infrav1.NutanixIdentifierName,
+					Name: &r,
+				},
+			}
+			ntnxMachine.Spec.Cluster = infrav1.NutanixResourceIdentifier{
+				Type: infrav1.NutanixIdentifierName,
+				Name: &r,
+			}
+			err := reconciler.validateMachineConfig(&nctx.MachineContext{
+				Context:        ctx,
+				NutanixMachine: ntnxMachine,
+				Machine:        machine,
+			})
+			g.Expect(err).ToNot(HaveOccurred())
+		})
+		It("returns no error if a valid machine config is passed with failure domain", func() {
+			machine.Spec.FailureDomain = &r
+			err := reconciler.validateMachineConfig(&nctx.MachineContext{
+				Context:        ctx,
+				NutanixMachine: ntnxMachine,
+				Machine:        machine,
+			})
+			g.Expect(err).ToNot(HaveOccurred())
+		})
 	})
 
 	Context("Gets the subnet and PE UUIDs", func() {