diff --git a/api/v1alpha4/conditions.go b/api/v1alpha4/conditions.go index cc5d358017..9d006b0698 100644 --- a/api/v1alpha4/conditions.go +++ b/api/v1alpha4/conditions.go @@ -30,7 +30,7 @@ const ( ) const ( - //PrismCentralClientCondition indicates the status of the client used to connect to Prism Central + // PrismCentralClientCondition indicates the status of the client used to connect to Prism Central PrismCentralClientCondition capiv1.ConditionType = "PrismClientInit" PrismCentralClientInitializationFailed = "PrismClientInitFailed" diff --git a/api/v1alpha4/nutanixcluster_types.go b/api/v1alpha4/nutanixcluster_types.go index 00fe8b408b..db621f296b 100644 --- a/api/v1alpha4/nutanixcluster_types.go +++ b/api/v1alpha4/nutanixcluster_types.go @@ -50,6 +50,14 @@ type NutanixClusterSpec struct { // proxy spec.noProxy list. // +optional PrismCentral *credentialTypes.NutanixPrismEndpoint `json:"prismCentral"` + + // failureDomains configures failure domains information for the Nutanix platform. + // When set, the failure domains defined here may be used to spread Machines across + // prism element clusters to improve fault tolerance of the cluster. + // +listType=map + // +listMapKey=name + // +optional + FailureDomains []NutanixFailureDomain `json:"failureDomains"` } // NutanixClusterStatus defines the observed state of NutanixCluster @@ -90,6 +98,39 @@ type NutanixCluster struct { Status NutanixClusterStatus `json:"status,omitempty"` } +// NutanixFailureDomain configures failure domain information for Nutanix. +type NutanixFailureDomain struct { + // name defines the unique name of a failure domain. + // Name is required and must be at most 64 characters in length. + // It must consist of only lower case alphanumeric characters and hyphens (-). + // It must start and end with an alphanumeric character. + // This value is arbitrary and is used to identify the failure domain within the platform. 
+ // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=64 + // +kubebuilder:validation:Pattern=`[a-z0-9]([-a-z0-9]*[a-z0-9])?` + Name string `json:"name"` + + // cluster is to identify the cluster (the Prism Element under management of the Prism Central), + // in which the Machine's VM will be created. The cluster identifier (uuid or name) can be obtained + // from the Prism Central console or using the prism_central API. + // +kubebuilder:validation:Required + Cluster NutanixResourceIdentifier `json:"cluster"` + + // subnets holds a list of identifiers (one or more) of the cluster's network subnets + // for the Machine's VM to connect to. The subnet identifiers (uuid or name) can be + // obtained from the Prism Central console or using the prism_central API. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinItems=1 + // +listType=map + // +listMapKey=type + Subnets []NutanixResourceIdentifier `json:"subnets"` + + // indicates if a failure domain is suited for control plane nodes + // +kubebuilder:validation:Required + ControlPlane bool `json:"controlPlane,omitempty"` +} + // GetConditions returns the set of conditions for this object. func (ncl *NutanixCluster) GetConditions() capiv1.Conditions { return ncl.Status.Conditions diff --git a/api/v1alpha4/nutanixmachine_types.go b/api/v1alpha4/nutanixmachine_types.go index bf289ee0d9..9d4eb01e9e 100644 --- a/api/v1alpha4/nutanixmachine_types.go +++ b/api/v1alpha4/nutanixmachine_types.go @@ -61,12 +61,12 @@ type NutanixMachineSpec struct { // of the Prism Central), in which the Machine's VM will be created. // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. 
- // +kubebuilder:validation:Required + // +kubebuilder:validation:Optional Cluster NutanixResourceIdentifier `json:"cluster"` // subnet is to identify the cluster's network subnet to use for the Machine's VM // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. - // +kubebuilder:validation:Required + // +kubebuilder:validation:Optional // +kubebuilder:validation:MinItems=1 Subnets []NutanixResourceIdentifier `json:"subnet"` // List of categories that need to be added to the machines. Categories must already exist in Prism Central diff --git a/api/v1alpha4/zz_generated.conversion.go b/api/v1alpha4/zz_generated.conversion.go index 3f8c1ad47b..ce0280392e 100644 --- a/api/v1alpha4/zz_generated.conversion.go +++ b/api/v1alpha4/zz_generated.conversion.go @@ -75,6 +75,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*NutanixFailureDomain)(nil), (*v1beta1.NutanixFailureDomain)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain(a.(*NutanixFailureDomain), b.(*v1beta1.NutanixFailureDomain), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*v1beta1.NutanixFailureDomain)(nil), (*NutanixFailureDomain)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain(a.(*v1beta1.NutanixFailureDomain), b.(*NutanixFailureDomain), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*NutanixMachine)(nil), (*v1beta1.NutanixMachine)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_NutanixMachine_To_v1beta1_NutanixMachine(a.(*NutanixMachine), b.(*v1beta1.NutanixMachine), scope) }); err != nil { @@ -304,6 +314,7 @@ func 
autoConvert_v1alpha4_NutanixClusterSpec_To_v1beta1_NutanixClusterSpec(in *N return err } out.PrismCentral = (*credentials.NutanixPrismEndpoint)(unsafe.Pointer(in.PrismCentral)) + out.FailureDomains = *(*[]v1beta1.NutanixFailureDomain)(unsafe.Pointer(&in.FailureDomains)) return nil } @@ -312,6 +323,7 @@ func autoConvert_v1beta1_NutanixClusterSpec_To_v1alpha4_NutanixClusterSpec(in *v return err } out.PrismCentral = (*credentials.NutanixPrismEndpoint)(unsafe.Pointer(in.PrismCentral)) + out.FailureDomains = *(*[]NutanixFailureDomain)(unsafe.Pointer(&in.FailureDomains)) return nil } @@ -338,6 +350,36 @@ func Convert_v1beta1_NutanixClusterStatus_To_v1alpha4_NutanixClusterStatus(in *v return autoConvert_v1beta1_NutanixClusterStatus_To_v1alpha4_NutanixClusterStatus(in, out, s) } +func autoConvert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain(in *NutanixFailureDomain, out *v1beta1.NutanixFailureDomain, s conversion.Scope) error { + out.Name = in.Name + if err := Convert_v1alpha4_NutanixResourceIdentifier_To_v1beta1_NutanixResourceIdentifier(&in.Cluster, &out.Cluster, s); err != nil { + return err + } + out.Subnets = *(*[]v1beta1.NutanixResourceIdentifier)(unsafe.Pointer(&in.Subnets)) + out.ControlPlane = in.ControlPlane + return nil +} + +// Convert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain is an autogenerated conversion function. 
+func Convert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain(in *NutanixFailureDomain, out *v1beta1.NutanixFailureDomain, s conversion.Scope) error { + return autoConvert_v1alpha4_NutanixFailureDomain_To_v1beta1_NutanixFailureDomain(in, out, s) +} + +func autoConvert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain(in *v1beta1.NutanixFailureDomain, out *NutanixFailureDomain, s conversion.Scope) error { + out.Name = in.Name + if err := Convert_v1beta1_NutanixResourceIdentifier_To_v1alpha4_NutanixResourceIdentifier(&in.Cluster, &out.Cluster, s); err != nil { + return err + } + out.Subnets = *(*[]NutanixResourceIdentifier)(unsafe.Pointer(&in.Subnets)) + out.ControlPlane = in.ControlPlane + return nil +} + +// Convert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain is an autogenerated conversion function. +func Convert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain(in *v1beta1.NutanixFailureDomain, out *NutanixFailureDomain, s conversion.Scope) error { + return autoConvert_v1beta1_NutanixFailureDomain_To_v1alpha4_NutanixFailureDomain(in, out, s) +} + func autoConvert_v1alpha4_NutanixMachine_To_v1beta1_NutanixMachine(in *NutanixMachine, out *v1beta1.NutanixMachine, s conversion.Scope) error { out.ObjectMeta = in.ObjectMeta if err := Convert_v1alpha4_NutanixMachineSpec_To_v1beta1_NutanixMachineSpec(&in.Spec, &out.Spec, s); err != nil { diff --git a/api/v1alpha4/zz_generated.deepcopy.go b/api/v1alpha4/zz_generated.deepcopy.go index df1ac46cff..afc6c94204 100644 --- a/api/v1alpha4/zz_generated.deepcopy.go +++ b/api/v1alpha4/zz_generated.deepcopy.go @@ -112,6 +112,13 @@ func (in *NutanixClusterSpec) DeepCopyInto(out *NutanixClusterSpec) { *out = new(credentials.NutanixPrismEndpoint) (*in).DeepCopyInto(*out) } + if in.FailureDomains != nil { + in, out := &in.FailureDomains, &out.FailureDomains + *out = make([]NutanixFailureDomain, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // 
DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixClusterSpec. @@ -163,6 +170,29 @@ func (in *NutanixClusterStatus) DeepCopy() *NutanixClusterStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NutanixFailureDomain) DeepCopyInto(out *NutanixFailureDomain) { + *out = *in + in.Cluster.DeepCopyInto(&out.Cluster) + if in.Subnets != nil { + in, out := &in.Subnets, &out.Subnets + *out = make([]NutanixResourceIdentifier, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixFailureDomain. +func (in *NutanixFailureDomain) DeepCopy() *NutanixFailureDomain { + if in == nil { + return nil + } + out := new(NutanixFailureDomain) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *NutanixMachine) DeepCopyInto(out *NutanixMachine) { *out = *in diff --git a/api/v1beta1/conditions.go b/api/v1beta1/conditions.go index fbefdca9e5..54ab2183db 100644 --- a/api/v1beta1/conditions.go +++ b/api/v1beta1/conditions.go @@ -22,6 +22,16 @@ const ( DeletionFailed = "DeletionFailed" ) +const ( + // FailureDomainsReconciled indicates the status of the failure domain reconciliation + FailureDomainsReconciled capiv1.ConditionType = "FailureDomainsReconciled" + + // NoFailureDomainsReconciled indicates no failure domains have been defined + NoFailureDomainsReconciled capiv1.ConditionType = "NoFailureDomainsReconciled" + + FailureDomainsReconciliationFailed = "FailureDomainsReconciliationFailed" +) + const ( // ClusterCategoryCreatedCondition indicates the status of the category linked to the NutanixCluster ClusterCategoryCreatedCondition capiv1.ConditionType = "ClusterCategoryCreated" diff --git a/api/v1beta1/nutanixcluster_types.go b/api/v1beta1/nutanixcluster_types.go index fe92422ff2..b19dd24fb5 100644 --- a/api/v1beta1/nutanixcluster_types.go +++ b/api/v1beta1/nutanixcluster_types.go @@ -50,6 +50,14 @@ type NutanixClusterSpec struct { // proxy spec.noProxy list. // +optional PrismCentral *credentialTypes.NutanixPrismEndpoint `json:"prismCentral"` + + // failureDomains configures failure domains information for the Nutanix platform. + // When set, the failure domains defined here may be used to spread Machines across + // prism element clusters to improve fault tolerance of the cluster. + // +listType=map + // +listMapKey=name + // +optional + FailureDomains []NutanixFailureDomain `json:"failureDomains"` } // NutanixClusterStatus defines the observed state of NutanixCluster @@ -91,6 +99,39 @@ type NutanixCluster struct { Status NutanixClusterStatus `json:"status,omitempty"` } +// NutanixFailureDomain configures failure domain information for Nutanix. +type NutanixFailureDomain struct { + // name defines the unique name of a failure domain. 
+ // Name is required and must be at most 64 characters in length. + // It must consist of only lower case alphanumeric characters and hyphens (-). + // It must start and end with an alphanumeric character. + // This value is arbitrary and is used to identify the failure domain within the platform. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=64 + // +kubebuilder:validation:Pattern=`[a-z0-9]([-a-z0-9]*[a-z0-9])?` + Name string `json:"name"` + + // cluster is to identify the cluster (the Prism Element under management of the Prism Central), + // in which the Machine's VM will be created. The cluster identifier (uuid or name) can be obtained + // from the Prism Central console or using the prism_central API. + // +kubebuilder:validation:Required + Cluster NutanixResourceIdentifier `json:"cluster"` + + // subnets holds a list of identifiers (one or more) of the cluster's network subnets + // for the Machine's VM to connect to. The subnet identifiers (uuid or name) can be + // obtained from the Prism Central console or using the prism_central API. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinItems=1 + // +listType=map + // +listMapKey=type + Subnets []NutanixResourceIdentifier `json:"subnets"` + + // indicates if a failure domain is suited for control plane nodes + // +kubebuilder:validation:Required + ControlPlane bool `json:"controlPlane,omitempty"` +} + // GetConditions returns the set of conditions for this object. func (ncl *NutanixCluster) GetConditions() capiv1.Conditions { return ncl.Status.Conditions diff --git a/api/v1beta1/nutanixmachine_types.go b/api/v1beta1/nutanixmachine_types.go index 821edb6e89..e586335d4c 100644 --- a/api/v1beta1/nutanixmachine_types.go +++ b/api/v1beta1/nutanixmachine_types.go @@ -61,12 +61,12 @@ type NutanixMachineSpec struct { // of the Prism Central), in which the Machine's VM will be created. 
// The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. - // +kubebuilder:validation:Required + // +kubebuilder:validation:Optional Cluster NutanixResourceIdentifier `json:"cluster"` // subnet is to identify the cluster's network subnet to use for the Machine's VM // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. - // +kubebuilder:validation:Required + // +kubebuilder:validation:Optional // +kubebuilder:validation:MinItems=1 Subnets []NutanixResourceIdentifier `json:"subnet"` // List of categories that need to be added to the machines. Categories must already exist in Prism Central diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 682b6617c8..ceeef4c9c5 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -112,6 +112,13 @@ func (in *NutanixClusterSpec) DeepCopyInto(out *NutanixClusterSpec) { *out = new(credentials.NutanixPrismEndpoint) (*in).DeepCopyInto(*out) } + if in.FailureDomains != nil { + in, out := &in.FailureDomains, &out.FailureDomains + *out = make([]NutanixFailureDomain, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixClusterSpec. @@ -163,6 +170,29 @@ func (in *NutanixClusterStatus) DeepCopy() *NutanixClusterStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *NutanixFailureDomain) DeepCopyInto(out *NutanixFailureDomain) { + *out = *in + in.Cluster.DeepCopyInto(&out.Cluster) + if in.Subnets != nil { + in, out := &in.Subnets, &out.Subnets + *out = make([]NutanixResourceIdentifier, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NutanixFailureDomain. +func (in *NutanixFailureDomain) DeepCopy() *NutanixFailureDomain { + if in == nil { + return nil + } + out := new(NutanixFailureDomain) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NutanixGPU) DeepCopyInto(out *NutanixGPU) { *out = *in diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixclusters.yaml index 0d477c34c2..a4cee98f35 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixclusters.yaml @@ -64,6 +64,93 @@ spec: - host - port type: object + failureDomains: + description: failureDomains configures failure domains information + for the Nutanix platform. When set, the failure domains defined + here may be used to spread Machines across prism element clusters + to improve fault tolerance of the cluster. + items: + description: NutanixFailureDomain configures failure domain information + for Nutanix. + properties: + cluster: + description: cluster is to identify the cluster (the Prism Element + under management of the Prism Central), in which the Machine's + VM will be created. The cluster identifier (uuid or name) + can be obtained from the Prism Central console or using the + prism_central API. 
+ properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this + resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. + type: string + required: + - type + type: object + controlPlane: + description: indicates if a failure domain is suited for control + plane nodes + type: boolean + name: + description: name defines the unique name of a failure domain. + Name is required and must be at most 64 characters in length. + It must consist of only lower case alphanumeric characters + and hyphens (-). It must start and end with an alphanumeric + character. This value is arbitrary and is used to identify + the failure domain within the platform. + maxLength: 64 + minLength: 1 + pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?' + type: string + subnets: + description: subnets holds a list of identifiers (one or more) + of the cluster's network subnets for the Machine's VM to connect + to. The subnet identifiers (uuid or name) can be obtained + from the Prism Central console or using the prism_central + API. + items: + description: NutanixResourceIdentifier holds the identity + of a Nutanix PC resource (cluster, image, subnet, etc.) + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this + resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. + type: string + required: + - type + type: object + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + required: + - cluster + - name + - subnets + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map prismCentral: description: prismCentral holds the endpoint address and port to access the Nutanix Prism Central. 
When a cluster-wide proxy is installed, @@ -263,6 +350,93 @@ spec: - host - port type: object + failureDomains: + description: failureDomains configures failure domains information + for the Nutanix platform. When set, the failure domains defined + here may be used to spread Machines across prism element clusters + to improve fault tolerance of the cluster. + items: + description: NutanixFailureDomain configures failure domain information + for Nutanix. + properties: + cluster: + description: cluster is to identify the cluster (the Prism Element + under management of the Prism Central), in which the Machine's + VM will be created. The cluster identifier (uuid or name) + can be obtained from the Prism Central console or using the + prism_central API. + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this + resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. + type: string + required: + - type + type: object + controlPlane: + description: indicates if a failure domain is suited for control + plane nodes + type: boolean + name: + description: name defines the unique name of a failure domain. + Name is required and must be at most 64 characters in length. + It must consist of only lower case alphanumeric characters + and hyphens (-). It must start and end with an alphanumeric + character. This value is arbitrary and is used to identify + the failure domain within the platform. + maxLength: 64 + minLength: 1 + pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?' + type: string + subnets: + description: subnets holds a list of identifiers (one or more) + of the cluster's network subnets for the Machine's VM to connect + to. The subnet identifiers (uuid or name) can be obtained + from the Prism Central console or using the prism_central + API. 
+ items: + description: NutanixResourceIdentifier holds the identity + of a Nutanix PC resource (cluster, image, subnet, etc.) + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this + resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. + type: string + required: + - type + type: object + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + required: + - cluster + - name + - subnets + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map prismCentral: description: prismCentral holds the endpoint address and port to access the Nutanix Prism Central. When a cluster-wide proxy is installed, diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml index e5af3bc341..044ef5325d 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml @@ -229,11 +229,9 @@ spec: minimum: 1 type: integer required: - - cluster - image - memorySize - providerID - - subnet - systemDiskSize - vcpuSockets - vcpusPerSocket @@ -591,11 +589,9 @@ spec: minimum: 1 type: integer required: - - cluster - image - memorySize - providerID - - subnet - systemDiskSize - vcpuSockets - vcpusPerSocket diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml index 2768525a6d..93d3fe4088 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml @@ -253,11 +253,9 @@ spec: minimum: 1 type: integer required: - - cluster - image - memorySize - 
providerID - - subnet - systemDiskSize - vcpuSockets - vcpusPerSocket @@ -528,11 +526,9 @@ spec: minimum: 1 type: integer required: - - cluster - image - memorySize - providerID - - subnet - systemDiskSize - vcpuSockets - vcpusPerSocket diff --git a/controllers/helpers.go b/controllers/helpers.go index ede1634f3a..f4a1cfaa70 100644 --- a/controllers/helpers.go +++ b/controllers/helpers.go @@ -744,3 +744,18 @@ func GetGPUsForPE(ctx context.Context, client *nutanixClientV3.Client, peUUID st } return gpus, nil } + +func GetFailureDomain(failureDomainName string, nutanixCluster *infrav1.NutanixCluster) (*infrav1.NutanixFailureDomain, error) { + if failureDomainName == "" { + return nil, fmt.Errorf("failure domain name must be set when searching for failure domains on a Nutanix cluster object") + } + if nutanixCluster == nil { + return nil, fmt.Errorf("nutanixCluster cannot be nil when searching for failure domains") + } + for _, fd := range nutanixCluster.Spec.FailureDomains { + if fd.Name == failureDomainName { + return &fd, nil + } + } + return nil, fmt.Errorf("failed to find failure domain %s on nutanix cluster object", failureDomainName) +} diff --git a/controllers/nutanixcluster_controller.go b/controllers/nutanixcluster_controller.go index e57be72bf5..78a578db43 100644 --- a/controllers/nutanixcluster_controller.go +++ b/controllers/nutanixcluster_controller.go @@ -256,6 +256,12 @@ func (r *NutanixClusterReconciler) reconcileNormal(rctx *nctx.ClusterContext) (r ctrlutil.AddFinalizer(rctx.NutanixCluster, infrav1.NutanixClusterFinalizer) } + // Reconciling failure domains before Ready check to allow failure domains to be modified + if err := r.reconcileFailureDomains(rctx); err != nil { + log.Error(err, "failed to reconcile failure domains for cluster") + return reconcile.Result{}, err + } + if rctx.NutanixCluster.Status.Ready { log.Info("NutanixCluster is already in ready status.") return reconcile.Result{}, nil @@ -272,6 +278,25 @@ func (r 
*NutanixClusterReconciler) reconcileNormal(rctx *nctx.ClusterContext) (r return reconcile.Result{}, nil } +func (r *NutanixClusterReconciler) reconcileFailureDomains(rctx *nctx.ClusterContext) error { + log := ctrl.LoggerFrom(rctx.Context) + if len(rctx.NutanixCluster.Spec.FailureDomains) == 0 { + log.V(1).Info("no failure domains defined on cluster") + conditions.MarkTrue(rctx.NutanixCluster, infrav1.NoFailureDomainsReconciled) + return nil + } + log.V(1).Info("Reconciling failure domains for cluster") + // If failure domains is nil on status object, first create empty slice + if rctx.NutanixCluster.Status.FailureDomains == nil { + rctx.NutanixCluster.Status.FailureDomains = make(capiv1.FailureDomains, 0) + } + for _, fd := range rctx.NutanixCluster.Spec.FailureDomains { + rctx.NutanixCluster.Status.FailureDomains[fd.Name] = capiv1.FailureDomainSpec{ControlPlane: fd.ControlPlane} + } + conditions.MarkTrue(rctx.NutanixCluster, infrav1.FailureDomainsReconciled) + return nil +} + func (r *NutanixClusterReconciler) reconcileCategories(rctx *nctx.ClusterContext) error { log := ctrl.LoggerFrom(rctx.Context) log.Info("Reconciling categories for cluster") diff --git a/controllers/nutanixmachine_controller.go b/controllers/nutanixmachine_controller.go index c27404711d..68c588ed07 100644 --- a/controllers/nutanixmachine_controller.go +++ b/controllers/nutanixmachine_controller.go @@ -513,8 +513,8 @@ func (r *NutanixMachineReconciler) reconcileNode(rctx *nctx.MachineContext) (rec } func (r *NutanixMachineReconciler) validateMachineConfig(rctx *nctx.MachineContext) error { - if len(rctx.NutanixMachine.Spec.Subnets) == 0 { - return fmt.Errorf("atleast one subnet is needed to create the VM %s", rctx.NutanixMachine.Name) + if rctx.Machine.Spec.FailureDomain == nil && len(rctx.NutanixMachine.Spec.Subnets) == 0 { + return fmt.Errorf("atleast one subnet is needed to create the VM %s if no failure domain is set", rctx.NutanixMachine.Name) } diskSize := 
rctx.NutanixMachine.Spec.SystemDiskSize @@ -576,19 +576,10 @@ func (r *NutanixMachineReconciler) getOrCreateVM(rctx *nctx.MachineContext) (*nu return nil, err } - // Get PE UUID - peUUID, err := GetPEUUID(ctx, nc, rctx.NutanixMachine.Spec.Cluster.Name, rctx.NutanixMachine.Spec.Cluster.UUID) + peUUID, subnetUUIDs, err := r.GetSubnetAndPEUUIDs(rctx) if err != nil { - errorMsg := fmt.Errorf("failed to get the Prism Element Cluster UUID to create the VM %s. %v", vmName, err) - rctx.SetFailureStatus(capierrors.CreateMachineError, errorMsg) - return nil, err - } - - // Get Subnet UUIDs - subnetUUIDs, err := GetSubnetUUIDList(ctx, nc, rctx.NutanixMachine.Spec.Subnets, peUUID) - if err != nil { - errorMsg := fmt.Errorf("failed to get the subnet UUIDs to create the VM %s. %v", vmName, err) - rctx.SetFailureStatus(capierrors.CreateMachineError, errorMsg) + log.Error(err, fmt.Sprintf("failed to get the config for VM %s.", vmName)) + rctx.SetFailureStatus(capierrors.CreateMachineError, err) return nil, err } @@ -906,3 +897,46 @@ func (r *NutanixMachineReconciler) isGetRemoteClientConnectionError(err error) b const expectedErrString = "connect: connection refused" return strings.Contains(err.Error(), expectedErrString) } + +func (r *NutanixMachineReconciler) GetSubnetAndPEUUIDs(rctx *nctx.MachineContext) (string, []string, error) { + if rctx == nil { + return "", nil, fmt.Errorf("cannot create machine config if machine context is nil") + } + log := ctrl.LoggerFrom(rctx.Context) + if rctx.Machine.Spec.FailureDomain == nil || *rctx.Machine.Spec.FailureDomain == "" { + log.V(1).Info("no failure domain found on machine. 
Directly searching for Prism Element cluster") + if rctx.NutanixMachine.Spec.Cluster.Name == nil && rctx.NutanixMachine.Spec.Cluster.UUID == nil { + return "", nil, fmt.Errorf("cluster name or uuid must be passed if failure domain is not configured") + } + if len(rctx.NutanixMachine.Spec.Subnets) == 0 { + return "", nil, fmt.Errorf("subnets must be passed if failure domain is not configured") + } + peUUID, err := GetPEUUID(rctx.Context, rctx.NutanixClient, rctx.NutanixMachine.Spec.Cluster.Name, rctx.NutanixMachine.Spec.Cluster.UUID) + if err != nil { + return "", nil, err + } + subnetUUIDs, err := GetSubnetUUIDList(rctx.Context, rctx.NutanixClient, rctx.NutanixMachine.Spec.Subnets, peUUID) + if err != nil { + return "", nil, err + } + return peUUID, subnetUUIDs, nil + } + + log.V(1).Info("failure domain config found. Ignoring cluster config on machine object (if any)") + + failureDomainName := *rctx.Machine.Spec.FailureDomain + failureDomain, err := GetFailureDomain(failureDomainName, rctx.NutanixCluster) + if err != nil { + return "", nil, fmt.Errorf("failed to find failure domain %s", failureDomainName) + } + cUUID, err := GetPEUUID(rctx.Context, rctx.NutanixClient, failureDomain.Cluster.Name, failureDomain.Cluster.UUID) + if err != nil { + return "", nil, fmt.Errorf("failed to find prism element uuid for failure domain %s", failureDomainName) + } + subnetUUIDs, err := GetSubnetUUIDList(rctx.Context, rctx.NutanixClient, failureDomain.Subnets, cUUID) + if err != nil { + return "", nil, fmt.Errorf("failed to find subnet uuids for failure domain %s", failureDomainName) + } + + return cUUID, subnetUUIDs, nil +} diff --git a/test/e2e/config/nutanix.yaml b/test/e2e/config/nutanix.yaml index 5aa4a2b9be..d9ebf8dbe4 100644 --- a/test/e2e/config/nutanix.yaml +++ b/test/e2e/config/nutanix.yaml @@ -210,6 +210,7 @@ providers: - sourcePath: "../data/infrastructure-nutanix/v1beta1/cluster-template-kcp-remediation.yaml" - sourcePath: 
"../data/infrastructure-nutanix/v1beta1/cluster-template-kcp-scale-in.yaml" - sourcePath: "../data/infrastructure-nutanix/v1beta1/cluster-template-csi.yaml" + - sourcePath: "../data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains.yaml" variables: # Default variables for the e2e test; those values could be overridden via env variables, thus @@ -260,6 +261,16 @@ variables: NUTANIX_PRISM_ELEMENT_CLUSTER_IP: "" NUTANIX_PRISM_ELEMENT_CLUSTER_USERNAME: "" NUTANIX_PRISM_ELEMENT_CLUSTER_PASSWORD: "" + # Note: Following parameters are required for failure domain testing + NUTANIX_FAILURE_DOMAIN_1_NAME: "failuredomain-1" + NUTANIX_FAILURE_DOMAIN_1_PRISM_ELEMENT_NAME: "" + NUTANIX_FAILURE_DOMAIN_1_SUBNET_NAME: "" + NUTANIX_FAILURE_DOMAIN_2_NAME: "failuredomain-2" + NUTANIX_FAILURE_DOMAIN_2_PRISM_ELEMENT_NAME: "" + NUTANIX_FAILURE_DOMAIN_2_SUBNET_NAME: "" + NUTANIX_FAILURE_DOMAIN_3_NAME: "failuredomain-3" + NUTANIX_FAILURE_DOMAIN_3_PRISM_ELEMENT_NAME: "" + NUTANIX_FAILURE_DOMAIN_3_SUBNET_NAME: "" # NOTE: INIT_WITH_BINARY and INIT_WITH_KUBERNETES_VERSION are only used by the clusterctl upgrade test to initialize # the management cluster to be upgraded. # NOTE: We test the latest release with a previous contract. 
diff --git a/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-nmt.yaml b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-nmt.yaml new file mode 100644 index 0000000000..6780401ccd --- /dev/null +++ b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-nmt.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixMachineTemplate +metadata: + name: "${CLUSTER_NAME}-mt-0" + namespace: "${NAMESPACE}" +spec: + template: + spec: + providerID: "nutanix://${CLUSTER_NAME}-m1" + bootType: ${NUTANIX_MACHINE_BOOT_TYPE=legacy} + vcpusPerSocket: ${NUTANIX_MACHINE_VCPU_PER_SOCKET=1} + vcpuSockets: ${NUTANIX_MACHINE_VCPU_SOCKET=2} + memorySize: "${NUTANIX_MACHINE_MEMORY_SIZE=4Gi}" + systemDiskSize: "${NUTANIX_SYSTEMDISK_SIZE=40Gi}" + image: + type: name + name: "${NUTANIX_MACHINE_TEMPLATE_IMAGE_NAME}" diff --git a/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-patch.yaml b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-patch.yaml new file mode 100644 index 0000000000..3887db614e --- /dev/null +++ b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/failure-domain-patch.yaml @@ -0,0 +1,53 @@ +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: NutanixCluster +metadata: + name: ${CLUSTER_NAME} + namespace: ${NAMESPACE} +spec: + failureDomains: + - name: ${NUTANIX_FAILURE_DOMAIN_1_NAME} + controlPlane: true + cluster: + name: ${NUTANIX_FAILURE_DOMAIN_1_PRISM_ELEMENT_NAME} + type: name + subnets: + - name: ${NUTANIX_FAILURE_DOMAIN_1_SUBNET_NAME} + type: name + - name: ${NUTANIX_FAILURE_DOMAIN_2_NAME} + controlPlane: true + cluster: + name: ${NUTANIX_FAILURE_DOMAIN_2_PRISM_ELEMENT_NAME} + type: name + subnets: + - name: ${NUTANIX_FAILURE_DOMAIN_2_SUBNET_NAME} + type: name + - name: 
${NUTANIX_FAILURE_DOMAIN_3_NAME} + controlPlane: true + cluster: + name: ${NUTANIX_FAILURE_DOMAIN_3_PRISM_ELEMENT_NAME} + type: name + subnets: + - name: ${NUTANIX_FAILURE_DOMAIN_3_SUBNET_NAME} + type: name +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: ${CLUSTER_NAME}-kcp + namespace: ${NAMESPACE} +spec: + replicas: 3 +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + labels: + cluster.x-k8s.io/cluster-name: ${CLUSTER_NAME} + name: ${CLUSTER_NAME}-wmd + namespace: ${NAMESPACE} +spec: + replicas: 0 + template: + spec: + failureDomain: ${NUTANIX_FAILURE_DOMAIN_1_NAME} diff --git a/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/kustomization.yaml b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/kustomization.yaml new file mode 100644 index 0000000000..2c20306fc0 --- /dev/null +++ b/test/e2e/data/infrastructure-nutanix/v1beta1/cluster-template-failure-domains/kustomization.yaml @@ -0,0 +1,12 @@ +bases: + - ../../../../../../templates/base/cluster-with-kcp.yaml + - ../../../../../../templates/base/secret.yaml + - ../../../../../../templates/base/cm.yaml + - ../../../../../../templates/base/md.yaml + - ../../../../../../templates/base/mhc.yaml + - ../base/crs.yaml + - failure-domain-nmt.yaml + +patchesStrategicMerge: + - ../base/cni-patch.yaml + - failure-domain-patch.yaml diff --git a/test/e2e/failure_domains_test.go b/test/e2e/failure_domains_test.go new file mode 100644 index 0000000000..b3803cb569 --- /dev/null +++ b/test/e2e/failure_domains_test.go @@ -0,0 +1,109 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2023 Nutanix + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"context"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	corev1 "k8s.io/api/core/v1"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
+
+	infrav1 "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/api/v1beta1"
+)
+
+const ( // names of the e2e config variables that hold the three failure domain names
+	nutanixFailureDomain1NameEnv = "NUTANIX_FAILURE_DOMAIN_1_NAME"
+	nutanixFailureDomain2NameEnv = "NUTANIX_FAILURE_DOMAIN_2_NAME"
+	nutanixFailureDomain3NameEnv = "NUTANIX_FAILURE_DOMAIN_3_NAME"
+)
+
+// Failure-domain e2e spec: deploys a cluster with the "failure-domains" flavor, then checks the FailureDomainsReconciled condition and the placement of machines. Note: still has the "only-for-validation" label.
+var _ = Describe("Nutanix failure domains", Label("capx-feature-test", "failure-domains", "only-for-validation", "slow", "network"), func() {
+	const specName = "failure-domains"
+
+	var (
+		namespace          *corev1.Namespace                             // set from setupSpecNamespace in BeforeEach
+		clusterName        string                                        // generated from specName in BeforeEach
+		clusterResources   *clusterctl.ApplyClusterTemplateAndWaitResult // passed to deployClusterAndWait; its Cluster is handed to the AfterEach cleanup
+		cancelWatches      context.CancelFunc                            // set from setupSpecNamespace in BeforeEach
+		failureDomainNames []string                                      // values of the three failure domain name variables from the e2e config
+		testHelper         testHelperInterface
+	)
+
+	BeforeEach(func() { // rebuilds all per-spec state so each spec starts from a fresh namespace and cluster name
+		testHelper = newTestHelper(e2eConfig)
+		failureDomainNames = []string{
+			testHelper.getVariableFromE2eConfig(nutanixFailureDomain1NameEnv),
+			testHelper.getVariableFromE2eConfig(nutanixFailureDomain2NameEnv),
+			testHelper.getVariableFromE2eConfig(nutanixFailureDomain3NameEnv),
+		}
+		clusterName = testHelper.generateTestClusterName(specName)
+		clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
+		Expect(bootstrapClusterProxy).NotTo(BeNil(), "BootstrapClusterProxy can't be nil")
+		namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder)
+	})
+
+	AfterEach(func() { // dumps spec resources and cleans up; skipCleanup is passed through to the helper
+		dumpSpecResourcesAndCleanup(ctx, specName, bootstrapClusterProxy, artifactFolder, namespace, cancelWatches, clusterResources.Cluster, e2eConfig.GetIntervals, skipCleanup)
+	})
+
+	It("Create a cluster with multiple failure domains", func() {
+		const flavor = "failure-domains" // NOTE(review): presumably selects cluster-template-failure-domains.yaml - confirm against the e2e config
+
+		Expect(namespace).NotTo(BeNil())
+
+		By("Creating a workload cluster")
+		testHelper.deployClusterAndWait(
+			deployClusterParams{
+				clusterName:           clusterName,
+				namespace:             namespace,
+				flavor:                flavor,
+				clusterctlConfigPath:  clusterctlConfigPath,
+				artifactFolder:        artifactFolder,
+				bootstrapClusterProxy: bootstrapClusterProxy,
+			}, clusterResources)
+
+		By("Checking failure domain condition is true", func() {
+			testHelper.verifyConditionOnNutanixCluster(verifyConditionParams{
+				clusterName:           clusterName,
+				namespace:             namespace,
+				bootstrapClusterProxy: bootstrapClusterProxy,
+				expectedCondition: clusterv1.Condition{
+					Type:   infrav1.FailureDomainsReconciled,
+					Status: corev1.ConditionTrue,
+				},
+			})
+		})
+
+		By("Checking if machines are spread across failure domains", func() {
+			testHelper.verifyFailureDomainsOnClusterMachines(ctx, verifyFailureDomainsOnClusterMachinesParams{
+				clusterName:           clusterName,
+				namespace:             namespace,
+				bootstrapClusterProxy: bootstrapClusterProxy,
+				failureDomainNames:    failureDomainNames,
+			})
+		})
+
+		By("PASSED!")
+	})
+})
diff --git a/test/e2e/test_helpers.go b/test/e2e/test_helpers.go
index 19018d74db..0dd9b9dfc6 100644
--- a/test/e2e/test_helpers.go
+++ b/test/e2e/test_helpers.go
@@ -101,6 +101,7 @@ type testHelperInterface interface {
 	verifyCategoriesNutanixMachines(ctx context.Context, clusterName, namespace string, expectedCategories map[string]string)
 	verifyConditionOnNutanixCluster(params verifyConditionParams)
 	verifyConditionOnNutanixMachines(params verifyConditionParams)
+	verifyFailureDomainsOnClusterMachines(ctx context.Context, params verifyFailureDomainsOnClusterMachinesParams)
verifyFailureMessageOnClusterMachines(ctx context.Context, params verifyFailureMessageOnClusterMachinesParams)
 	verifyGPUNutanixMachines(ctx context.Context, params verifyGPUNutanixMachinesParams)
 	verifyProjectNutanixMachines(ctx context.Context, params verifyProjectNutanixMachinesParams)
@@ -601,6 +602,61 @@ func (t testHelper) verifyConditionOnNutanixMachines(params verifyConditionParam
 	)
 }
 
+// verifyFailureDomainsOnClusterMachinesParams bundles the inputs of
+// verifyFailureDomainsOnClusterMachines.
+type verifyFailureDomainsOnClusterMachinesParams struct {
+	clusterName           string
+	namespace             *corev1.Namespace
+	failureDomainNames    []string
+	bootstrapClusterProxy framework.ClusterProxy
+}
+
+// verifyFailureDomainsOnClusterMachines polls, until defaultTimeout, for every name in
+// params.failureDomainNames to be used by at least one Machine of the cluster. For the
+// first such Machine it asserts that the backing VM reports the failure domain's
+// cluster name and the name of the failure domain's first subnet on its first NIC.
+func (t testHelper) verifyFailureDomainsOnClusterMachines(ctx context.Context, params verifyFailureDomainsOnClusterMachinesParams) {
+	Eventually(func() bool {
+		nutanixCluster := t.getNutanixClusterByName(ctx, getNutanixClusterByNameInput{
+			Getter:    params.bootstrapClusterProxy.GetClient(),
+			Name:      params.clusterName,
+			Namespace: params.namespace.Name,
+		})
+		Expect(nutanixCluster).ToNot(BeNil())
+		// The machine list does not depend on the failure domain, so fetch it once per poll.
+		machines := t.getMachinesForCluster(ctx, params.clusterName, params.namespace.Name, params.bootstrapClusterProxy)
+		for _, fdName := range params.failureDomainNames {
+			// Tracked per failure domain: a match for an earlier domain must not satisfy this one.
+			match := false
+			for _, m := range machines.Items {
+				// Skip Machines in another (or no) failure domain and Machines without a provider ID;
+				// both fields are pointers and would panic on dereference when unset.
+				if m.Spec.FailureDomain == nil || *m.Spec.FailureDomain != fdName || m.Spec.ProviderID == nil {
+					continue
+				}
+				match = true
+				// Search for failure domain
+				fd, err := controllers.GetFailureDomain(fdName, nutanixCluster)
+				Expect(err).ShouldNot(HaveOccurred())
+				Expect(fd).ToNot(BeNil())
+				// Search for VM
+				machineVMUUID := t.stripNutanixIDFromProviderID(*m.Spec.ProviderID)
+				vm, err := t.nutanixClient.V3.GetVM(ctx, machineVMUUID)
+				Expect(err).ShouldNot(HaveOccurred())
+				Expect(vm).ToNot(BeNil())
+				// Check if correct PE and subnet are used
+				Expect(*vm.Spec.ClusterReference.Name).To(Equal(*fd.Cluster.Name))
+				Expect(*vm.Spec.Resources.NicList[0].SubnetReference.Name).To(Equal(*fd.Subnets[0].Name))
+				break
+			}
+			if !match {
+				return false
+			}
+		}
+		return true
+	}, defaultTimeout, defaultInterval).Should(BeTrue())
+}
+
 type verifyFailureMessageOnClusterMachinesParams struct {
 	clusterName string
 	namespace *corev1.Namespace