From 56f695099f97dab6d51044fe538ac665b139239b Mon Sep 17 00:00:00 2001 From: Yannick Struyf Date: Tue, 12 Apr 2022 18:54:36 +0200 Subject: [PATCH] CAPI controller modification: network and reconciliation loop (#15) * add debug mode for manager client * modified vm uuid string type * refactor wait for tasks * modify subnet spec to list for NutanixMachine * Modify nutanix machine controller to use subnet list * added kube-vip and subnet list to cluster-template --- api/v1alpha4/nutanixmachine_types.go | 5 +- api/v1alpha4/zz_generated.conversion.go | 12 +- api/v1alpha4/zz_generated.deepcopy.go | 13 +- api/v1beta1/nutanixmachine_types.go | 5 +- api/v1beta1/zz_generated.deepcopy.go | 13 +- cluster-template.yaml | 67 ++- ...ture.cluster.x-k8s.io_nutanixmachines.yaml | 74 +-- ...ster.x-k8s.io_nutanixmachinetemplates.yaml | 80 +-- controllers/helpers.go | 331 +++++++++++ controllers/nutanixmachine_controller.go | 517 +++++++----------- pkg/client/client.go | 21 +- pkg/client/state.go | 37 ++ 12 files changed, 748 insertions(+), 427 deletions(-) create mode 100644 controllers/helpers.go diff --git a/api/v1alpha4/nutanixmachine_types.go b/api/v1alpha4/nutanixmachine_types.go index 272016d30b..e6e73b48b4 100644 --- a/api/v1alpha4/nutanixmachine_types.go +++ b/api/v1alpha4/nutanixmachine_types.go @@ -66,7 +66,8 @@ type NutanixMachineSpec struct { // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. 
// +kubebuilder:validation:Required - Subnet NutanixResourceIdentifier `json:"subnet"` + // +kubebuilder:validation:MinItems=1 + Subnets []NutanixResourceIdentifier `json:"subnet"` // systemDiskSize is size (in Quantity format) of the system disk of the VM // The minimum systemDiskSize is 20Gi bytes @@ -94,7 +95,7 @@ type NutanixMachineStatus struct { // The Nutanix VM's UUID // +optional - VmUUID *string `json:"vmUUID,omitempty"` + VmUUID string `json:"vmUUID,omitempty"` // NodeRef is a reference to the corresponding workload cluster Node if it exists. // +optional diff --git a/api/v1alpha4/zz_generated.conversion.go b/api/v1alpha4/zz_generated.conversion.go index 32347c110c..e48ff2ec95 100644 --- a/api/v1alpha4/zz_generated.conversion.go +++ b/api/v1alpha4/zz_generated.conversion.go @@ -383,9 +383,7 @@ func autoConvert_v1alpha4_NutanixMachineSpec_To_v1beta1_NutanixMachineSpec(in *N if err := Convert_v1alpha4_NutanixResourceIdentifier_To_v1beta1_NutanixResourceIdentifier(&in.Cluster, &out.Cluster, s); err != nil { return err } - if err := Convert_v1alpha4_NutanixResourceIdentifier_To_v1beta1_NutanixResourceIdentifier(&in.Subnet, &out.Subnet, s); err != nil { - return err - } + out.Subnets = *(*[]v1beta1.NutanixResourceIdentifier)(unsafe.Pointer(&in.Subnets)) out.SystemDiskSize = in.SystemDiskSize out.BootstrapRef = (*v1.ObjectReference)(unsafe.Pointer(in.BootstrapRef)) return nil @@ -402,9 +400,7 @@ func autoConvert_v1beta1_NutanixMachineSpec_To_v1alpha4_NutanixMachineSpec(in *v if err := Convert_v1beta1_NutanixResourceIdentifier_To_v1alpha4_NutanixResourceIdentifier(&in.Cluster, &out.Cluster, s); err != nil { return err } - if err := Convert_v1beta1_NutanixResourceIdentifier_To_v1alpha4_NutanixResourceIdentifier(&in.Subnet, &out.Subnet, s); err != nil { - return err - } + out.Subnets = *(*[]NutanixResourceIdentifier)(unsafe.Pointer(&in.Subnets)) out.SystemDiskSize = in.SystemDiskSize out.BootstrapRef = (*v1.ObjectReference)(unsafe.Pointer(in.BootstrapRef)) return 
nil @@ -413,7 +409,7 @@ func autoConvert_v1beta1_NutanixMachineSpec_To_v1alpha4_NutanixMachineSpec(in *v func autoConvert_v1alpha4_NutanixMachineStatus_To_v1beta1_NutanixMachineStatus(in *NutanixMachineStatus, out *v1beta1.NutanixMachineStatus, s conversion.Scope) error { out.Ready = in.Ready out.Addresses = *(*[]apiv1beta1.MachineAddress)(unsafe.Pointer(&in.Addresses)) - out.VmUUID = (*string)(unsafe.Pointer(in.VmUUID)) + out.VmUUID = in.VmUUID out.NodeRef = (*v1.ObjectReference)(unsafe.Pointer(in.NodeRef)) out.Conditions = *(*apiv1beta1.Conditions)(unsafe.Pointer(&in.Conditions)) return nil @@ -422,7 +418,7 @@ func autoConvert_v1alpha4_NutanixMachineStatus_To_v1beta1_NutanixMachineStatus(i func autoConvert_v1beta1_NutanixMachineStatus_To_v1alpha4_NutanixMachineStatus(in *v1beta1.NutanixMachineStatus, out *NutanixMachineStatus, s conversion.Scope) error { out.Ready = in.Ready out.Addresses = *(*[]apiv1alpha4.MachineAddress)(unsafe.Pointer(&in.Addresses)) - out.VmUUID = (*string)(unsafe.Pointer(in.VmUUID)) + out.VmUUID = in.VmUUID out.NodeRef = (*v1.ObjectReference)(unsafe.Pointer(in.NodeRef)) out.Conditions = *(*apiv1alpha4.Conditions)(unsafe.Pointer(&in.Conditions)) return nil diff --git a/api/v1alpha4/zz_generated.deepcopy.go b/api/v1alpha4/zz_generated.deepcopy.go index 16c7fb6796..25d6764284 100644 --- a/api/v1alpha4/zz_generated.deepcopy.go +++ b/api/v1alpha4/zz_generated.deepcopy.go @@ -196,7 +196,13 @@ func (in *NutanixMachineSpec) DeepCopyInto(out *NutanixMachineSpec) { out.MemorySize = in.MemorySize.DeepCopy() in.Image.DeepCopyInto(&out.Image) in.Cluster.DeepCopyInto(&out.Cluster) - in.Subnet.DeepCopyInto(&out.Subnet) + if in.Subnets != nil { + in, out := &in.Subnets, &out.Subnets + *out = make([]NutanixResourceIdentifier, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } out.SystemDiskSize = in.SystemDiskSize.DeepCopy() if in.BootstrapRef != nil { in, out := &in.BootstrapRef, &out.BootstrapRef @@ -223,11 +229,6 @@ func (in 
*NutanixMachineStatus) DeepCopyInto(out *NutanixMachineStatus) { *out = make([]apiv1alpha4.MachineAddress, len(*in)) copy(*out, *in) } - if in.VmUUID != nil { - in, out := &in.VmUUID, &out.VmUUID - *out = new(string) - **out = **in - } if in.NodeRef != nil { in, out := &in.NodeRef, &out.NodeRef *out = new(v1.ObjectReference) diff --git a/api/v1beta1/nutanixmachine_types.go b/api/v1beta1/nutanixmachine_types.go index 0f9eb644d2..945376f850 100644 --- a/api/v1beta1/nutanixmachine_types.go +++ b/api/v1beta1/nutanixmachine_types.go @@ -66,7 +66,8 @@ type NutanixMachineSpec struct { // The cluster identifier (uuid or name) can be obtained from the Prism Central console // or using the prism_central API. // +kubebuilder:validation:Required - Subnet NutanixResourceIdentifier `json:"subnet"` + // +kubebuilder:validation:MinItems=1 + Subnets []NutanixResourceIdentifier `json:"subnet"` // systemDiskSize is size (in Quantity format) of the system disk of the VM // The minimum systemDiskSize is 20Gi bytes @@ -94,7 +95,7 @@ type NutanixMachineStatus struct { // The Nutanix VM's UUID // +optional - VmUUID *string `json:"vmUUID,omitempty"` + VmUUID string `json:"vmUUID,omitempty"` // NodeRef is a reference to the corresponding workload cluster Node if it exists. 
// +optional diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 62edc83ad0..cf5201c123 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -196,7 +196,13 @@ func (in *NutanixMachineSpec) DeepCopyInto(out *NutanixMachineSpec) { out.MemorySize = in.MemorySize.DeepCopy() in.Image.DeepCopyInto(&out.Image) in.Cluster.DeepCopyInto(&out.Cluster) - in.Subnet.DeepCopyInto(&out.Subnet) + if in.Subnets != nil { + in, out := &in.Subnets, &out.Subnets + *out = make([]NutanixResourceIdentifier, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } out.SystemDiskSize = in.SystemDiskSize.DeepCopy() if in.BootstrapRef != nil { in, out := &in.BootstrapRef, &out.BootstrapRef @@ -223,11 +229,6 @@ func (in *NutanixMachineStatus) DeepCopyInto(out *NutanixMachineStatus) { *out = make([]apiv1beta1.MachineAddress, len(*in)) copy(*out, *in) } - if in.VmUUID != nil { - in, out := &in.VmUUID, &out.VmUUID - *out = new(string) - **out = **in - } if in.NodeRef != nil { in, out := &in.NodeRef, &out.NodeRef *out = new(v1.ObjectReference) diff --git a/cluster-template.yaml b/cluster-template.yaml index 93b9d64f42..1a3e3d77de 100644 --- a/cluster-template.yaml +++ b/cluster-template.yaml @@ -6,7 +6,7 @@ metadata: spec: controlPlaneEndpoint: host: "${CONTROL_PLANE_ENDPOINT_IP}" - port: ${CONTROLPLANE_ENDPOINT_PORT=6443} + port: ${CONTROL_PLANE_ENDPOINT_PORT=6443} --- apiVersion: cluster.x-k8s.io/v1beta1 @@ -53,8 +53,8 @@ spec: type: name name: "${NUTANIX_PRISM_ELEMENT_CLUSTER_NAME}" subnet: - type: name - name: "${NUTANIX_SUBNET_NAME}" + - type: name + name: "${NUTANIX_SUBNET_NAME}" --- apiVersion: controlplane.cluster.x-k8s.io/v1beta1 @@ -80,6 +80,66 @@ spec: controllerManager: extraArgs: enable-hostpath-provisioner: "true" + files: + - content: | + apiVersion: v1 + kind: Pod + metadata: + name: kube-vip + namespace: kube-system + spec: + containers: + - name: kube-vip + image: 
ghcr.io/kube-vip/kube-vip:v0.4.2 + imagePullPolicy: IfNotPresent + args: + - manager + env: + - name: vip_arp + value: "true" + - name: address + value: "${CONTROL_PLANE_ENDPOINT_IP}" + - name: port + value: "${CONTROL_PLANE_ENDPOINT_PORT=6443}" + - name: vip_cidr + value: "32" + - name: cp_enable + value: "true" + - name: cp_namespace + value: kube-system + - name: vip_ddns + value: "false" + - name: vip_leaderelection + value: "true" + - name: vip_leaseduration + value: "15" + - name: vip_renewdeadline + value: "10" + - name: vip_retryperiod + value: "2" + securityContext: + capabilities: + add: + - NET_ADMIN + - SYS_TIME + - NET_RAW + volumeMounts: + - mountPath: /etc/kubernetes/admin.conf + name: kubeconfig + resources: {} + hostNetwork: true + hostAliases: + - hostnames: + - kubernetes + ip: 127.0.0.1 + volumes: + - name: kubeconfig + hostPath: + type: FileOrCreate + path: /etc/kubernetes/admin.conf + status: {} + owner: root:root + path: /etc/kubernetes/manifests/kube-vip.yaml initConfiguration: nodeRegistration: kubeletExtraArgs: @@ -98,6 +158,7 @@ spec: postKubeadmCommands: - echo export KUBECONFIG=/etc/kubernetes/admin.conf >> /root/.bashrc - echo "after kubeadm call" > /var/log/postkubeadm.log + useExperimentalRetryJoin: true verbosity: 10 --- diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml index 3fca435782..038be0668f 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachines.yaml @@ -150,22 +150,27 @@ spec: use for the Machine's VM The cluster identifier (uuid or name) can be obtained from the Prism Central console or using the prism_central API. - properties: - name: - description: name is the resource name in the PC - type: string - type: - description: Type is the identifier type to use for this resource. 
- enum: - - uuid - - name - type: string - uuid: - description: uuid is the UUID of the resource in the PC. - type: string - required: - - type - type: object + items: + description: NutanixResourceIdentifier holds the identity of a Nutanix + PC resource (cluster, image, subnet, etc.) + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. + type: string + required: + - type + type: object + minItems: 1 + type: array systemDiskSize: anyOf: - type: integer @@ -441,22 +446,27 @@ spec: use for the Machine's VM The cluster identifier (uuid or name) can be obtained from the Prism Central console or using the prism_central API. - properties: - name: - description: name is the resource name in the PC - type: string - type: - description: Type is the identifier type to use for this resource. - enum: - - uuid - - name - type: string - uuid: - description: uuid is the UUID of the resource in the PC. - type: string - required: - - type - type: object + items: + description: NutanixResourceIdentifier holds the identity of a Nutanix + PC resource (cluster, image, subnet, etc.) + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for this resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the PC. 
+ type: string + required: + - type + type: object + minItems: 1 + type: array systemDiskSize: anyOf: - type: integer diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml index 507d246776..415215ad65 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_nutanixmachinetemplates.yaml @@ -168,23 +168,29 @@ spec: to use for the Machine's VM The cluster identifier (uuid or name) can be obtained from the Prism Central console or using the prism_central API. - properties: - name: - description: name is the resource name in the PC - type: string - type: - description: Type is the identifier type to use for this - resource. - enum: - - uuid - - name - type: string - uuid: - description: uuid is the UUID of the resource in the PC. - type: string - required: - - type - type: object + items: + description: NutanixResourceIdentifier holds the identity + of a Nutanix PC resource (cluster, image, subnet, etc.) + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for + this resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the + PC. + type: string + required: + - type + type: object + minItems: 1 + type: array systemDiskSize: anyOf: - type: integer @@ -373,23 +379,29 @@ spec: to use for the Machine's VM The cluster identifier (uuid or name) can be obtained from the Prism Central console or using the prism_central API. - properties: - name: - description: name is the resource name in the PC - type: string - type: - description: Type is the identifier type to use for this - resource. - enum: - - uuid - - name - type: string - uuid: - description: uuid is the UUID of the resource in the PC. 
- type: string - required: - - type - type: object + items: + description: NutanixResourceIdentifier holds the identity + of a Nutanix PC resource (cluster, image, subnet, etc.) + properties: + name: + description: name is the resource name in the PC + type: string + type: + description: Type is the identifier type to use for + this resource. + enum: + - uuid + - name + type: string + uuid: + description: uuid is the UUID of the resource in the + PC. + type: string + required: + - type + type: object + minItems: 1 + type: array systemDiskSize: anyOf: - type: integer diff --git a/controllers/helpers.go b/controllers/helpers.go new file mode 100644 index 0000000000..eb8e633e1f --- /dev/null +++ b/controllers/helpers.go @@ -0,0 +1,331 @@ +/* +Copyright 2021. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package controllers
+
+import (
+	"fmt"
+	"strings"
+
+	infrav1 "github.com/nutanix-core/cluster-api-provider-nutanix/api/v1beta1"
+	nutanixClient "github.com/nutanix-core/cluster-api-provider-nutanix/pkg/client"
+	nutanixClientV3 "github.com/nutanix-core/cluster-api-provider-nutanix/pkg/nutanix/v3"
+	"github.com/nutanix-core/cluster-api-provider-nutanix/pkg/utils"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/klog/v2"
+)
+
+const (
+	// taskSucceededMessage is the only task state that hasTaskInProgress
+	// treats as "no longer in progress".
+	taskSucceededMessage = "SUCCEEDED"
+)
+
+// deleteVM requests deletion of the VM with the given UUID and returns the
+// UUID of the deletion task reported in the response.
+//
+// An empty vmUUID is treated as "nothing to delete": a warning is logged and
+// ("", nil) is returned. vmName is only used for log and error messages.
+func deleteVM(client *nutanixClientV3.Client, vmName, vmUUID string) (string, error) {
+	if vmUUID == "" {
+		klog.Warning("VmUUID was empty. Skipping delete")
+		return "", nil
+	}
+
+	klog.Infof("Deleting VM %s with UUID: %s", vmName, vmUUID)
+	vmDeleteResponse, err := client.V3.DeleteVM(vmUUID)
+	if err != nil {
+		klog.Infof("Error deleting machine %s", vmName)
+		return "", err
+	}
+
+	// TaskUUID is an untyped interface value. Use the comma-ok form so an
+	// unexpected shape (nil, list, ...) is reported as an error instead of
+	// panicking inside the reconciler.
+	rawTaskUUID := vmDeleteResponse.Status.ExecutionContext.TaskUUID
+	deleteTaskUUID, ok := rawTaskUUID.(string)
+	if !ok {
+		return "", fmt.Errorf("unexpected task uuid type %T in delete response for VM %s", rawTaskUUID, vmName)
+	}
+
+	return deleteTaskUUID, nil
+}
+
+// findVMByUUID retrieves the VM with the given UUID.
+//
+// It returns (nil, nil) when the lookup error text contains ENTITY_NOT_FOUND,
+// and (nil, err) for any other lookup error.
+func findVMByUUID(client *nutanixClientV3.Client, uuid string) (*nutanixClientV3.VMIntentResponse, error) {
+	klog.Infof("Checking if VM with UUID %s exists.", uuid)
+
+	response, err := client.V3.GetVM(uuid)
+	if err != nil {
+		if strings.Contains(fmt.Sprint(err), "ENTITY_NOT_FOUND") {
+			klog.Infof("vm with uuid %s does not exist.", uuid)
+			return nil, nil
+		}
+		klog.Errorf("Failed to find VM by vmUUID %s. error: %v", uuid, err)
+		return nil, err
+	}
+
+	return response, nil
+}
+
+// findVM looks up the VM that backs the given NutanixMachine.
+//
+// When Status.VmUUID is set the VM is looked up by UUID and a missing VM is an
+// error (the UUID was recorded, so the VM is expected to be present).
+// Otherwise the VM is looked up by the machine name, and (nil, nil) means no
+// VM with that name exists.
+func findVM(client *nutanixClientV3.Client, nutanixMachine *infrav1.NutanixMachine) (*nutanixClientV3.VMIntentResponse, error) {
+	vmName := nutanixMachine.Name
+	vmUUID := nutanixMachine.Status.VmUUID
+
+	// No UUID recorded yet: fall back to a search by name.
+	if vmUUID == "" {
+		klog.Infof("Searching for VM %s using name", vmName)
+		vm, err := findVMByName(client, vmName)
+		if err != nil {
+			klog.Errorf("error occurred finding VM %s by name: %v", vmName, err)
+			return nil, err
+		}
+		return vm, nil
+	}
+
+	klog.Infof("Searching for VM %s using UUID %s", vmName, vmUUID)
+	vm, err := findVMByUUID(client, vmUUID)
+	if err != nil {
+		klog.Errorf("error occurred finding VM with uuid %s: %v", vmUUID, err)
+		return nil, err
+	}
+	if vm == nil {
+		notFoundErr := fmt.Errorf("no vm %s found with UUID %s but was expected to be present", vmName, vmUUID)
+		klog.Error(notFoundErr)
+		return nil, notFoundErr
+	}
+	return vm, nil
+}
+
+// findVMByName retrieves the VM with the given name.
+//
+// It lists VMs with a "vm_name==<name>" filter. Zero matches yield (nil, nil),
+// more than one match is an error, and a single match is re-fetched through
+// findVMByUUID.
+func findVMByName(client *nutanixClientV3.Client, vmName string) (*nutanixClientV3.VMIntentResponse, error) {
+	klog.Infof("Checking if VM with name %s exists.", vmName)
+
+	res, err := client.V3.ListVM(&nutanixClientV3.DSMetadata{
+		Filter: utils.StringPtr(fmt.Sprintf("vm_name==%s", vmName))})
+	if err != nil {
+		errorMsg := fmt.Errorf("error occurred when searching for VM by name %s. error: %v", vmName, err)
+		klog.Error(errorMsg)
+		return nil, errorMsg
+	}
+
+	if len(res.Entities) > 1 {
+		// Build the error once with explicit arguments; never feed an already
+		// formatted message back in as a format string.
+		duplicateErr := fmt.Errorf("Found more than one (%v) vms with name %s.", len(res.Entities), vmName)
+		klog.Error(duplicateErr)
+		return nil, duplicateErr
+	}
+
+	if len(res.Entities) == 0 {
+		return nil, nil
+	}
+
+	return findVMByUUID(client, *res.Entities[0].Metadata.UUID)
+}
+
+// getPEUUID resolves a Prism Element cluster to its UUID.
+//
+// At least one of peName and peUUID must be non-nil. When peUUID is given it
+// takes precedence and the cluster is fetched by UUID; otherwise all clusters
+// are listed and exactly one must match peName.
+func getPEUUID(client *nutanixClientV3.Client, peName, peUUID *string) (string, error) {
+	if peUUID == nil && peName == nil {
+		return "", fmt.Errorf("cluster name or uuid must be passed in order to retrieve the pe")
+	}
+
+	if peUUID != nil {
+		peIntentResponse, err := client.V3.GetCluster(*peUUID)
+		if err != nil {
+			if strings.Contains(fmt.Sprint(err), "ENTITY_NOT_FOUND") {
+				return "", fmt.Errorf("failed to find Prism Element cluster with UUID %s: %v", *peUUID, err)
+			}
+			// Any other failure must be returned as well: the response is not
+			// usable and dereferencing it below would panic.
+			return "", fmt.Errorf("error retrieving Prism Element cluster with UUID %s: %w", *peUUID, err)
+		}
+		return *peIntentResponse.Metadata.UUID, nil
+	}
+
+	responsePEs, err := client.V3.ListAllCluster()
+	if err != nil {
+		return "", err
+	}
+	foundPEs := make([]*nutanixClientV3.ClusterIntentResource, 0)
+	for _, s := range responsePEs.Entities {
+		peSpec := s.Spec
+		if *peSpec.Name == *peName {
+			foundPEs = append(foundPEs, s)
+		}
+	}
+	if len(foundPEs) == 0 {
+		return "", fmt.Errorf("failed to retrieve Prism Element cluster by name %s", *peName)
+	}
+	if len(foundPEs) > 1 {
+		return "", fmt.Errorf("more than one Prism Element cluster found with name %s", *peName)
+	}
+	foundPEUUID := *foundPEs[0].Metadata.UUID
+	if foundPEUUID == "" {
+		return "", fmt.Errorf("failed to retrieve Prism Element cluster by name or uuid. Verify input parameters.")
+	}
+	return foundPEUUID, nil
+}
+
+// getMibValueOfQuantity returns the given quantity value in Mib
+// (integer division of the byte value by 1024*1024).
+func getMibValueOfQuantity(quantity resource.Quantity) int64 {
+	return quantity.Value() / (1024 * 1024)
+}
+
+// createSystemDiskSpec builds the VMDisk entry for a system disk that uses
+// the image with the given UUID as its data source.
+//
+// systemDiskSize is stored in the DiskSizeMib field. An empty imageUUID or a
+// non-positive size is rejected with an error.
+func createSystemDiskSpec(imageUUID string, systemDiskSize int64) (*nutanixClientV3.VMDisk, error) {
+	if imageUUID == "" {
+		return nil, fmt.Errorf("image UUID must be set when creating system disk")
+	}
+	if systemDiskSize <= 0 {
+		return nil, fmt.Errorf("Invalid system disk size: %d. Provide in XXGi (for example 70Gi) format instead", systemDiskSize)
+	}
+	systemDisk := &nutanixClientV3.VMDisk{
+		DataSourceReference: &nutanixClientV3.Reference{
+			Kind: utils.StringPtr("image"),
+			UUID: utils.StringPtr(imageUUID),
+		},
+		DiskSizeMib: utils.Int64Ptr(systemDiskSize)}
+	return systemDisk, nil
+}
+
+// getSubnetUUID resolves a subnet to its UUID.
+//
+// At least one of subnetName and subnetUUID must be non-nil. When subnetUUID
+// is given it takes precedence and the subnet is fetched by UUID; otherwise
+// all subnets are listed and exactly one must match both subnetName and the
+// Prism Element cluster identified by peUUID.
+func getSubnetUUID(client *nutanixClientV3.Client, peUUID string, subnetName, subnetUUID *string) (string, error) {
+	if subnetUUID == nil && subnetName == nil {
+		return "", fmt.Errorf("subnet name or subnet uuid must be passed in order to retrieve the subnet")
+	}
+
+	if subnetUUID != nil {
+		subnetIntentResponse, err := client.V3.GetSubnet(*subnetUUID)
+		if err != nil {
+			if strings.Contains(fmt.Sprint(err), "ENTITY_NOT_FOUND") {
+				return "", fmt.Errorf("failed to find subnet with UUID %s: %v", *subnetUUID, err)
+			}
+			// Any other failure must be returned as well: the response is not
+			// usable and dereferencing it below would panic.
+			return "", fmt.Errorf("error retrieving subnet with UUID %s: %w", *subnetUUID, err)
+		}
+		return *subnetIntentResponse.Metadata.UUID, nil
+	}
+
+	responseSubnets, err := client.V3.ListAllSubnet()
+	if err != nil {
+		return "", err
+	}
+	foundSubnets := make([]*nutanixClientV3.SubnetIntentResponse, 0)
+	for _, s := range responseSubnets.Entities {
+		subnetSpec := s.Spec
+		// NOTE(review): this dereferences ClusterReference.UUID for every
+		// listed subnet; confirm the API always populates it.
+		if *subnetSpec.Name == *subnetName && *subnetSpec.ClusterReference.UUID == peUUID {
+			foundSubnets = append(foundSubnets, s)
+		}
+	}
+	if len(foundSubnets) == 0 {
+		return "", fmt.Errorf("failed to retrieve subnet by name %s", *subnetName)
+	}
+	if len(foundSubnets) > 1 {
+		return "", fmt.Errorf("more than one subnet found with name %s", *subnetName)
+	}
+	foundSubnetUUID := *foundSubnets[0].Metadata.UUID
+	if foundSubnetUUID == "" {
+		return "", fmt.Errorf("failed to retrieve subnet by name or uuid. Verify input parameters.")
+	}
+	return foundSubnetUUID, nil
+}
+
+// getImageUUID resolves an image to its UUID.
+//
+// At least one of imageName and imageUUID must be non-nil. When imageUUID is
+// given it takes precedence and the image is fetched by UUID; otherwise all
+// images are listed and exactly one must match imageName.
+func getImageUUID(client *nutanixClientV3.Client, imageName, imageUUID *string) (string, error) {
+	if imageUUID == nil && imageName == nil {
+		return "", fmt.Errorf("image name or image uuid must be passed in order to retrieve the image")
+	}
+
+	if imageUUID != nil {
+		imageIntentResponse, err := client.V3.GetImage(*imageUUID)
+		if err != nil {
+			if strings.Contains(fmt.Sprint(err), "ENTITY_NOT_FOUND") {
+				return "", fmt.Errorf("failed to find image with UUID %s: %v", *imageUUID, err)
+			}
+			// Any other failure must be returned as well: the response is not
+			// usable and dereferencing it below would panic.
+			return "", fmt.Errorf("error retrieving image with UUID %s: %w", *imageUUID, err)
+		}
+		return *imageIntentResponse.Metadata.UUID, nil
+	}
+
+	responseImages, err := client.V3.ListAllImage()
+	if err != nil {
+		return "", err
+	}
+	foundImages := make([]*nutanixClientV3.ImageIntentResponse, 0)
+	for _, s := range responseImages.Entities {
+		imageSpec := s.Spec
+		if *imageSpec.Name == *imageName {
+			foundImages = append(foundImages, s)
+		}
+	}
+	if len(foundImages) == 0 {
+		return "", fmt.Errorf("failed to retrieve image by name %s", *imageName)
+	}
+	if len(foundImages) > 1 {
+		return "", fmt.Errorf("more than one image found with name %s", *imageName)
+	}
+	foundImageUUID := *foundImages[0].Metadata.UUID
+	if foundImageUUID == "" {
+		return "", fmt.Errorf("failed to retrieve image by name or uuid. Verify input parameters.")
+	}
+	return foundImageUUID, nil
+}
+
+// isExistingVM reports whether a VM with the given UUID exists.
+//
+// It returns true when findVMByUUID returns a VM and false when it reports
+// the VM as not found. Any lookup error is logged and returned.
+func isExistingVM(client *nutanixClientV3.Client, vmUUID string) (bool, error) {
+	vm, err := findVMByUUID(client, vmUUID)
+	if err != nil {
+		errorMsg := fmt.Errorf("error finding vm with uuid %s: %v", vmUUID, err)
+		klog.Error(errorMsg)
+		return false, errorMsg
+	}
+
+	// findVMByUUID returns a nil VM for "not found", so the VM exists exactly
+	// when the result is non-nil.
+	return vm != nil, nil
+}
+
+// hasTaskInProgress reports whether the task with the given UUID has not yet
+// reached the SUCCEEDED state.
+//
+// NOTE(review): every state other than SUCCEEDED is reported as in progress.
+// If GetTaskState can return a terminal failure state without an error,
+// callers that requeue on true would do so indefinitely; confirm against
+// GetTaskState.
+func hasTaskInProgress(client *nutanixClientV3.Client, taskUUID string) (bool, error) {
+	taskStatus, err := nutanixClient.GetTaskState(client, taskUUID)
+	if err != nil {
+		return false, err
+	}
+	if taskStatus != taskSucceededMessage {
+		klog.Infof("VM task with UUID %s still in progress: %s. Requeuing", taskUUID, taskStatus)
+		return true, nil
+	}
+	return false, nil
+}
+
+// getTaskUUIDFromVM extracts the task UUID from the VM's execution context.
+//
+// The field is accepted either as a plain string or as a list holding exactly
+// one string. A nil vm, a nil or otherwise typed value, or a list of any
+// other length yields an error.
+func getTaskUUIDFromVM(vm *nutanixClientV3.VMIntentResponse) (string, error) {
+	if vm == nil {
+		return "", fmt.Errorf("cannot extract task uuid from empty vm object")
+	}
+	vmName := *vm.Spec.Name
+
+	// A type switch handles a nil value and unexpected element types without
+	// panicking, unlike reflect.TypeOf(x).Kind() with unchecked assertions.
+	switch task := vm.Status.ExecutionContext.TaskUUID.(type) {
+	case string:
+		return task, nil
+	case []interface{}:
+		if len(task) != 1 {
+			return "", fmt.Errorf("Did not find expected amount of task UUIDs for VM %s", vmName)
+		}
+		taskUUID, ok := task[0].(string)
+		if !ok {
+			return "", fmt.Errorf("Invalid type found for task uuid extracted from vm %s: %T", vmName, task[0])
+		}
+		return taskUUID, nil
+	default:
+		return "", fmt.Errorf("Invalid type found for task uuid extracted from vm %s: %T", vmName, task)
+	}
+}
+
+// getSubnetUUIDList resolves every subnet identifier of a machine to its UUID
+// via getSubnetUUID, preserving the input order.
+//
+// The first resolution failure aborts the lookup and is returned with a nil
+// slice.
+func getSubnetUUIDList(client *nutanixClientV3.Client, machineSubnets []infrav1.NutanixResourceIdentifier, peUUID string) ([]string, error) {
+	subnetUUIDs := make([]string, 0, len(machineSubnets))
+	for _, machineSubnet := range machineSubnets {
+		subnetUUID, err := getSubnetUUID(
+			client,
+			peUUID,
+			machineSubnet.Name,
+			machineSubnet.UUID,
+		)
+		if err != nil {
+			return nil, err
+		}
+		subnetUUIDs = append(subnetUUIDs, subnetUUID)
+	}
+	return subnetUUIDs, nil
+}
diff --git a/controllers/nutanixmachine_controller.go b/controllers/nutanixmachine_controller.go
index
27983c7203..6cb23fbb6c 100644 --- a/controllers/nutanixmachine_controller.go +++ b/controllers/nutanixmachine_controller.go @@ -20,13 +20,11 @@ import ( "context" "encoding/base64" "fmt" - "strings" "time" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/runtime" apitypes "k8s.io/apimachinery/pkg/types" kerrors "k8s.io/apimachinery/pkg/util/errors" @@ -154,6 +152,11 @@ func (r *NutanixMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{Requeue: true}, nil } + client, err := nutanixClient.Client(nutanixClient.ClientOptions{}) + if err != nil { + return ctrl.Result{Requeue: true}, fmt.Errorf("Client Auth error: %v", err) + } + rctx := &nctx.MachineContext{ Context: ctx, Cluster: cluster, @@ -161,16 +164,21 @@ func (r *NutanixMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reque NutanixCluster: ntxCluster, NutanixMachine: ntxMachine, LogPrefix: logPrefix, + NutanixClient: client, } defer func() { - // Always attempt to Patch the NutanixMachine object and its status after each reconciliation. - if err := patchHelper.Patch(ctx, ntxMachine); err != nil { - klog.Errorf("%s Failed to patch NutanixMachine. %v", rctx.LogPrefix, err) - reterr = kerrors.NewAggregate([]error{reterr, err}) + if err == nil { + // Always attempt to Patch the NutanixMachine object and its status after each reconciliation. + if err := patchHelper.Patch(ctx, ntxMachine); err != nil { + klog.Errorf("%s Failed to patch NutanixMachine. %v", rctx.LogPrefix, err) + reterr = kerrors.NewAggregate([]error{reterr, err}) + } + klog.Infof("%s Patched NutanixMachine. Spec: %+v. Status: %+v.", + rctx.LogPrefix, ntxMachine.Spec, ntxMachine.Status) + } else { + klog.Infof("%s Not patching vm since error occurred: %v", rctx.LogPrefix, err) } - klog.Infof("%s Patched NutanixMachine. 
Status: %+v", - rctx.LogPrefix, ntxMachine.Status) }() // Handle deleted machines @@ -183,17 +191,54 @@ func (r *NutanixMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reque } func (r *NutanixMachineReconciler) reconcileDelete(rctx *nctx.MachineContext) (reconcile.Result, error) { - + client := rctx.NutanixClient vmName := rctx.NutanixMachine.Name klog.Infof("%s Handling NutanixMachine deletion of VM: %s", rctx.LogPrefix, vmName) - // Delete the VM - err := deleteVM(rctx) - if err != nil { - klog.Errorf("%s Failed to delete VM %s: %v", rctx.LogPrefix, vmName, err) - return reconcile.Result{}, err + //Check if VMUUID is absent + if rctx.NutanixMachine.Status.VmUUID == "" { + klog.Warningf("%s VMUUID was not found in spec for VM %s. Skipping delete", rctx.LogPrefix, vmName) + } else { + //Search for VM by UUID + vmUUID := rctx.NutanixMachine.Status.VmUUID + vm, err := findVMByUUID(client, vmUUID) + // Error while finding VM + if err != nil { + errorMsg := fmt.Errorf("%v: error finding vm %s with uuid %s: %v", rctx.LogPrefix, vmName, vmUUID, err) + klog.Error(errorMsg) + return reconcile.Result{}, errorMsg + } + // Vm not found + if vm == nil { + klog.Infof("%s No vm found with UUID %s ... Already deleted? Skipping delete", rctx.LogPrefix, vmUUID) + } else { + klog.Infof("%s VM %s with UUID %s was found.", rctx.LogPrefix, vmName, vmUUID) + lastTaskUUID, err := getTaskUUIDFromVM(vm) + if err != nil { + errorMsg := fmt.Errorf("error occurred fetching task UUID from vm: %v", err) + klog.Error(errorMsg) + return reconcile.Result{}, errorMsg + } + klog.Infof("%s checking if VM %s with UUID %s has in progress tasks", rctx.LogPrefix, vmName, vmUUID) + taskInProgress, err := hasTaskInProgress(rctx.NutanixClient, lastTaskUUID) + if err != nil { + klog.Warningf("%s error occurred while checking task %s for VM %s... 
err: %v ....Trying to delete VM", rctx.LogPrefix, lastTaskUUID, vmName, vmUUID, err) + } + if taskInProgress { + klog.Infof("VM %s task with UUID %s still in progress. Requeuing", vmName, vmUUID) + return reconcile.Result{RequeueAfter: 5 * time.Second}, nil + } + klog.Infof("%s No running tasks anymore... Initiating delete for vm %s with UUID %s", rctx.LogPrefix, vmName, vmUUID) + // Delete the VM since the VM was found (err was nil) + deleteTaskUUID, err := deleteVM(client, vmName, vmUUID) + if err != nil { + klog.Errorf("%s Failed to delete VM %s with UUID %s: %v", rctx.LogPrefix, vmName, vmUUID, err) + return reconcile.Result{}, err + } + klog.Infof("%s Deletion task with UUID %s received for vm %s with UUID %s. Requeueing", rctx.LogPrefix, deleteTaskUUID, vmName, vmUUID) + return reconcile.Result{RequeueAfter: 5 * time.Second}, nil + } } - // Remove the finalizer from the NutanixMachine object klog.Errorf("%s Removing finalizers for VM %s during delete reconciliation", rctx.LogPrefix, vmName) ctrlutil.RemoveFinalizer(rctx.NutanixMachine, infrav1.NutanixMachineFinalizer) @@ -211,6 +256,7 @@ func (r *NutanixMachineReconciler) reconcileNormal(rctx *nctx.MachineContext) (r ctrlutil.AddFinalizer(rctx.NutanixMachine, infrav1.NutanixMachineFinalizer) } + klog.Infof("%s Checking current machine status for machine %s: Status %+v Spec %+v", rctx.LogPrefix, rctx.NutanixMachine.Name, rctx.NutanixMachine.Status, rctx.NutanixMachine.Spec) if rctx.NutanixMachine.Status.Ready { if !rctx.Machine.Status.InfrastructureReady || rctx.Machine.Spec.ProviderID == nil { klog.Infof("%s The NutanixMachine is ready, wait for the owner Machine's update.", rctx.LogPrefix) @@ -230,6 +276,7 @@ func (r *NutanixMachineReconciler) reconcileNormal(rctx *nctx.MachineContext) (r } // Make sure Cluster.Status.InfrastructureReady is true + klog.Infof("%s Checking if cluster infrastructure is ready", rctx.LogPrefix) if !rctx.Cluster.Status.InfrastructureReady { klog.Infof("%s The cluster 
infrastructure is not ready yet", rctx.LogPrefix) return reconcile.Result{}, nil @@ -256,14 +303,34 @@ func (r *NutanixMachineReconciler) reconcileNormal(rctx *nctx.MachineContext) (r klog.Infof("%s Added the spec.bootstrapRef to NutanixMachine object: %v", rctx.LogPrefix, rctx.NutanixMachine.Spec.BootstrapRef) } - // Create the VM - err = r.createVM(rctx) + // Create the or get existing VM + vm, err := r.getOrCreateVM(rctx) if err != nil { klog.Errorf("%s Failed to create VM %s.", rctx.LogPrefix, rctx.NutanixMachine.Name) return reconcile.Result{}, err } - klog.Infof("%s Created VM with name: %s, vmUUID: %s", rctx.LogPrefix, rctx.NutanixMachine.Name, *rctx.NutanixMachine.Status.VmUUID) - + klog.Infof("%s Found VM with name: %s, vmUUID: %s", rctx.LogPrefix, rctx.NutanixMachine.Name, *vm.Metadata.UUID) + rctx.NutanixMachine.Status.VmUUID = *vm.Metadata.UUID + klog.Infof("%s Patching machine post creation name: %s, vmUUID: %s", rctx.LogPrefix, rctx.NutanixMachine.Name, rctx.NutanixMachine.Status.VmUUID) + err = r.patchMachine(rctx) + if err != nil { + errorMsg := fmt.Errorf("%s Failed to patch NutanixMachine %s after creation. 
%v", rctx.LogPrefix, rctx.NutanixMachine.Name, err) + klog.Error(errorMsg) + return reconcile.Result{}, errorMsg + } + klog.Infof("%s Assigning IP addresses to VM with name: %s, vmUUID: %s", rctx.LogPrefix, rctx.NutanixMachine.Name, rctx.NutanixMachine.Status.VmUUID) + err = r.assignAddressesToMachine(rctx, vm) + if err != nil { + errorMsg := fmt.Errorf("Failed to assign addresses to VM %s with UUID %s...: %v", rctx.NutanixMachine.Name, rctx.NutanixMachine.Status.VmUUID, err) + klog.Error(errorMsg) + return reconcile.Result{}, errorMsg + } + // Update the NutanixMachine Spec.ProviderID + rctx.NutanixMachine.Spec.ProviderID = fmt.Sprintf(provideridFmt, rctx.NutanixMachine.Status.VmUUID) + rctx.NutanixMachine.Status.Ready = true + klog.Infof("%s Created VM %s for cluster %s, update NutanixMachine spec.providerID to %s, and machinespec %+v, vmUuid: %s", + rctx.LogPrefix, rctx.NutanixMachine.Name, rctx.NutanixCluster.Name, rctx.NutanixMachine.Spec.ProviderID, + rctx.NutanixMachine, rctx.NutanixMachine.Status.VmUUID) return reconcile.Result{}, nil } @@ -298,8 +365,9 @@ func (r *NutanixMachineReconciler) reconcileNode(rctx *nctx.MachineContext) erro } if apierrors.IsNotFound(err) { - klog.Infof("%s Wait for the workload node %s to get ready ...", rctx.LogPrefix, nodeName) - time.Sleep(5 * time.Second) + errorMessage := fmt.Sprintf("%s workload node %s not yet ready ... 
Retrying", rctx.LogPrefix, nodeName) + klog.Errorf(errorMessage) + return fmt.Errorf(errorMessage) } else { klog.Errorf("%s Failed to retrieve the remote workload cluster node %s", rctx.LogPrefix, nodeName) return err @@ -335,56 +403,52 @@ func (r *NutanixMachineReconciler) reconcileNode(rctx *nctx.MachineContext) erro return nil } -// CreateVM creates a VM and is invoked by the NutanixMachineReconciler -func (r *NutanixMachineReconciler) createVM(rctx *nctx.MachineContext) error { +// GetOrCreateVM creates a VM and is invoked by the NutanixMachineReconciler +func (r *NutanixMachineReconciler) getOrCreateVM(rctx *nctx.MachineContext) (*nutanixClientV3.VMIntentResponse, error) { var err error - - client, err := nutanixClient.Client(nutanixClient.ClientOptions{}) - if err != nil { - return fmt.Errorf("Client Auth error: %v", err) - } - rctx.NutanixClient = client - var vm *nutanixClientV3.VMIntentResponse - var vmUuid string vmName := rctx.NutanixMachine.Name + client := rctx.NutanixClient // Check if the VM already exists - if rctx.NutanixMachine.Status.VmUUID != nil { - // Try to find the vm by uuid - vm, err = findVMByUUID(rctx, *rctx.NutanixMachine.Status.VmUUID) - if err == nil { - klog.Infof("%s The VM with UUID %s already exists. No need to create one.", rctx.LogPrefix, *rctx.NutanixMachine.Status.VmUUID) - vmUuid = *vm.Metadata.UUID - } + vm, err = findVM(client, rctx.NutanixMachine) + if err != nil { + klog.Errorf("%s error occurred finding VM %s by name or uuid %s: %v", rctx.LogPrefix, vmName, err) + return nil, err } - - if len(vmUuid) == 0 { - klog.Infof("%s Starting creation process of VM %s.", rctx.LogPrefix, vmName) + if vm != nil { + klog.Infof("%s vm %s found with UUID %s", rctx.LogPrefix, *vm.Spec.Name, rctx.NutanixMachine.Status.VmUUID) + return vm, nil + } else { + klog.Infof("%s No existing VM found. 
Starting creation process of VM %s.", rctx.LogPrefix, vmName) // Get PE UUID - peUUID, err := getPEUUID(rctx) + peUUID, err := getPEUUID(client, rctx.NutanixMachine.Spec.Cluster.Name, rctx.NutanixMachine.Spec.Cluster.UUID) if err != nil { klog.Errorf("%s Failed to get the Prism Element Cluster UUID to create the VM %s. %v", rctx.LogPrefix, vmName, err) - return err + return nil, err } - // Get Subnet UUID - subnetUUID, err := getSubnetUUID(rctx, peUUID) + // Get Subnet UUIDs + subnetUUIDs, err := getSubnetUUIDList(client, rctx.NutanixMachine.Spec.Subnets, peUUID) if err != nil { - klog.Errorf("%s Failed to get the subnet UUID to create the VM %s. %v", rctx.LogPrefix, vmName, err) - return err + klog.Errorf("%s Failed to get the subnet UUIDs to create the VM %s. %v", rctx.LogPrefix, vmName, err) + return nil, err } // Get Image UUID - imageUUID, err := getImageUUID(rctx) + imageUUID, err := getImageUUID( + client, + rctx.NutanixMachine.Spec.Image.Name, + rctx.NutanixMachine.Spec.Image.UUID, + ) if err != nil { klog.Errorf("%s Failed to get the image UUID to create the VM %s. %v", rctx.LogPrefix, vmName, err) - return err + return nil, err } // Get the bootstrapData from the referenced secret bootstrapData, err := r.getBootstrapData(rctx) if err != nil { klog.Errorf("%s Failed to get the bootstrap data for create the VM %s. 
%v", rctx.LogPrefix, vmName, err) - return err + return nil, err } // Encode the bootstrapData by base64 bsdataEncoded := base64.StdEncoding.EncodeToString(bootstrapData) @@ -396,23 +460,19 @@ func (r *NutanixMachineReconciler) createVM(rctx *nctx.MachineContext) error { rctx.NutanixMachine.Name, rctx.NutanixCluster.Name) vmInput := nutanixClientV3.VMIntentInput{} vmSpec := nutanixClientV3.VM{Name: utils.StringPtr(vmName)} - vmNic := &nutanixClientV3.VMNic{ - SubnetReference: &nutanixClientV3.Reference{ - UUID: utils.StringPtr(subnetUUID), - Kind: utils.StringPtr("subnet"), - }} - nicList := []*nutanixClientV3.VMNic{vmNic} - // If this is controlplane node Machine, use the cluster's spec.controlPlaneEndpoint host IP to create VM - if nctx.IsControlPlaneMachine(rctx.NutanixMachine) { - vmNic.IPEndpointList = []*nutanixClientV3.IPAddress{&nutanixClientV3.IPAddress{ - //Type: utils.StringPtr("ASSIGNED"), - IP: utils.StringPtr(rctx.NutanixCluster.Spec.ControlPlaneEndpoint.Host)}} + nicList := []*nutanixClientV3.VMNic{} + for _, subnetUUID := range subnetUUIDs { + nicList = append(nicList, &nutanixClientV3.VMNic{ + SubnetReference: &nutanixClientV3.Reference{ + UUID: utils.StringPtr(subnetUUID), + Kind: utils.StringPtr("subnet"), + }}) } diskSize := rctx.NutanixMachine.Spec.SystemDiskSize - diskSizeMib := GetMibValueOfQuantity(diskSize) + diskSizeMib := getMibValueOfQuantity(diskSize) systemDisk, err := createSystemDiskSpec(imageUUID, diskSizeMib) if err != nil { - return fmt.Errorf("error occurred while creating system disk spec: %v", err) + return nil, fmt.Errorf("error occurred while creating system disk spec: %v", err) } diskList := []*nutanixClientV3.VMDisk{ systemDisk, @@ -426,7 +486,7 @@ func (r *NutanixMachineReconciler) createVM(rctx *nctx.MachineContext) error { HardwareClockTimezone: utils.StringPtr("UTC"), NumVcpusPerSocket: utils.Int64Ptr(int64(rctx.NutanixMachine.Spec.VCPUsPerSocket)), NumSockets: 
utils.Int64Ptr(int64(rctx.NutanixMachine.Spec.VCPUSockets)), - MemorySizeMib: utils.Int64Ptr(GetMibValueOfQuantity(rctx.NutanixMachine.Spec.MemorySize)), + MemorySizeMib: utils.Int64Ptr(getMibValueOfQuantity(rctx.NutanixMachine.Spec.MemorySize)), NicList: nicList, DiskList: diskList, GuestCustomization: &nutanixClientV3.GuestCustomization{ @@ -440,124 +500,40 @@ func (r *NutanixMachineReconciler) createVM(rctx *nctx.MachineContext) error { vmInput.Spec = &vmSpec vmInput.Metadata = &vmMetadata - vm, err = client.V3.CreateVM(&vmInput) + vmResponse, err := client.V3.CreateVM(&vmInput) if err != nil { klog.Errorf("%s Failed to create VM %s. error: %v", rctx.LogPrefix, vmName, err) - return err + return nil, err } - vmUuid = *vm.Metadata.UUID + vmUuid := *vmResponse.Metadata.UUID klog.Infof("%s Sent the post request to create VM %s. Got the vm UUID: %s, status.state: %s", rctx.LogPrefix, - rctx.NutanixMachine.Name, vmUuid, *vm.Status.State) - // Wait for some time for the VM getting ready - time.Sleep(10 * time.Second) - } - - //Let's wait to vm's state to become "COMPLETE" - err = nutanixClient.WaitForGetVMComplete(client, vmUuid) - if err != nil { - klog.Errorf("%s Failed to get the vm with UUID %s. error: %v", rctx.LogPrefix, vmUuid, err) - return fmt.Errorf("Error retriving the created vm %s", rctx.NutanixMachine.Name) - } - - vm, err = findVMByUUID(rctx, vmUuid) - for err != nil { - klog.Errorf("%s Failed to find the vm with UUID %s. %v", rctx.LogPrefix, vmUuid, err) - return err - } - klog.Infof("%s The vm is ready. 
vmUUID: %s, state: %s", rctx.LogPrefix, vmUuid, *vm.Status.State) - - // Update the NutanixMachine status - rctx.NutanixMachine.Status.VmUUID = vm.Metadata.UUID - rctx.NutanixMachine.Status.Addresses = []capiv1.MachineAddress{} - rctx.IP = *vm.Status.Resources.NicList[0].IPEndpointList[0].IP - rctx.NutanixMachine.Status.Addresses = append(rctx.NutanixMachine.Status.Addresses, capiv1.MachineAddress{ - Type: capiv1.MachineInternalIP, - Address: rctx.IP, - }) - rctx.NutanixMachine.Status.Addresses = append(rctx.NutanixMachine.Status.Addresses, capiv1.MachineAddress{ - Type: capiv1.MachineHostName, - Address: *vm.Spec.Name, - }) - - // Update the NutanixMachine Spec.ProviderID - rctx.NutanixMachine.Spec.ProviderID = fmt.Sprintf(provideridFmt, *rctx.NutanixMachine.Status.VmUUID) - rctx.NutanixMachine.Status.Ready = true - klog.Infof("%s Created VM %s for cluster %s, update NutanixMachine spec.providerID to %s, and status %+v, vmUuid: %s", - rctx.LogPrefix, rctx.NutanixMachine.Name, rctx.NutanixCluster.Name, rctx.NutanixMachine.Spec.ProviderID, - rctx.NutanixMachine.Status, *rctx.NutanixMachine.Status.VmUUID) - - return nil -} - -// findVMByUUID retrieves the VM with the given vm UUID -func findVMByUUID(rctx *nctx.MachineContext, uuid string) (*nutanixClientV3.VMIntentResponse, error) { - - klog.Infof("%s Checking if VM with UUID %s exists.", rctx.LogPrefix, uuid) - - response, err := rctx.NutanixClient.V3.GetVM(uuid) - if err != nil { - klog.Errorf("%s Failed to find VM by vmUUID %s. 
error: %v", rctx.LogPrefix, uuid, err) - return nil, err - } - - return response, nil -} - -// findVMByName retrieves the VM with the given vm name -func findVMByName(rctx *nctx.MachineContext, vmName string) (*nutanixClientV3.VMIntentResource, error) { - klog.Infof("%s Checking if VM with name %s exists.", rctx.LogPrefix, vmName) - - res, err := rctx.NutanixClient.V3.ListVM(&nutanixClientV3.DSMetadata{ - Filter: utils.StringPtr(fmt.Sprintf("vm_name==%s", vmName))}) - if err != nil || len(res.Entities) == 0 { - klog.Errorf("%s Failed to find VM by name %s. error: %v", rctx.LogPrefix, vmName, err) - return nil, fmt.Errorf("Failed to find VM by name %s. error: %v", vmName, err) - } - - if len(res.Entities) > 1 { - klog.Warningf("%s Found more than one (%v) vms with name %s.", rctx.LogPrefix, len(res.Entities), vmName) - } - - return res.Entities[0], nil -} - -// deleteVM deletes a VM and is invoked by the NutanixMachineReconciler -//func deleteVM(ctx context.Context, cluster *infrav1.NutanixCluster, machine *infrav1.NutanixMachine, logPrefix string) error { -func deleteVM(rctx *nctx.MachineContext) error { - klog.Infof("Deleting VM %v for cluster %v.", rctx.NutanixMachine.Name, rctx.NutanixCluster.Name) - var err error - - client, err := nutanixClient.Client(nutanixClient.ClientOptions{}) - if err != nil { - return fmt.Errorf("Client Auth error: %v", err) - } - - if rctx.NutanixMachine.Status.VmUUID == nil { - klog.Warning(fmt.Sprintf("VmUUID not found in Status. Skipping delete")) - return nil - } - uuid := utils.StringValue(rctx.NutanixMachine.Status.VmUUID) - vmName := rctx.NutanixMachine.Name - klog.Infof("Deleting VM %s with UUID: %s", vmName, uuid) - _, err = client.V3.DeleteVM(uuid) - if err != nil { - klog.Infof("Error deleting machine %s", rctx.NutanixMachine.Name) - return err - } - - err = nutanixClient.WaitForGetVMDelete(client, uuid) - if err != nil { - klog.Errorf("VM %s failed to delete. 
%s", vmName, err.Error()) - // TODO find a better way to error check instead of string search comparison - if strings.Contains(err.Error(), "does not exist") { - klog.Infof("Successfully deleted vm %s with uuid %s", rctx.NutanixMachine.Name, uuid) - return nil + rctx.NutanixMachine.Name, vmUuid, *vmResponse.Status.State) + klog.Infof("%s Getting task uuid for VM %s", rctx.LogPrefix, + rctx.NutanixMachine.Name) + lastTaskUUID, err := getTaskUUIDFromVM(vmResponse) + if err != nil { + errorMsg := fmt.Errorf("%s error occurred fetching task UUID from vm %s after creation: %v", rctx.LogPrefix, rctx.NutanixMachine.Name, err) + klog.Error(errorMsg) + return nil, errorMsg + } + klog.Infof("%s Waiting for task %s to get completed for VM %s", rctx.LogPrefix, + lastTaskUUID, rctx.NutanixMachine.Name) + err = nutanixClient.WaitForTaskCompletion(client, lastTaskUUID) + if err != nil { + errorMsg := fmt.Errorf("%s error occurred while waiting for task %s to start: %v", rctx.LogPrefix, lastTaskUUID, err) + klog.Error(errorMsg) + return nil, errorMsg + } + klog.Infof("%s Fetching VM after creation %s", rctx.LogPrefix, + lastTaskUUID, rctx.NutanixMachine.Name) + vm, err = findVMByUUID(client, vmUuid) + if err != nil { + errorMsg := fmt.Errorf("%s error occurred while getting VM %s after creation: %v", rctx.LogPrefix, rctx.NutanixMachine.Name, err) + klog.Error(errorMsg) + return nil, errorMsg } - - return err } - - return nil + return vm, nil } // getBootstrapData returns the Bootstrap data from the ref secret @@ -588,170 +564,47 @@ func (r *NutanixMachineReconciler) getBootstrapData(rctx *nctx.MachineContext) ( return value, nil } -func getImageUUID(rctx *nctx.MachineContext) (string, error) { - client, err := nutanixClient.Client(nutanixClient.ClientOptions{Debug: true}) +func (r *NutanixMachineReconciler) patchMachine(rctx *nctx.MachineContext) error { + patchHelper, err := patch.NewHelper(rctx.NutanixMachine, r.Client) if err != nil { - klog.Errorf("%s Failed to create the 
nutanix client. %v", rctx.LogPrefix, err) - return "", fmt.Errorf("Client Auth error: %v", err) - } - rctx.NutanixClient = client - machineSpec := rctx.NutanixMachine.Spec - var foundImageUUID string - imageUUID := machineSpec.Image.UUID - imageName := machineSpec.Image.Name - if imageUUID == nil && imageName == nil { - return "", fmt.Errorf("image name or image uuid must be passed in order to retrieve the image") - } - if imageUUID != nil { - imageIntentResponse, err := client.V3.GetImage(*imageUUID) - if err != nil { - if strings.Contains(fmt.Sprint(err), "ENTITY_NOT_FOUND") { - return "", fmt.Errorf("failed to find image with UUID %s: %v", *imageUUID, err) - } - } - foundImageUUID = *imageIntentResponse.Metadata.UUID - } else if imageName != nil { - responseImages, err := client.V3.ListAllImage() - if err != nil { - return "", err - } - foundImages := make([]*nutanixClientV3.ImageIntentResponse, 0) - for _, s := range responseImages.Entities { - imageSpec := s.Spec - if *imageSpec.Name == *imageName { - foundImages = append(foundImages, s) - } - } - if len(foundImages) == 0 { - return "", fmt.Errorf("failed to retrieve image by name %s", *imageName) - } else if len(foundImages) > 1 { - return "", fmt.Errorf("more than one image found with name %s", *imageName) - } else { - foundImageUUID = *foundImages[0].Metadata.UUID - } - if foundImageUUID == "" { - return "", fmt.Errorf("failed to retrieve image by name or uuid. Verify input parameters.") - } + errorMsg := fmt.Errorf("%s Failed to create patch helper to patch machine %s: %v", rctx.LogPrefix, rctx.NutanixMachine.Name, err) + klog.Error(errorMsg) + return errorMsg } - return foundImageUUID, nil -} - -func getSubnetUUID(rctx *nctx.MachineContext, peUUID string) (string, error) { - client, err := nutanixClient.Client(nutanixClient.ClientOptions{Debug: true}) + err = patchHelper.Patch(rctx.Context, rctx.NutanixMachine) if err != nil { - klog.Errorf("%s Failed to create the nutanix client. 
%v", rctx.LogPrefix, err) - return "", fmt.Errorf("Client Auth error: %v", err) - } - rctx.NutanixClient = client - machineSpec := rctx.NutanixMachine.Spec - var foundSubnetUUID string - subnetUUID := machineSpec.Subnet.UUID - subnetName := machineSpec.Subnet.Name - if subnetUUID == nil && subnetName == nil { - return "", fmt.Errorf("subnet name or subnet uuid must be passed in order to retrieve the subnet") - } - if subnetUUID != nil { - subnetIntentResponse, err := client.V3.GetSubnet(*subnetUUID) - if err != nil { - if strings.Contains(fmt.Sprint(err), "ENTITY_NOT_FOUND") { - return "", fmt.Errorf("failed to find subnet with UUID %s: %v", *subnetUUID, err) - } - } - foundSubnetUUID = *subnetIntentResponse.Metadata.UUID - } else if subnetName != nil { - - responseSubnets, err := client.V3.ListAllSubnet() - if err != nil { - return "", err - } - foundSubnets := make([]*nutanixClientV3.SubnetIntentResponse, 0) - for _, s := range responseSubnets.Entities { - subnetSpec := s.Spec - if *subnetSpec.Name == *subnetName && *subnetSpec.ClusterReference.UUID == peUUID { - foundSubnets = append(foundSubnets, s) - } - } - if len(foundSubnets) == 0 { - return "", fmt.Errorf("failed to retrieve subnet by name %s", *subnetName) - } else if len(foundSubnets) > 1 { - return "", fmt.Errorf("more than one subnet found with name %s", *subnetName) - } else { - foundSubnetUUID = *foundSubnets[0].Metadata.UUID - } - if foundSubnetUUID == "" { - return "", fmt.Errorf("failed to retrieve subnet by name or uuid. 
Verify input parameters.") - } + errorMsg := fmt.Errorf("%s Failed to patch machine %s: %v", rctx.LogPrefix, rctx.NutanixMachine.Name, err) + klog.Error(errorMsg) + return errorMsg } - return foundSubnetUUID, nil + klog.Infof("%s Patched machine %s: Status %+v Spec %+v", rctx.LogPrefix, rctx.NutanixMachine.Name, rctx.NutanixMachine.Status, rctx.NutanixMachine.Spec) + return nil } -func getPEUUID(rctx *nctx.MachineContext) (string, error) { - client, err := nutanixClient.Client(nutanixClient.ClientOptions{Debug: true}) - if err != nil { - klog.Errorf("%s Failed to create the nutanix client. %v", rctx.LogPrefix, err) - return "", fmt.Errorf("Client Auth error: %v", err) - } - rctx.NutanixClient = client - machineSpec := rctx.NutanixMachine.Spec - var foundPEUUID string - peUUID := machineSpec.Cluster.UUID - peName := machineSpec.Cluster.Name - if peUUID == nil && peName == nil { - return "", fmt.Errorf("cluster name or uuid must be passed in order to retrieve the pe") - } - if peUUID != nil { - peIntentResponse, err := client.V3.GetCluster(*peUUID) - if err != nil { - if strings.Contains(fmt.Sprint(err), "ENTITY_NOT_FOUND") { - return "", fmt.Errorf("failed to find Prism Element cluster with UUID %s: %v", *peUUID, err) - } - } - foundPEUUID = *peIntentResponse.Metadata.UUID - } else if peName != nil { - - responsePEs, err := client.V3.ListAllCluster() - if err != nil { - return "", err - } - foundPEs := make([]*nutanixClientV3.ClusterIntentResource, 0) - for _, s := range responsePEs.Entities { - peSpec := s.Spec - if *peSpec.Name == *peName { - foundPEs = append(foundPEs, s) +func (r *NutanixMachineReconciler) assignAddressesToMachine(rctx *nctx.MachineContext, vm *nutanixClientV3.VMIntentResponse) error { + rctx.NutanixMachine.Status.Addresses = []capiv1.MachineAddress{} + if vm.Status == nil || vm.Status.Resources == nil { + return fmt.Errorf("unable to fetch network interfaces from VM. 
Retrying") + } + foundIPs := 0 + for _, nic := range vm.Status.Resources.NicList { + for _, ipEndpoint := range nic.IPEndpointList { + if ipEndpoint.IP != nil { + rctx.NutanixMachine.Status.Addresses = append(rctx.NutanixMachine.Status.Addresses, capiv1.MachineAddress{ + Type: capiv1.MachineInternalIP, + Address: *ipEndpoint.IP, + }) + foundIPs++ } } - if len(foundPEs) == 0 { - return "", fmt.Errorf("failed to retrieve Prism Element cluster by name %s", *peName) - } else if len(foundPEs) > 1 { - return "", fmt.Errorf("more than one Prism Element cluster found with name %s", *peName) - } else { - foundPEUUID = *foundPEs[0].Metadata.UUID - } - if foundPEUUID == "" { - return "", fmt.Errorf("failed to retrieve Prism Element cluster by name or uuid. Verify input parameters.") - } } - return foundPEUUID, nil -} - -// GetMibValueOfQuantity returns the given quantity value in Mib -func GetMibValueOfQuantity(quantity resource.Quantity) int64 { - return quantity.Value() / (1024 * 1024) -} - -func createSystemDiskSpec(imageUUID string, systemDiskSize int64) (*nutanixClientV3.VMDisk, error) { - if imageUUID == "" { - return nil, fmt.Errorf("image UUID must be set when creating system disk") - } - if systemDiskSize <= 0 { - return nil, fmt.Errorf("Invalid system disk size: %d. Provide in XXGi (for example 70Gi) format instead", systemDiskSize) + if foundIPs == 0 { + return fmt.Errorf("unable to determine network interfaces from VM. 
Retrying") } - systemDisk := &nutanixClientV3.VMDisk{ - DataSourceReference: &nutanixClientV3.Reference{ - Kind: utils.StringPtr("image"), - UUID: utils.StringPtr(imageUUID), - }, - DiskSizeMib: utils.Int64Ptr(systemDiskSize)} - return systemDisk, nil - + rctx.IP = rctx.NutanixMachine.Status.Addresses[0].Address + rctx.NutanixMachine.Status.Addresses = append(rctx.NutanixMachine.Status.Addresses, capiv1.MachineAddress{ + Type: capiv1.MachineHostName, + Address: *vm.Spec.Name, + }) + return nil } diff --git a/pkg/client/client.go b/pkg/client/client.go index 50ae3ee9fc..dd551393f6 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -3,6 +3,7 @@ package client import ( "fmt" "os" + "strings" "k8s.io/klog/v2" @@ -11,7 +12,8 @@ import ( ) const ( - ProviderName = "nutanix" + ProviderName = "nutanix" + debugModeName = "DEBUG_MODE" ) type ClientOptions struct { @@ -32,7 +34,7 @@ func Client(options ClientOptions) (*nutanixClientV3.Client, error) { Insecure: true, } - cli, err := nutanixClientV3.NewV3Client(cred, options.Debug) + cli, err := nutanixClientV3.NewV3Client(cred, debugMode(options)) if err != nil { klog.Errorf("Failed to create the nutanix client. 
error: %v", err) return nil, err @@ -41,6 +43,21 @@ func Client(options ClientOptions) (*nutanixClientV3.Client, error) { return cli, nil } +func debugMode(options ClientOptions) bool { + //Read environment variable to enable debug mode + debugModeEnv := getEnvVar(debugModeName) + if debugModeEnv != "" { + //See if env var is set to 'true', otherwise default to false + if strings.ToLower(debugModeEnv) == "true" { + return true + } else { + return false + } + } + // If env var not set -> use options + return options.Debug +} + func getEnvVar(key string) (val string) { if val, ok := os.LookupEnv(key); ok { return val diff --git a/pkg/client/state.go b/pkg/client/state.go index dffc50e77a..7f3f2b95f4 100644 --- a/pkg/client/state.go +++ b/pkg/client/state.go @@ -8,10 +8,22 @@ import ( "k8s.io/klog/v2" nutanixClientV3 "github.com/nutanix-core/cluster-api-provider-nutanix/pkg/nutanix/v3" + "github.com/nutanix-core/cluster-api-provider-nutanix/pkg/utils" ) type stateRefreshFunc func() (string, error) +func WaitForTaskCompletion(conn *nutanixClientV3.Client, uuid string) error { + errCh := make(chan error, 1) + go waitForState( + errCh, + "SUCCEEDED", + waitUntilTaskStateFunc(conn, uuid)) + + err := <-errCh + return err +} + func WaitForGetVMComplete(conn *nutanixClientV3.Client, vmUUID string) error { errCh := make(chan error, 1) go waitForState(errCh, "COMPLETE", waitUntilVMStateFunc(conn, vmUUID)) @@ -108,6 +120,31 @@ func waitUntilSubnetStateFunc(conn *nutanixClientV3.Client, uuid string) stateRe } } +func waitUntilTaskStateFunc(conn *nutanixClientV3.Client, uuid string) stateRefreshFunc { + return func() (string, error) { + return GetTaskState(conn, uuid) + } +} + +func GetTaskState(client *nutanixClientV3.Client, taskUUID string) (string, error) { + + klog.Infof("Getting task with UUID %s", taskUUID) + v, err := client.V3.GetTask(taskUUID) + + if err != nil { + klog.Errorf("error occurred while waiting for task with UUID %s: %v", taskUUID, err) + return "", err + } 
+ + if *v.Status == "INVALID_UUID" || *v.Status == "FAILED" { + return *v.Status, + fmt.Errorf("error_detail: %s, progress_message: %s", utils.StringValue(v.ErrorDetail), utils.StringValue(v.ProgressMessage)) + } + taskStatus := *v.Status + klog.Infof("Status for task with UUID %s: %s", taskUUID, taskStatus) + return taskStatus, nil +} + // RetryableFunc performs an action and returns a bool indicating whether the // function is done, or if it should keep retrying, and an error which will // abort the retry and be returned by the Retry function. The 0-indexed attempt