Detach volume groups before deleting machine (#460)
* Detach volume groups before deleting machine

This commit is a workaround for a known problem with hypervisor-attached
volumes created by CSI 3.0. A hypervisor-attached volume results in a volume
group disk being attached to the VM on which the pod consuming the PV runs.
When a cluster is deleted before the PVC is deleted, the Prism task that
deletes the VM fails because a volume group is still attached to the VM.
The problem is restricted to CSI 3.0, which requires a minimum PC version of
2024.1, and the fix relies on the volume group detach v4 API, which is also
available starting with 2024.1. We therefore make a v3 "get prism central
info" call to fetch the PC version; if the version is 2024 or greater, we
also create a v4 client for the cluster. Before a machine is deleted, we
check whether the VM backing the machine has any disks backed by volume
groups and, if so, detach those volume groups before deleting the machine
(a short usage sketch follows this list).

* Handle internal PC Version names in v4 compatibility check

* Address review comments

* Address more comments

* go mod tidy
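
For illustration, here is a minimal sketch (not part of this commit) of how the machine deletion path can call the new helper before removing the VM. The wrapper name deleteMachineVM and the injected deleteVM callback are assumptions made for this example; only detachVolumeGroupsFromVM and the prism-go-client v4 import come from the diff below.

package controllers

import (
	"context"
	"fmt"

	prismclientv4 "github.com/nutanix-cloud-native/prism-go-client/v4"
)

// deleteMachineVM detaches any volume-group-backed disks from the VM before the
// VM delete is issued, so the Prism delete task does not fail on an attached
// volume group. deleteVM stands in for the existing v3 deletion call.
func deleteMachineVM(
	ctx context.Context,
	v4Client *prismclientv4.Client, // nil when Prism Central is older than 2024.1
	vmUUID string,
	deleteVM func(ctx context.Context, vmUUID string) error,
) error {
	if v4Client != nil {
		if err := detachVolumeGroupsFromVM(ctx, v4Client, vmUUID); err != nil {
			return fmt.Errorf("failed to detach volume groups from VM %s: %w", vmUUID, err)
		}
	}
	return deleteVM(ctx, vmUUID)
}

Gating the detach call on a non-nil v4 client preserves the existing behavior on Prism Central releases that do not expose the v4 volume group APIs.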
thunderboltsid authored Jul 26, 2024
1 parent 9ebba60 commit 412d9d4
Showing 27 changed files with 5,737 additions and 669 deletions.
2 changes: 2 additions & 0 deletions Makefile
@@ -261,6 +261,7 @@ cluster-e2e-templates-v1beta1: ## Generate cluster templates for v1beta1
kustomize build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-remediation --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-remediation.yaml
kustomize build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-kcp-scale-in.yaml
kustomize build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-csi --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-csi.yaml
kustomize build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-csi3 --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-csi3.yaml
kustomize build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-failure-domains --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-failure-domains.yaml
kustomize build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-clusterclass --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-clusterclass.yaml
kustomize build $(NUTANIX_E2E_TEMPLATES)/v1beta1/cluster-template-clusterclass --load-restrictor LoadRestrictionsNone > $(NUTANIX_E2E_TEMPLATES)/v1beta1/clusterclass-nutanix-quick-start.yaml
@@ -287,6 +288,7 @@ cluster-e2e-templates-no-kubeproxy: ##Generate cluster templates without kubepro
cluster-templates: ## Generate cluster templates for all flavors
kustomize build $(TEMPLATES_DIR)/base > $(TEMPLATES_DIR)/cluster-template.yaml
kustomize build $(TEMPLATES_DIR)/csi > $(TEMPLATES_DIR)/cluster-template-csi.yaml
kustomize build $(TEMPLATES_DIR)/csi3 > $(TEMPLATES_DIR)/cluster-template-csi3.yaml
kustomize build $(TEMPLATES_DIR)/clusterclass > $(TEMPLATES_DIR)/cluster-template-clusterclass.yaml
kustomize build $(TEMPLATES_DIR)/topology > $(TEMPLATES_DIR)/cluster-template-topology.yaml

8 changes: 6 additions & 2 deletions api/v1beta1/conditions.go
@@ -20,6 +20,8 @@ import capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"

const (
DeletionFailed = "DeletionFailed"

VolumeGroupDetachFailed = "VolumeGroupDetachFailed"
)

const (
@@ -42,9 +44,11 @@ const (

const (
// PrismCentralClientCondition indicates the status of the client used to connect to Prism Central
PrismCentralClientCondition capiv1.ConditionType = "PrismClientInit"
PrismCentralV4ClientCondition capiv1.ConditionType = "PrismClientV4Init"

PrismCentralClientInitializationFailed = "PrismClientInitFailed"
PrismCentralV4ClientInitializationFailed = "PrismClientV4InitFailed"
)

const (
191 changes: 189 additions & 2 deletions controllers/helpers.go
@@ -18,14 +18,24 @@ package controllers

import (
"context"
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"time"

"github.com/google/uuid"
"github.com/nutanix-cloud-native/prism-go-client/utils"
prismclientv3 "github.com/nutanix-cloud-native/prism-go-client/v3"
prismclientv4 "github.com/nutanix-cloud-native/prism-go-client/v4"
prismcommonconfig "github.com/nutanix/ntnx-api-golang-clients/prism-go-client/v4/models/common/v1/config"
prismapi "github.com/nutanix/ntnx-api-golang-clients/prism-go-client/v4/models/prism/v4/config"
vmconfig "github.com/nutanix/ntnx-api-golang-clients/vmm-go-client/v4/models/vmm/v4/ahv/config"
prismconfig "github.com/nutanix/ntnx-api-golang-clients/volumes-go-client/v4/models/prism/v4/config"
volumesconfig "github.com/nutanix/ntnx-api-golang-clients/volumes-go-client/v4/models/volumes/v4/config"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/wait"
v1 "k8s.io/client-go/informers/core/v1"
capiv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
@@ -44,6 +54,8 @@ const (
subnetTypeOverlay = "OVERLAY"

gpuUnused = "UNUSED"

pollingInterval = time.Second * 2
)

// DeleteVM deletes a VM and is invoked by the NutanixMachineReconciler
@@ -771,11 +783,186 @@ func getPrismCentralClientForCluster(ctx context.Context, cluster *infrav1.Nutan
PrismManagementEndpoint: managementEndpoint,
})
if err != nil {
log.Error(err, "error occurred while getting nutanix prism client from cache")
log.Error(err, "error occurred while getting nutanix prism v3 Client from cache")
conditions.MarkFalse(cluster, infrav1.PrismCentralClientCondition, infrav1.PrismCentralClientInitializationFailed, capiv1.ConditionSeverityError, err.Error())
return nil, fmt.Errorf("nutanix prism client error: %w", err)
return nil, fmt.Errorf("nutanix prism v3 Client error: %w", err)
}

conditions.MarkTrue(cluster, infrav1.PrismCentralClientCondition)
return v3Client, nil
}

func getPrismCentralV4ClientForCluster(ctx context.Context, cluster *infrav1.NutanixCluster, secretInformer v1.SecretInformer, mapInformer v1.ConfigMapInformer) (*prismclientv4.Client, error) {
log := ctrl.LoggerFrom(ctx)

clientHelper := nutanixclient.NewHelper(secretInformer, mapInformer)
managementEndpoint, err := clientHelper.BuildManagementEndpoint(ctx, cluster)
if err != nil {
log.Error(err, fmt.Sprintf("error occurred while getting management endpoint for cluster %q", cluster.GetNamespacedName()))
conditions.MarkFalse(cluster, infrav1.PrismCentralV4ClientCondition, infrav1.PrismCentralV4ClientInitializationFailed, capiv1.ConditionSeverityError, err.Error())
return nil, err
}

client, err := nutanixclient.NutanixClientCacheV4.GetOrCreate(&nutanixclient.CacheParams{
NutanixCluster: cluster,
PrismManagementEndpoint: managementEndpoint,
})
if err != nil {
log.Error(err, "error occurred while getting nutanix prism v4 client from cache")
conditions.MarkFalse(cluster, infrav1.PrismCentralV4ClientCondition, infrav1.PrismCentralV4ClientInitializationFailed, capiv1.ConditionSeverityError, err.Error())
return nil, fmt.Errorf("nutanix prism v4 client error: %w", err)
}

conditions.MarkTrue(cluster, infrav1.PrismCentralV4ClientCondition)
return client, nil
}

// isPrismCentralV4Compatible checks if the Prism Central is v4 API compatible
func isPrismCentralV4Compatible(ctx context.Context, v3Client *prismclientv3.Client) (bool, error) {
log := ctrl.LoggerFrom(ctx)
internalPCNames := []string{"master", "fraser"}
pcVersion, err := getPrismCentralVersion(ctx, v3Client)
if err != nil {
return false, fmt.Errorf("failed to get Prism Central version: %w", err)
}

// Check if the version is v4 compatible
// PC versions look like pc.2024.1.0.1
// We can check if the version is greater than or equal to 2024

if pcVersion == "" {
return false, errors.New("prism central version is empty")
}

for _, internalPCName := range internalPCNames {
// TODO(sid): This is a naive check to see if the PC version is an internal build. It can potentially lead to failures
// if an internal fraser build is not v4 compatible.
if strings.Contains(pcVersion, internalPCName) {
log.Info(fmt.Sprintf("Prism Central version %s is an internal build; assuming it is v4 compatible", pcVersion))
return true, nil
}
}

// Remove the prefix "pc."
version := strings.TrimPrefix(pcVersion, "pc.")
// Split the version string by "." to extract the year part
parts := strings.Split(version, ".")
if len(parts) < 1 {
return false, errors.New("invalid version format")
}

// Convert the year part to an integer
year, err := strconv.Atoi(parts[0])
if err != nil {
return false, errors.New("invalid version: failed to parse year from PC version")
}

if year >= 2024 {
return true, nil
}

return false, nil
}

// getPrismCentralVersion returns the version of the Prism Central instance
func getPrismCentralVersion(ctx context.Context, v3Client *prismclientv3.Client) (string, error) {
pcInfo, err := v3Client.V3.GetPrismCentral(ctx)
if err != nil {
return "", err
}

if pcInfo.Resources == nil || pcInfo.Resources.Version == nil {
return "", fmt.Errorf("failed to get Prism Central version")
}

return *pcInfo.Resources.Version, nil
}

func detachVolumeGroupsFromVM(ctx context.Context, v4Client *prismclientv4.Client, vmUUID string) error {
log := ctrl.LoggerFrom(ctx)
vmResp, err := v4Client.VmApiInstance.GetVmById(&vmUUID)
if err != nil {
return fmt.Errorf("failed to get virtual machine: %w", err)
}

data := vmResp.GetData()
vm, ok := data.(vmconfig.Vm)
if !ok {
return fmt.Errorf("failed to cast response to VM")
}

volumeGroupsToDetach := make([]string, 0)
for _, disk := range vm.Disks {
backingInfo, ok := disk.GetBackingInfo().(vmconfig.ADSFVolumeGroupReference)
if !ok {
continue
}

volumeGroupsToDetach = append(volumeGroupsToDetach, *backingInfo.VolumeGroupExtId)
}

// Detach the volume groups from the virtual machine
for _, volumeGroup := range volumeGroupsToDetach {
log.Info(fmt.Sprintf("detaching volume group %s from virtual machine %s", volumeGroup, vmUUID))
body := &volumesconfig.VmAttachment{
ExtId: &vmUUID,
}
resp, err := v4Client.VolumeGroupsApiInstance.DetachVm(&volumeGroup, body)
if err != nil {
return fmt.Errorf("failed to detach volume group %s from virtual machine %s: %w", volumeGroup, vmUUID, err)
}

data := resp.GetData()
task, ok := data.(prismconfig.TaskReference)
if !ok {
return fmt.Errorf("failed to cast response to TaskReference")
}

// Wait for the task to complete
if _, err := waitForTaskCompletionV4(ctx, v4Client, *task.ExtId); err != nil {
return fmt.Errorf("failed to wait for task %s to complete: %w", *task.ExtId, err)
}
}

return nil
}

// waitForTaskCompletionV4 waits for a task to complete and returns the completion details
// of the task. The function polls the task status every pollingInterval (2s) until the task
// is completed or the context is cancelled.
func waitForTaskCompletionV4(ctx context.Context, v4Client *prismclientv4.Client, taskID string) ([]prismcommonconfig.KVPair, error) {
var data []prismcommonconfig.KVPair

if err := wait.PollUntilContextCancel(
ctx,
pollingInterval,
true,
func(ctx context.Context) (done bool, err error) {
task, err := v4Client.TasksApiInstance.GetTaskById(utils.StringPtr(taskID))
if err != nil {
return false, fmt.Errorf("failed to get task %s: %w", taskID, err)
}

taskData, ok := task.GetData().(prismapi.Task)
if !ok {
return false, fmt.Errorf("unexpected task data type %[1]T: %+[1]v", task.GetData())
}

if taskData.Status == nil {
return false, nil
}

if *taskData.Status != prismapi.TASKSTATUS_SUCCEEDED {
return false, nil
}

data = taskData.CompletionDetails

return true, nil
},
); err != nil {
return nil, fmt.Errorf("failed to wait for task %s to complete: %w", taskID, err)
}

return data, nil
}
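
As a standalone illustration of the version gate implemented in isPrismCentralV4Compatible above (this snippet is not part of the commit), the same year-extraction rule can be exercised against sample version strings; the helper name and the sample values are invented for this example.

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// isV4CompatibleVersion mirrors the parsing rule above: internal builds
// ("master", "fraser") are assumed to be v4 compatible, otherwise the year
// prefix of a "pc.<year>...." version must be 2024 or later.
func isV4CompatibleVersion(pcVersion string) bool {
	for _, internal := range []string{"master", "fraser"} {
		if strings.Contains(pcVersion, internal) {
			return true
		}
	}
	parts := strings.Split(strings.TrimPrefix(pcVersion, "pc."), ".")
	year, err := strconv.Atoi(parts[0])
	return err == nil && year >= 2024
}

func main() {
	for _, v := range []string{"pc.2024.1.0.1", "pc.2023.3", "fraser-2024.2-build"} {
		fmt.Printf("%s -> v4 compatible: %v\n", v, isV4CompatibleVersion(v))
	}
}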
20 changes: 18 additions & 2 deletions controllers/nutanixcluster_controller.go
@@ -45,7 +45,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/source"

infrav1 "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/api/v1beta1"
nutanixClient "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/pkg/client"
nutanixclient "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/pkg/client"
nctx "github.com/nutanix-cloud-native/cluster-api-provider-nutanix/pkg/context"
)

@@ -202,6 +202,21 @@ func (r *NutanixClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reque
NutanixClient: v3Client,
}

createV4Client, err := isPrismCentralV4Compatible(ctx, v3Client)
if err != nil {
log.Error(err, "error occurred while checking environment compatibility for Prism Central v4 APIs")
}

if createV4Client {
v4Client, err := getPrismCentralV4ClientForCluster(ctx, cluster, r.SecretInformer, r.ConfigMapInformer)
if err != nil {
log.Error(err, "error occurred while fetching Prism Central v4 client")
return reconcile.Result{}, err
}

rctx.NutanixClientV4 = v4Client
}

// Check for request action
if !cluster.DeletionTimestamp.IsZero() {
// NutanixCluster is being deleted
@@ -236,7 +251,8 @@ func (r *NutanixClusterReconciler) reconcileDelete(rctx *nctx.ClusterContext) (r

// delete the client from the cache
log.Info(fmt.Sprintf("deleting nutanix prism client for cluster %s from cache", rctx.NutanixCluster.GetNamespacedName()))
nutanixclient.NutanixClientCache.Delete(&nutanixclient.CacheParams{NutanixCluster: rctx.NutanixCluster})
nutanixclient.NutanixClientCacheV4.Delete(&nutanixclient.CacheParams{NutanixCluster: rctx.NutanixCluster})

if err := r.reconcileCredentialRefDelete(rctx.Context, rctx.NutanixCluster); err != nil {
log.Error(err, fmt.Sprintf("error occurred while reconciling credential ref deletion for cluster %s", rctx.Cluster.Name))