From 0573b8bac7e32396998a58aff461391ce3accc09 Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Wed, 4 Sep 2024 13:59:31 +0300 Subject: [PATCH] Change default labels to use Kubernetes resource datacenter name and not datacenter override name. Create new Status field for MetadataVersion and if we keep running old ones (or old CRD), we simply fetch also with the older possible name --- .../v1beta1/cassandradatacenter_types.go | 27 +++++++--- ...dra.datastax.com_cassandradatacenters.yaml | 3 ++ .../control/cassandratask_controller.go | 2 +- .../construct_podtemplatespec.go | 4 +- pkg/reconciliation/construct_statefulset.go | 2 +- .../construct_statefulset_test.go | 4 +- pkg/reconciliation/constructor.go | 7 ++- pkg/reconciliation/context.go | 8 +-- pkg/reconciliation/handler_reconcile_test.go | 8 +-- pkg/reconciliation/handler_test.go | 6 ++- pkg/reconciliation/reconcile_configsecret.go | 2 +- pkg/reconciliation/reconcile_datacenter.go | 5 +- pkg/reconciliation/reconcile_fql.go | 2 +- pkg/reconciliation/reconcile_racks.go | 52 +++++++++++++------ scripts/release-helm-chart.sh | 2 +- 15 files changed, 89 insertions(+), 45 deletions(-) diff --git a/apis/cassandra/v1beta1/cassandradatacenter_types.go b/apis/cassandra/v1beta1/cassandradatacenter_types.go index ec9aa20e..8a628e93 100644 --- a/apis/cassandra/v1beta1/cassandradatacenter_types.go +++ b/apis/cassandra/v1beta1/cassandradatacenter_types.go @@ -485,6 +485,9 @@ type CassandraDatacenterStatus struct { // This field is used to perform validation checks preventing a user from changing the override // +optional DatacenterName *string `json:"datacenterName,omitempty"` + + // +optional + MetadataVersion int64 `json:"metadataVersion,omitempty"` } // CassandraDatacenter is the Schema for the cassandradatacenters API @@ -596,7 +599,7 @@ func (dc *CassandraDatacenter) SetCondition(condition DatacenterCondition) { // GetDatacenterLabels ... func (dc *CassandraDatacenter) GetDatacenterLabels() map[string]string { labels := dc.GetClusterLabels() - labels[DatacenterLabel] = CleanLabelValue(dc.DatacenterName()) + labels[DatacenterLabel] = CleanLabelValue(dc.Name) return labels } @@ -661,19 +664,19 @@ func (dc *CassandraDatacenter) GetSeedServiceName() string { } func (dc *CassandraDatacenter) GetAdditionalSeedsServiceName() string { - return CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.SanitizedName() + "-additional-seed-service" + return CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.LabelResourceName() + "-additional-seed-service" } func (dc *CassandraDatacenter) GetAllPodsServiceName() string { - return CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.SanitizedName() + "-all-pods-service" + return CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.LabelResourceName() + "-all-pods-service" } func (dc *CassandraDatacenter) GetDatacenterServiceName() string { - return CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.SanitizedName() + "-service" + return CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.LabelResourceName() + "-service" } func (dc *CassandraDatacenter) GetNodePortServiceName() string { - return CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.SanitizedName() + "-node-port-service" + return CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.LabelResourceName() + "-node-port-service" } func (dc *CassandraDatacenter) ShouldGenerateSuperuserSecret() bool { @@ -970,9 +973,17 @@ func SplitRacks(nodeCount, rackCount int) []int { return topology } -// SanitizedName returns a sanitized version of the name returned by DatacenterName() -func (dc *CassandraDatacenter) SanitizedName() string { - return CleanupForKubernetes(dc.DatacenterName()) +func (dc *CassandraDatacenter) DatacenterNameStatus() bool { + return dc.Status.DatacenterName != nil +} + +// LabelResourceName returns a sanitized version of the name returned by DatacenterName() +func (dc *CassandraDatacenter) LabelResourceName() string { + // If existing cluster, return dc.DatacenterName() else return dc.Name + if dc.DatacenterNameStatus() { + return CleanupForKubernetes(*dc.Status.DatacenterName) + } + return CleanupForKubernetes(dc.Name) } // DatacenterName returns the Cassandra DC name override if it exists, diff --git a/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml b/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml index 2d7a9493..83d5ad0e 100644 --- a/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml +++ b/config/crd/bases/cassandra.datastax.com_cassandradatacenters.yaml @@ -11221,6 +11221,9 @@ spec: with the management API format: date-time type: string + metadataVersion: + format: int64 + type: integer nodeReplacements: items: type: string diff --git a/internal/controllers/control/cassandratask_controller.go b/internal/controllers/control/cassandratask_controller.go index 45bd78fd..4d76c7d8 100644 --- a/internal/controllers/control/cassandratask_controller.go +++ b/internal/controllers/control/cassandratask_controller.go @@ -201,7 +201,7 @@ func (r *CassandraTaskReconciler) Reconcile(ctx context.Context, req ctrl.Reques return ctrl.Result{}, errors.Wrapf(err, "unable to fetch target CassandraDatacenter: %s", cassTask.Spec.Datacenter) } - logger = log.FromContext(ctx, "datacenterName", dc.SanitizedName(), "clusterName", dc.Spec.ClusterName) + logger = log.FromContext(ctx, "datacenterName", dc.LabelResourceName(), "clusterName", dc.Spec.ClusterName) log.IntoContext(ctx, logger) // If we're active, we can proceed - otherwise verify if we're allowed to run diff --git a/pkg/reconciliation/construct_podtemplatespec.go b/pkg/reconciliation/construct_podtemplatespec.go index 2dfc83e7..24006730 100644 --- a/pkg/reconciliation/construct_podtemplatespec.go +++ b/pkg/reconciliation/construct_podtemplatespec.go @@ -351,7 +351,7 @@ func addVolumes(dc *api.CassandraDatacenter, baseTemplate *corev1.PodTemplateSpe Name: "encryption-cred-storage", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ - SecretName: fmt.Sprintf("%s-keystore", dc.SanitizedName()), + SecretName: fmt.Sprintf("%s-keystore", dc.LabelResourceName()), }, }, } @@ -558,7 +558,7 @@ func getConfigDataEnVars(dc *api.CassandraDatacenter) ([]corev1.EnvVar, error) { return envVars, nil } - return nil, fmt.Errorf("datacenter %s is missing %s annotation", dc.SanitizedName(), api.ConfigHashAnnotation) + return nil, fmt.Errorf("datacenter %s is missing %s annotation", dc.LabelResourceName(), api.ConfigHashAnnotation) } configData, err := dc.GetConfigAsJSON(dc.Spec.Config) diff --git a/pkg/reconciliation/construct_statefulset.go b/pkg/reconciliation/construct_statefulset.go index f9227710..a8ed4fbc 100644 --- a/pkg/reconciliation/construct_statefulset.go +++ b/pkg/reconciliation/construct_statefulset.go @@ -26,7 +26,7 @@ func newNamespacedNameForStatefulSet( dc *api.CassandraDatacenter, rackName string) types.NamespacedName { - name := api.CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.SanitizedName() + "-" + api.CleanupSubdomain(rackName) + "-sts" + name := api.CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.LabelResourceName() + "-" + api.CleanupSubdomain(rackName) + "-sts" ns := dc.Namespace return types.NamespacedName{ diff --git a/pkg/reconciliation/construct_statefulset_test.go b/pkg/reconciliation/construct_statefulset_test.go index a0530b9e..f53983c1 100644 --- a/pkg/reconciliation/construct_statefulset_test.go +++ b/pkg/reconciliation/construct_statefulset_test.go @@ -641,7 +641,7 @@ func Test_newStatefulSetForCassandraDatacenter_dcNameOverride(t *testing.T) { oplabels.NameLabel: oplabels.NameLabelValue, oplabels.CreatedByLabel: oplabels.CreatedByLabelValue, oplabels.VersionLabel: "4.0.1", - api.DatacenterLabel: "MySuperDC", + api.DatacenterLabel: "dc1", api.ClusterLabel: "piclem", api.RackLabel: dc.Spec.Racks[0].Name, } @@ -652,7 +652,7 @@ func Test_newStatefulSetForCassandraDatacenter_dcNameOverride(t *testing.T) { oplabels.NameLabel: oplabels.NameLabelValue, oplabels.CreatedByLabel: oplabels.CreatedByLabelValue, oplabels.VersionLabel: "4.0.1", - api.DatacenterLabel: "MySuperDC", + api.DatacenterLabel: "dc1", api.ClusterLabel: "piclem", api.RackLabel: dc.Spec.Racks[0].Name, api.CassNodeState: stateReadyToStart, diff --git a/pkg/reconciliation/constructor.go b/pkg/reconciliation/constructor.go index edb29ab2..ea8af3af 100644 --- a/pkg/reconciliation/constructor.go +++ b/pkg/reconciliation/constructor.go @@ -31,7 +31,7 @@ func newPodDisruptionBudgetForDatacenter(dc *api.CassandraDatacenter) *policyv1. pdb := &policyv1.PodDisruptionBudget{ ObjectMeta: metav1.ObjectMeta{ - Name: dc.SanitizedName() + "-pdb", + Name: dc.LabelResourceName() + "-pdb", Namespace: dc.Namespace, Labels: labels, Annotations: anns, @@ -62,8 +62,11 @@ func setOperatorProgressStatus(rc *ReconciliationContext, newState api.ProgressS rc.Datacenter.Status.CassandraOperatorProgress = newState if newState == api.ProgressReady { + if rc.Datacenter.Status.MetadataVersion < 1 { + rc.Datacenter.Status.MetadataVersion = 1 + } if rc.Datacenter.Status.DatacenterName == nil { - rc.Datacenter.Status.DatacenterName = &rc.Datacenter.Spec.DatacenterName + rc.Datacenter.Status.DatacenterName = &rc.Datacenter.Name } } if err := rc.Client.Status().Patch(rc.Ctx, rc.Datacenter, patch); err != nil { diff --git a/pkg/reconciliation/context.go b/pkg/reconciliation/context.go index c3e7dbb0..1069103b 100644 --- a/pkg/reconciliation/context.go +++ b/pkg/reconciliation/context.go @@ -92,7 +92,7 @@ func CreateReconciliationContext( } rc.ReqLogger = rc.ReqLogger. - WithValues("datacenterName", dc.SanitizedName()). + WithValues("datacenterName", dc.LabelResourceName()). WithValues("clusterName", dc.Spec.ClusterName) log.IntoContext(ctx, rc.ReqLogger) @@ -146,8 +146,8 @@ func (rc *ReconciliationContext) validateDatacenterNameConflicts() []error { errs = append(errs, fmt.Errorf("failed to list CassandraDatacenters in namespace %s: %w", dc.Namespace, err)) } else { for _, existingDc := range cassandraDatacenters.Items { - if existingDc.SanitizedName() == dc.SanitizedName() && existingDc.Name != dc.Name { - errs = append(errs, fmt.Errorf("datacenter name/override %s/%s is already in use by CassandraDatacenter %s/%s", dc.Name, dc.SanitizedName(), existingDc.Name, existingDc.SanitizedName())) + if existingDc.LabelResourceName() == dc.LabelResourceName() && existingDc.Name != dc.Name { + errs = append(errs, fmt.Errorf("datacenter name/override %s/%s is already in use by CassandraDatacenter %s/%s", dc.Name, dc.LabelResourceName(), existingDc.Name, existingDc.LabelResourceName())) } } } @@ -164,7 +164,7 @@ func (rc *ReconciliationContext) validateDatacenterNameOverride() []error { return errs } else { if *dc.Status.DatacenterName != dc.Spec.DatacenterName { - errs = append(errs, fmt.Errorf("datacenter %s name override '%s' cannot be changed after creation to '%s'.", dc.Name, dc.Spec.DatacenterName, *dc.Status.DatacenterName)) + errs = append(errs, fmt.Errorf("datacenter %s name override '%s' cannot be changed after creation to '%s'", dc.Name, dc.Spec.DatacenterName, *dc.Status.DatacenterName)) } } diff --git a/pkg/reconciliation/handler_reconcile_test.go b/pkg/reconciliation/handler_reconcile_test.go index 5d995235..1b66984d 100644 --- a/pkg/reconciliation/handler_reconcile_test.go +++ b/pkg/reconciliation/handler_reconcile_test.go @@ -19,6 +19,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -26,7 +27,7 @@ import ( func TestReconcile(t *testing.T) { var ( - name = "cluster-example-cluster" + name = "dc1-example" namespace = "default" size int32 = 2 ) @@ -74,6 +75,7 @@ func TestReconcile(t *testing.T) { Client: fakeClient, Scheme: s, Recorder: record.NewFakeRecorder(100), + Log: ctrl.Log.WithName("controllers").WithName("CassandraDatacenter"), } request := reconcile.Request{ @@ -88,8 +90,8 @@ func TestReconcile(t *testing.T) { t.Fatalf("Reconciliation Failure: (%v)", err) } - if result != (reconcile.Result{Requeue: true, RequeueAfter: 2 * time.Second}) { - t.Error("Reconcile did not return a correct result.") + if result != (reconcile.Result{Requeue: true, RequeueAfter: 10 * time.Second}) { + t.Errorf("Reconcile did not return a correct result. (%v)", result) } } diff --git a/pkg/reconciliation/handler_test.go b/pkg/reconciliation/handler_test.go index d32a2f3b..f8bcb60c 100644 --- a/pkg/reconciliation/handler_test.go +++ b/pkg/reconciliation/handler_test.go @@ -220,7 +220,11 @@ func TestConflictingDcNameOverride(t *testing.T) { Spec: api.CassandraDatacenterSpec{ ClusterName: "cluster1", DatacenterName: "CassandraDatacenter_example", - }}} + }, + Status: api.CassandraDatacenterStatus{ + DatacenterName: ptr.To[string]("CassandraDatacenter_example"), + }, + }} }) errs := rc.validateDatacenterNameConflicts() diff --git a/pkg/reconciliation/reconcile_configsecret.go b/pkg/reconciliation/reconcile_configsecret.go index 038c3b76..947040d9 100644 --- a/pkg/reconciliation/reconcile_configsecret.go +++ b/pkg/reconciliation/reconcile_configsecret.go @@ -131,7 +131,7 @@ func getConfigFromConfigSecret(dc *api.CassandraDatacenter, secret *corev1.Secre // getDatacenterConfigSecretName The format is clusterName-dcName-config func getDatacenterConfigSecretName(dc *api.CassandraDatacenter) string { - return api.CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.SanitizedName() + "-config" + return api.CleanupForKubernetes(dc.Spec.ClusterName) + "-" + dc.LabelResourceName() + "-config" } // getDatacenterConfigSecret Fetches the secret from the api server or creates a new secret diff --git a/pkg/reconciliation/reconcile_datacenter.go b/pkg/reconciliation/reconcile_datacenter.go index f55f67ae..ec8c11e8 100644 --- a/pkg/reconciliation/reconcile_datacenter.go +++ b/pkg/reconciliation/reconcile_datacenter.go @@ -50,13 +50,12 @@ func (rc *ReconciliationContext) ProcessDeletion() result.ReconcileResult { } if _, found := rc.Datacenter.Annotations[api.DecommissionOnDeleteAnnotation]; found { - podList, err := rc.listPods(rc.Datacenter.GetDatacenterLabels()) + dcPods, err := rc.listPods(rc.Datacenter.GetDatacenterLabels()) if err != nil { rc.ReqLogger.Error(err, "Failed to list pods, unable to proceed with deletion") return result.Error(err) } - dcPods := PodPtrsFromPodList(podList) - if len(podList.Items) > 0 { + if len(dcPods) > 0 { rc.ReqLogger.V(1).Info("Deletion is being processed by the decommission check") dcs, err := rc.getClusterDatacenters(dcPods) if err != nil { diff --git a/pkg/reconciliation/reconcile_fql.go b/pkg/reconciliation/reconcile_fql.go index ee3a1143..f6e20ad1 100644 --- a/pkg/reconciliation/reconcile_fql.go +++ b/pkg/reconciliation/reconcile_fql.go @@ -25,7 +25,7 @@ func (rc *ReconciliationContext) CheckFullQueryLogging() result.ReconcileResult rc.ReqLogger.Error(err, "error listing all pods in the cluster to progress full query logging reconciliation") return result.RequeueSoon(2) } - for _, podPtr := range PodPtrsFromPodList(podList) { + for _, podPtr := range podList { features, err := rc.NodeMgmtClient.FeatureSet(podPtr) if err != nil { rc.ReqLogger.Error(err, "failed to verify featureset for FQL support") diff --git a/pkg/reconciliation/reconcile_racks.go b/pkg/reconciliation/reconcile_racks.go index 2d79cb1e..805a178d 100644 --- a/pkg/reconciliation/reconcile_racks.go +++ b/pkg/reconciliation/reconcile_racks.go @@ -34,10 +34,9 @@ import ( ) var ( - ResultShouldNotRequeue reconcile.Result = reconcile.Result{Requeue: false} - ResultShouldRequeueNow reconcile.Result = reconcile.Result{Requeue: true} - ResultShouldRequeueSoon reconcile.Result = reconcile.Result{Requeue: true, RequeueAfter: 2 * time.Second} - ResultShouldRequeueTenSecs reconcile.Result = reconcile.Result{Requeue: true, RequeueAfter: 10 * time.Second} + ResultShouldNotRequeue reconcile.Result = reconcile.Result{Requeue: false} + ResultShouldRequeueNow reconcile.Result = reconcile.Result{Requeue: true} + ResultShouldRequeueSoon reconcile.Result = reconcile.Result{Requeue: true, RequeueAfter: 2 * time.Second} QuietDurationFunc func(int) time.Duration = func(secs int) time.Duration { return time.Duration(secs) * time.Second } ) @@ -618,7 +617,7 @@ func (rc *ReconciliationContext) CheckRackStoppedState() result.ReconcileResult emittedStoppingEvent = true } - rackPods := FilterPodListByLabels(rc.dcPods, rc.Datacenter.GetRackLabels(rackInfo.RackName)) + rackPods := rc.rackPods(rackInfo.RackName) nodesDrained := 0 nodeDrainErrors := 0 @@ -751,7 +750,7 @@ func (rc *ReconciliationContext) CheckPodsReady(endpointData httphelper.CassMeta return result.Error(err) } if atLeastOneFirstNodeNotReady { - return result.RequeueSoon(2) + return result.RequeueSoon(10) } // step 3 - if the cluster isn't healthy, that's ok, but go back to step 1 @@ -1432,8 +1431,8 @@ func (rc *ReconciliationContext) isClusterHealthy() bool { func (rc *ReconciliationContext) labelSeedPods(rackInfo *RackInformation) (int, error) { logger := rc.ReqLogger.WithName("labelSeedPods") - rackLabels := rc.Datacenter.GetRackLabels(rackInfo.RackName) - rackPods := FilterPodListByLabels(rc.dcPods, rackLabels) + rackPods := rc.rackPods(rackInfo.RackName) + sort.SliceStable(rackPods, func(i, j int) bool { return rackPods[i].Name < rackPods[j].Name }) @@ -2150,7 +2149,7 @@ func (rc *ReconciliationContext) refreshSeeds() error { return nil } -func (rc *ReconciliationContext) listPods(selector map[string]string) (*corev1.PodList, error) { +func (rc *ReconciliationContext) listPods(selector map[string]string) ([]*corev1.Pod, error) { rc.ReqLogger.Info("reconcile_racks::listPods") listOptions := &client.ListOptions{ @@ -2165,7 +2164,11 @@ func (rc *ReconciliationContext) listPods(selector map[string]string) (*corev1.P }, } - return podList, rc.Client.List(rc.Ctx, podList, listOptions) + if err := rc.Client.List(rc.Ctx, podList, listOptions); err != nil { + return nil, err + } + + return PodPtrsFromPodList(podList), nil } func (rc *ReconciliationContext) CheckRollingRestart() result.ReconcileResult { @@ -2425,21 +2428,40 @@ func (rc *ReconciliationContext) fixMissingPVC() (bool, error) { return false, nil } +func (rc *ReconciliationContext) datacenterPods() []*corev1.Pod { + if rc.dcPods != nil { + return rc.dcPods + } + + dcSelector := rc.Datacenter.GetDatacenterLabels() + dcPods := FilterPodListByLabels(rc.clusterPods, dcSelector) + + if rc.Datacenter.Status.MetadataVersion < 1 && rc.Datacenter.Status.DatacenterName != nil && *rc.Datacenter.Status.DatacenterName == rc.Datacenter.Spec.DatacenterName { + rc.ReqLogger.Info("Fetching with the old metadata version also") + dcSelector[api.DatacenterLabel] = api.CleanLabelValue(rc.Datacenter.Spec.DatacenterName) + rc.dcPods = append(rc.dcPods, FilterPodListByLabels(rc.clusterPods, dcSelector)...) + } + + return dcPods +} + +func (rc *ReconciliationContext) rackPods(rackName string) []*corev1.Pod { + return FilterPodListByLabels(rc.datacenterPods(), map[string]string{api.RackLabel: rackName}) +} + // ReconcileAllRacks determines if a rack needs to be reconciled. func (rc *ReconciliationContext) ReconcileAllRacks() (reconcile.Result, error) { rc.ReqLogger.Info("reconciliationContext::reconcileAllRacks") logger := rc.ReqLogger - podList, err := rc.listPods(rc.Datacenter.GetClusterLabels()) + pods, err := rc.listPods(rc.Datacenter.GetClusterLabels()) if err != nil { logger.Error(err, "error listing all pods in the cluster") } - rc.clusterPods = PodPtrsFromPodList(podList) - - dcSelector := rc.Datacenter.GetDatacenterLabels() - rc.dcPods = FilterPodListByLabels(rc.clusterPods, dcSelector) + rc.clusterPods = pods + rc.dcPods = rc.datacenterPods() endpointData := rc.getCassMetadataEndpoints() diff --git a/scripts/release-helm-chart.sh b/scripts/release-helm-chart.sh index 4a37b775..d3db2030 100755 --- a/scripts/release-helm-chart.sh +++ b/scripts/release-helm-chart.sh @@ -6,7 +6,7 @@ if [[ ! $0 == scripts/* ]]; then fi # This script assumes k8ssandra is checked out at ../k8ssandra and is checked out at main -if [ "$#" -le 1 ]; then +if [ "$#" -lt 1 ]; then echo "Usage: scripts/release-helm-chart.sh version legacy" echo "Script assumes you are in the correct branch / tag and that k8ssandra repository" echo "has been checked out to ../k8ssandra/. If legacy is set, the script will generate"