Make parallel restart of already bootstrapped nodes the default (#733)
burmanm authored Nov 25, 2024
1 parent bf4a6a4 · commit e580f21
Showing 4 changed files with 111 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -12,6 +12,7 @@ Changelog for Cass Operator, new PRs should update the `main / unreleased` section
 ## unreleased
 
 * [CHANGE] [#720](https://github.com/k8ssandra/cass-operator/issues/720) Always use ObjectMeta.Name for the PodDisruptionBudget resource name, not the DatacenterName
+* [CHANGE] [#731](https://github.com/k8ssandra/cass-operator/issues/731) Make the concurrent restart of already bootstrapped nodes the default. To revert to the old behavior, set the annotation ``cassandra.datastax.com/allow-parallel-starts: "false"`` on the datacenter.
 * [FEATURE] [#651](https://github.com/k8ssandra/cass-operator/issues/651) Add tsreload task for DSE deployments and ability to check if sync operation is available on the mgmt-api side
 * [ENHANCEMENT] [#722](https://github.com/k8ssandra/cass-operator/issues/722) Add back the ability to track cleanup task before marking scale up as done. This is controlled by an annotation cassandra.datastax.com/track-cleanup-tasks
 * [ENHANCEMENT] [#532](https://github.com/k8ssandra/k8ssandra-operator/issues/532) Extend ImageConfig type to allow additional parameters for k8ssandra-operator requirements. These include per-image PullPolicy / PullSecrets as well as additional image. Also add ability to override a single HCD version image.
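For reference, the opt-out described in the #731 entry above is applied as a metadata annotation on the CassandraDatacenter resource. A minimal sketch, reusing the fields of the testdata manifest changed at the bottom of this commit; only the annotation value differs from what that file previously carried:

apiVersion: cassandra.datastax.com/v1beta1
kind: CassandraDatacenter
metadata:
  name: dc1
  annotations:
    # restore the pre-#731 behavior of starting bootstrapped nodes one at a time
    cassandra.datastax.com/allow-parallel-starts: "false"
spec:
  clusterName: cluster1
  serverType: cassandra

Omitting the annotation, or setting it to any value other than "false", keeps the new parallel-start default.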
6 changes: 5 additions & 1 deletion pkg/reconciliation/reconcile_racks.go
@@ -672,6 +672,10 @@ func (rc *ReconciliationContext) checkSeedLabels() (int, error) {
 	return seedCount, nil
 }
 
+func shouldUseFastPath(dc *api.CassandraDatacenter, seedCount int) bool {
+	return seedCount > 0 && !(metav1.HasAnnotation(dc.ObjectMeta, api.AllowParallelStartsAnnotations) && dc.Annotations[api.AllowParallelStartsAnnotations] == "false")
+}
+
 // CheckPodsReady loops over all the server pods and starts them
 func (rc *ReconciliationContext) CheckPodsReady(endpointData httphelper.CassMetadataEndpoints) result.ReconcileResult {
 	rc.ReqLogger.Info("reconcile_racks::CheckPodsReady")
@@ -719,7 +723,7 @@ func (rc *ReconciliationContext) CheckPodsReady(endpointData httphelper.CassMetadataEndpoints) result.ReconcileResult {
 	}
 
 	// step 0 - fastpath
-	if seedCount > 0 && metav1.HasAnnotation(rc.Datacenter.ObjectMeta, api.AllowParallelStartsAnnotations) && rc.Datacenter.Annotations[api.AllowParallelStartsAnnotations] == "true" {
+	if shouldUseFastPath(rc.Datacenter, seedCount) {
 		notReadyPods, err := rc.startBootstrappedNodes(endpointData)
 		if err != nil {
 			return result.Error(err)
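In short, this inverts the default: with at least one seed node up, CheckPodsReady now takes the fast path and restarts already-bootstrapped nodes in parallel unless the datacenter carries the annotation with the value "false"; previously the fast path required the annotation to be explicitly set to "true". The extracted shouldUseFastPath helper makes that gating testable on its own.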
114 changes: 105 additions & 9 deletions pkg/reconciliation/reconcile_racks_test.go
@@ -210,19 +210,16 @@ func TestReconcileRacks_ReconcilePods(t *testing.T) {
 	rc, _, cleanupMockScr := setupTest()
 	defer cleanupMockScr()
 
-	var (
-		one = int32(1)
-	)
-
+	rc.Datacenter.Spec.Size = 1
 	desiredStatefulSet, err := newStatefulSetForCassandraDatacenter(
 		nil,
 		"default",
 		rc.Datacenter,
-		2)
+		1)
 	assert.NoErrorf(t, err, "error occurred creating statefulset")
 
-	desiredStatefulSet.Spec.Replicas = &one
-	desiredStatefulSet.Status.ReadyReplicas = one
+	desiredStatefulSet.Spec.Replicas = ptr.To[int32](1)
+	desiredStatefulSet.Status.ReadyReplicas = 1
 
 	trackObjects := []runtime.Object{
 		desiredStatefulSet,
@@ -238,8 +235,8 @@ func TestReconcileRacks_ReconcilePods(t *testing.T) {
 	rc.Client = fake.NewClientBuilder().WithStatusSubresource(rc.Datacenter).WithRuntimeObjects(trackObjects...).Build()
 
 	nextRack := &RackInformation{}
-	nextRack.RackName = "default"
-	nextRack.NodeCount = 1
+	nextRack.RackName = desiredStatefulSet.Labels[api.RackLabel]
+	nextRack.NodeCount = int(*desiredStatefulSet.Spec.Replicas)
 	nextRack.SeedCount = 1
 
 	rackInfo := []*RackInformation{nextRack}
@@ -1175,9 +1172,12 @@ func mockReadyPodsForStatefulSet(sts *appsv1.StatefulSet, cluster, dc string) []*corev1.Pod {
 		pod.Labels[api.ClusterLabel] = cluster
 		pod.Labels[api.DatacenterLabel] = dc
 		pod.Labels[api.CassNodeState] = "Started"
+		pod.Labels[api.RackLabel] = sts.Labels[api.RackLabel]
 		pod.Status.ContainerStatuses = []corev1.ContainerStatus{{
 			Name:  "cassandra",
 			Ready: true,
 		}}
+		pod.Status.PodIP = fmt.Sprintf("192.168.1.%d", i)
 		pods = append(pods, pod)
 	}
 	return pods
@@ -2952,3 +2952,99 @@ func TestCheckRackLabels(t *testing.T) {
 	require.Equal(result.Continue(), res, "Label updates should not cause errors")
 	require.Subset(desiredStatefulSet.Labels, rc.Datacenter.GetRackLabels("default"))
 }
+
+func TestCheckPodsReadyAllStarted(t *testing.T) {
+	rc, _, cleanupMockScr := setupTest()
+	defer cleanupMockScr()
+	assert := assert.New(t)
+
+	desiredStatefulSet, err := newStatefulSetForCassandraDatacenter(
+		nil,
+		"default",
+		rc.Datacenter,
+		3)
+	assert.NoErrorf(err, "error occurred creating statefulset")
+
+	desiredStatefulSet.Status.ReadyReplicas = *desiredStatefulSet.Spec.Replicas
+
+	trackObjects := []runtime.Object{
+		desiredStatefulSet,
+		rc.Datacenter,
+	}
+
+	mockPods := mockReadyPodsForStatefulSet(desiredStatefulSet, rc.Datacenter.Spec.ClusterName, rc.Datacenter.Name)
+	for idx := range mockPods {
+		mp := mockPods[idx]
+		metav1.SetMetaDataLabel(&mp.ObjectMeta, api.SeedNodeLabel, "true")
+		trackObjects = append(trackObjects, mp)
+	}
+
+	rc.Client = fake.NewClientBuilder().WithStatusSubresource(rc.Datacenter).WithRuntimeObjects(trackObjects...).Build()
+
+	nextRack := &RackInformation{}
+	nextRack.RackName = desiredStatefulSet.Labels[api.RackLabel]
+	nextRack.NodeCount = int(*desiredStatefulSet.Spec.Replicas)
+	nextRack.SeedCount = 1
+
+	rackInfo := []*RackInformation{nextRack}
+
+	rc.desiredRackInformation = rackInfo
+	rc.statefulSets = make([]*appsv1.StatefulSet, len(rackInfo))
+	rc.statefulSets[0] = desiredStatefulSet
+
+	rc.clusterPods = mockPods
+	rc.dcPods = mockPods
+
+	epData := httphelper.CassMetadataEndpoints{
+		Entity: []httphelper.EndpointState{},
+	}
+
+	for i := 0; i < int(*desiredStatefulSet.Spec.Replicas); i++ {
+		ep := httphelper.EndpointState{
+			RpcAddress: fmt.Sprintf("192.168.1.%d", i+1),
+			Status:     "UN",
+		}
+		epData.Entity = append(epData.Entity, ep)
+	}
+
+	res := &http.Response{
+		StatusCode: http.StatusOK,
+		Body:       io.NopCloser(strings.NewReader("OK")),
+	}
+
+	mockHttpClient := mocks.NewHttpClient(t)
+	mockHttpClient.On("Do",
+		mock.MatchedBy(
+			func(req *http.Request) bool {
+				return req != nil
+			})).
+		Return(res, nil).
+		Times(len(epData.Entity) * 2) // reloadSeeds * pods + clusterHealthCheck * pods
+
+	client := httphelper.NodeMgmtClient{
+		Client:   mockHttpClient,
+		Log:      rc.ReqLogger,
+		Protocol: "http",
+	}
+
+	rc.NodeMgmtClient = client
+
+	recRes := rc.CheckPodsReady(epData)
+	assert.Equal(result.Continue(), recRes) // All pods should be up, no need to call anything
+}
+
+func TestShouldUseFastPath(t *testing.T) {
+	dc := &api.CassandraDatacenter{}
+
+	seedCount := 0
+
+	assert := assert.New(t)
+	assert.False(shouldUseFastPath(dc, seedCount))
+	seedCount = 1
+	assert.True(shouldUseFastPath(dc, seedCount))
+
+	metav1.SetMetaDataAnnotation(&dc.ObjectMeta, api.AllowParallelStartsAnnotations, "true")
+	assert.True(shouldUseFastPath(dc, seedCount))
+	metav1.SetMetaDataAnnotation(&dc.ObjectMeta, api.AllowParallelStartsAnnotations, "false")
+	assert.False(shouldUseFastPath(dc, seedCount))
+}
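For anyone verifying locally, the new tests should be runnable with standard Go tooling from the repository root, e.g. `go test ./pkg/reconciliation/...` (assuming the mocks package referenced above is generated or vendored as in the repo).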
2 changes: 0 additions & 2 deletions tests/testdata/default-three-rack-three-node-dc.yaml
@@ -2,8 +2,6 @@ apiVersion: cassandra.datastax.com/v1beta1
 kind: CassandraDatacenter
 metadata:
   name: dc1
-  annotations:
-    cassandra.datastax.com/allow-parallel-starts: "true"
 spec:
   clusterName: cluster1
   serverType: cassandra
