From 27e76750738353851cf0a973660fae093dd737a7 Mon Sep 17 00:00:00 2001 From: Amir Alavi Date: Sat, 10 Feb 2024 19:49:27 -0500 Subject: [PATCH] add `ignoreMinNodesCheck` to allow disabling minimum node size check Signed-off-by: Amir Alavi --- README.md | 1 + docs/deprecated/v1alpha1.md | 1 + pkg/api/types.go | 3 + pkg/api/v1alpha1/conversion.go | 1 + pkg/api/v1alpha1/types.go | 3 + pkg/api/v1alpha1/zz_generated.deepcopy.go | 5 ++ pkg/api/v1alpha2/types.go | 3 + pkg/api/v1alpha2/zz_generated.conversion.go | 2 + pkg/api/v1alpha2/zz_generated.deepcopy.go | 5 ++ pkg/api/zz_generated.deepcopy.go | 5 ++ pkg/descheduler/descheduler.go | 17 +++--- pkg/descheduler/descheduler_test.go | 61 +++++++++++++++++++++ 12 files changed, 99 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f172236e00..fb132ed5d7 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,7 @@ These are top level keys in the Descheduler Policy that you can use to configure | `nodeSelector` |`string`| `nil` | limiting the nodes which are processed. Only used when `nodeFit`=`true` and only by the PreEvictionFilter Extension Point | | `maxNoOfPodsToEvictPerNode` |`int`| `nil` | maximum number of pods evicted from each node (summed through all strategies) | | `maxNoOfPodsToEvictPerNamespace` |`int`| `nil` | maximum number of pods evicted from each namespace (summed through all strategies) | +| `ignoreMinNodesCheck` |`bool`| `nil` | descheduler requires more than one node in the cluster, setting this to `true` will disable the check | ### Evictor Plugin configuration (Default Evictor) diff --git a/docs/deprecated/v1alpha1.md b/docs/deprecated/v1alpha1.md index 5867a3b499..2ab916df7f 100644 --- a/docs/deprecated/v1alpha1.md +++ b/docs/deprecated/v1alpha1.md @@ -155,6 +155,7 @@ evictLocalStoragePods: true evictSystemCriticalPods: true maxNoOfPodsToEvictPerNode: 40 ignorePvcPods: false +ignoreMinNodesCheck: false strategies: ... ``` diff --git a/pkg/api/types.go b/pkg/api/types.go index f874c3dd8a..6cd9abd407 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -38,6 +38,9 @@ type DeschedulerPolicy struct { // MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace. MaxNoOfPodsToEvictPerNamespace *uint + + // IgnoreMinNodesCheck allows skipping the min nodes check, if set to true. + IgnoreMinNodesCheck *bool } // Namespaces carries a list of included/excluded namespaces diff --git a/pkg/api/v1alpha1/conversion.go b/pkg/api/v1alpha1/conversion.go index 7aaa5e51d4..05a014fe44 100644 --- a/pkg/api/v1alpha1/conversion.go +++ b/pkg/api/v1alpha1/conversion.go @@ -234,6 +234,7 @@ func V1alpha1ToInternal( out.NodeSelector = deschedulerPolicy.NodeSelector out.MaxNoOfPodsToEvictPerNamespace = deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace out.MaxNoOfPodsToEvictPerNode = deschedulerPolicy.MaxNoOfPodsToEvictPerNode + out.IgnoreMinNodesCheck = deschedulerPolicy.IgnoreMinNodesCheck return nil } diff --git a/pkg/api/v1alpha1/types.go b/pkg/api/v1alpha1/types.go index f70b90068b..6a64df25e6 100644 --- a/pkg/api/v1alpha1/types.go +++ b/pkg/api/v1alpha1/types.go @@ -49,6 +49,9 @@ type DeschedulerPolicy struct { // MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace. MaxNoOfPodsToEvictPerNamespace *uint `json:"maxNoOfPodsToEvictPerNamespace,omitempty"` + + // IgnoreMinNodesCheck allows skipping the min nodes check, if set to true. + IgnoreMinNodesCheck *bool `json:"ignoreMinNodesCheck,omitempty"` } type ( diff --git a/pkg/api/v1alpha1/zz_generated.deepcopy.go b/pkg/api/v1alpha1/zz_generated.deepcopy.go index 2440c24006..a1cd0c7e2d 100644 --- a/pkg/api/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha1/zz_generated.deepcopy.go @@ -72,6 +72,11 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) { *out = new(uint) **out = **in } + if in.IgnoreMinNodesCheck != nil { + in, out := &in.IgnoreMinNodesCheck, &out.IgnoreMinNodesCheck + *out = new(bool) + **out = **in + } return } diff --git a/pkg/api/v1alpha2/types.go b/pkg/api/v1alpha2/types.go index 72c7949d26..4fd946260d 100644 --- a/pkg/api/v1alpha2/types.go +++ b/pkg/api/v1alpha2/types.go @@ -37,6 +37,9 @@ type DeschedulerPolicy struct { // MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace. MaxNoOfPodsToEvictPerNamespace *uint `json:"maxNoOfPodsToEvictPerNamespace,omitempty"` + + // IgnoreMinNodesCheck allows skipping the min nodes check, if set to true. + IgnoreMinNodesCheck *bool `json:"ignoreMinNodesCheck,omitempty"` } type DeschedulerProfile struct { diff --git a/pkg/api/v1alpha2/zz_generated.conversion.go b/pkg/api/v1alpha2/zz_generated.conversion.go index c405000434..0441b76ac5 100644 --- a/pkg/api/v1alpha2/zz_generated.conversion.go +++ b/pkg/api/v1alpha2/zz_generated.conversion.go @@ -104,6 +104,7 @@ func autoConvert_v1alpha2_DeschedulerPolicy_To_api_DeschedulerPolicy(in *Desched out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector)) out.MaxNoOfPodsToEvictPerNode = (*uint)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNode)) out.MaxNoOfPodsToEvictPerNamespace = (*uint)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNamespace)) + out.IgnoreMinNodesCheck = (*bool)(unsafe.Pointer(in.IgnoreMinNodesCheck)) return nil } @@ -122,6 +123,7 @@ func autoConvert_api_DeschedulerPolicy_To_v1alpha2_DeschedulerPolicy(in *api.Des out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector)) out.MaxNoOfPodsToEvictPerNode = (*uint)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNode)) out.MaxNoOfPodsToEvictPerNamespace = (*uint)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNamespace)) + out.IgnoreMinNodesCheck = (*bool)(unsafe.Pointer(in.IgnoreMinNodesCheck)) return nil } diff --git a/pkg/api/v1alpha2/zz_generated.deepcopy.go b/pkg/api/v1alpha2/zz_generated.deepcopy.go index af73d50233..64b6be6c2a 100644 --- a/pkg/api/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/api/v1alpha2/zz_generated.deepcopy.go @@ -51,6 +51,11 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) { *out = new(uint) **out = **in } + if in.IgnoreMinNodesCheck != nil { + in, out := &in.IgnoreMinNodesCheck, &out.IgnoreMinNodesCheck + *out = new(bool) + **out = **in + } return } diff --git a/pkg/api/zz_generated.deepcopy.go b/pkg/api/zz_generated.deepcopy.go index c4abcbf94b..9ffa212a0b 100644 --- a/pkg/api/zz_generated.deepcopy.go +++ b/pkg/api/zz_generated.deepcopy.go @@ -51,6 +51,11 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) { *out = new(uint) **out = **in } + if in.IgnoreMinNodesCheck != nil { + in, out := &in.IgnoreMinNodesCheck, &out.IgnoreMinNodesCheck + *out = new(bool) + **out = **in + } return } diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index 17b0b16e43..76d1413636 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -25,25 +25,25 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" - "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/client-go/discovery" - "k8s.io/client-go/informers" - "k8s.io/client-go/tools/events" - componentbaseconfig "k8s.io/component-base/config" - "k8s.io/klog/v2" - v1 "k8s.io/api/core/v1" policy "k8s.io/api/policy/v1" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" utilversion "k8s.io/apimachinery/pkg/util/version" "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/discovery" + "k8s.io/client-go/informers" clientset "k8s.io/client-go/kubernetes" fakeclientset "k8s.io/client-go/kubernetes/fake" listersv1 "k8s.io/client-go/listers/core/v1" schedulingv1 "k8s.io/client-go/listers/scheduling/v1" core "k8s.io/client-go/testing" + "k8s.io/client-go/tools/events" + componentbaseconfig "k8s.io/component-base/config" + "k8s.io/klog/v2" + "k8s.io/utils/ptr" "sigs.k8s.io/descheduler/pkg/descheduler/client" eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils" @@ -116,8 +116,9 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node) metrics.DeschedulerLoopDuration.With(map[string]string{}).Observe(time.Since(loopStartDuration).Seconds()) }(time.Now()) + shouldCheckMinNodes := !ptr.Deref(d.deschedulerPolicy.IgnoreMinNodesCheck, false) // if len is still <= 1 error out - if len(nodes) <= 1 { + if shouldCheckMinNodes && len(nodes) <= 1 { klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..") return fmt.Errorf("the cluster size is 0 or 1") } diff --git a/pkg/descheduler/descheduler_test.go b/pkg/descheduler/descheduler_test.go index 596def1861..0e3034742a 100644 --- a/pkg/descheduler/descheduler_test.go +++ b/pkg/descheduler/descheduler_test.go @@ -13,8 +13,11 @@ import ( "k8s.io/apimachinery/pkg/runtime" apiversion "k8s.io/apimachinery/pkg/version" fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/informers" fakeclientset "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" + "k8s.io/client-go/tools/events" + "k8s.io/utils/ptr" "sigs.k8s.io/descheduler/cmd/descheduler/app/options" "sigs.k8s.io/descheduler/pkg/api" "sigs.k8s.io/descheduler/pkg/api/v1alpha1" @@ -248,6 +251,64 @@ func TestRootCancelWithNoInterval(t *testing.T) { } } +func TestRunDeschedulerLoop(t *testing.T) { + n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil) + n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil) + + testCases := map[string]struct { + expectError bool + deschedulerPolicy *api.DeschedulerPolicy + nodes []*v1.Node + }{ + "nodes=1, IgnoreMinNodesCheck=true, no error": { + expectError: false, + deschedulerPolicy: &api.DeschedulerPolicy{IgnoreMinNodesCheck: ptr.To[bool](true)}, + nodes: []*v1.Node{n1}, + }, + "nodes=1, IgnoreMinNodesCheck=false, has error": { + expectError: true, + deschedulerPolicy: &api.DeschedulerPolicy{IgnoreMinNodesCheck: ptr.To[bool](false)}, + nodes: []*v1.Node{n1}, + }, + "nodes=1, IgnoreMinNodesCheck=nil, has error": { + expectError: true, + deschedulerPolicy: &api.DeschedulerPolicy{IgnoreMinNodesCheck: nil}, + nodes: []*v1.Node{n1}, + }, + "nodes=2, IgnoreMinNodesCheck=false, no error": { + expectError: false, + deschedulerPolicy: &api.DeschedulerPolicy{IgnoreMinNodesCheck: ptr.To[bool](false)}, + nodes: []*v1.Node{n1, n2}, + }, + "nodes=2, IgnoreMinNodesCheck=true, no error": { + expectError: false, + deschedulerPolicy: &api.DeschedulerPolicy{IgnoreMinNodesCheck: ptr.To[bool](true)}, + nodes: []*v1.Node{n1, n2}, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + ctx := context.Background() + fakeClient := fakeclientset.NewSimpleClientset(n1, n2) + + eventRecorder := &events.FakeRecorder{} + sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0) + + instance, err := newDescheduler(ctx, &options.DeschedulerServer{}, tc.deschedulerPolicy, "v1", eventRecorder, sharedInformerFactory) + if err != nil { + t.Fatalf("unable to initialize descheduler") + } + + err = instance.runDeschedulerLoop(ctx, tc.nodes) + hasError := err != nil + if tc.expectError != hasError { + t.Errorf("unexpected descheduler loop behavior. err %v", err) + } + }) + } +} + func TestValidateVersionCompatibility(t *testing.T) { type testCase struct { name string