[clusteragent/autoscaling] Impose 100 DatadogPodAutoscaler limit in cluster agent #28684
@@ -250,9 +250,12 @@ func (c *Controller) syncPodAutoscaler(ctx context.Context, key, ns, name string
    // and compare it with the one in the PodAutoscaler. If they differ, we should update the PodAutoscaler
    // otherwise store the Generation
    if podAutoscalerInternal.Generation() != podAutoscaler.Generation {
-       podAutoscalerInternal.UpdateCreationTimestamp(podAutoscaler.CreationTimestamp.Time)
+       if podAutoscalerInternal.CreationTimestamp().IsZero() {
+           podAutoscalerInternal.UpdateCreationTimestamp(podAutoscaler.CreationTimestamp.Time)
+           return autoscaling.Requeue, c.updateAutoscalerStatusAndUnlock(ctx, key, ns, name, nil, podAutoscalerInternal, podAutoscaler)
+       }

-       localHash, err := autoscaling.ObjectHash(podAutoscalerInternal.Spec)
+       localHash, err := autoscaling.ObjectHash(podAutoscalerInternal.Spec())

Review comment: Good find! We need to check all usage of …
        if err != nil {
            c.store.Unlock(key)
            return autoscaling.Requeue, fmt.Errorf("Failed to compute Spec hash for PodAutoscaler: %s/%s, err: %v", ns, name, err)
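A minimal, standalone sketch of the pattern introduced above, for readers outside this controller: on the first reconcile the internal record only copies the object's CreationTimestamp and asks to be requeued, so later validation never runs against a zero timestamp. The type and function names below are illustrative, not the cluster agent's actual API.

```go
package main

import (
    "fmt"
    "time"
)

// internalRecord stands in for the stored PodAutoscalerInternal.
type internalRecord struct {
    creationTimestamp time.Time
}

// syncOnce mimics the bootstrap step: the first pass only records the live
// object's CreationTimestamp and reports that the key should be requeued;
// subsequent passes proceed with a non-zero timestamp.
func syncOnce(rec *internalRecord, liveCreation time.Time) (requeue bool) {
    if rec.creationTimestamp.IsZero() {
        rec.creationTimestamp = liveCreation
        return true
    }
    return false
}

func main() {
    rec := &internalRecord{}
    fmt.Println(syncOnce(rec, time.Now())) // true: first pass records the timestamp
    fmt.Println(syncOnce(rec, time.Now())) // false: later passes continue to validation
}
```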
@@ -420,7 +423,7 @@ func (c *Controller) deletePodAutoscaler(ns, name string) error {
func (c *Controller) validateAutoscaler(podAutoscalerInternal model.PodAutoscalerInternal) error {
    // Check that we are within the limit of 100 DatadogPodAutoscalers
    key := podAutoscalerInternal.ID()
-   if !c.limitHeap.Exists(key) {
+   if !podAutoscalerInternal.CreationTimestamp().IsZero() && !c.limitHeap.Exists(key) {
Review comment: Is it possible to reach this point while having a zero CreationTimestamp?
Reply: Good point! I can update it to check if the value in the store is 0 if necessary, but removed for now.
        return fmt.Errorf("Autoscaler disabled as maximum number per cluster reached (%d)", maxDatadogPodAutoscalerObjects)
    }
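To make the limit mechanism concrete: the controller tracks DatadogPodAutoscaler keys in a heap ordered by creation timestamp, so that when the cap is exceeded the oldest objects stay active and newer ones are rejected (this is what the test below exercises with dpa-0/dpa-1/dpa-2). The following is a small self-contained sketch of that idea; TimestampHeap, Insert, and the capacity of 2 are illustrative assumptions, not the actual limitHeap implementation.

```go
package main

import (
    "container/heap"
    "fmt"
    "time"
)

type entry struct {
    key string
    ts  time.Time
}

// maxHeap keeps the entry with the newest timestamp at the root.
type maxHeap []entry

func (h maxHeap) Len() int            { return len(h) }
func (h maxHeap) Less(i, j int) bool  { return h[i].ts.After(h[j].ts) }
func (h maxHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *maxHeap) Push(x interface{}) { *h = append(*h, x.(entry)) }
func (h *maxHeap) Pop() interface{} {
    old := *h
    x := old[len(old)-1]
    *h = old[:len(old)-1]
    return x
}

// TimestampHeap tracks at most maxSize keys, preferring the oldest ones.
type TimestampHeap struct {
    maxSize int
    heap    maxHeap
    Keys    map[string]bool
}

func NewTimestampHeap(maxSize int) *TimestampHeap {
    return &TimestampHeap{maxSize: maxSize, Keys: map[string]bool{}}
}

func (t *TimestampHeap) Exists(key string) bool { return t.Keys[key] }

// Insert adds key unless the heap is full and key is newer than everything tracked.
func (t *TimestampHeap) Insert(key string, ts time.Time) {
    if t.Keys[key] {
        return
    }
    if t.heap.Len() >= t.maxSize {
        newest := t.heap[0]
        if !ts.Before(newest.ts) {
            return // over the limit: newer than every tracked entry
        }
        heap.Pop(&t.heap) // evict the newest tracked entry to make room
        delete(t.Keys, newest.key)
    }
    heap.Push(&t.heap, entry{key: key, ts: ts})
    t.Keys[key] = true
}

func main() {
    h := NewTimestampHeap(2) // capacity lowered to 2 for the demo (the PR uses 100)
    now := time.Now()
    h.Insert("default/dpa-1", now)
    h.Insert("default/dpa-2", now.Add(time.Hour))
    h.Insert("default/dpa-0", now.Add(-time.Hour)) // older: evicts dpa-2
    fmt.Println(h.Exists("default/dpa-0"), h.Exists("default/dpa-1"), h.Exists("default/dpa-2"))
    // Output: true true false
}
```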
@@ -71,17 +71,29 @@ func newFixture(t *testing.T, testTime time.Time) *fixture {
}

func newFakePodAutoscaler(ns, name string, gen int64, creationTimestamp time.Time, spec datadoghq.DatadogPodAutoscalerSpec, status datadoghq.DatadogPodAutoscalerStatus) (obj *unstructured.Unstructured, dpa *datadoghq.DatadogPodAutoscaler) {
-   dpa = &datadoghq.DatadogPodAutoscaler{
-       TypeMeta: podAutoscalerMeta,
-       ObjectMeta: metav1.ObjectMeta{
-           Name: name,
-           Namespace: ns,
-           Generation: gen,
-           UID: uuid.NewUUID(),
-           CreationTimestamp: metav1.NewTime(creationTimestamp),
-       },
-       Spec: spec,
-       Status: status,
-   }
+   if gen == -1 { // Create fake pod autoscaler for remote owner
Review comment: nit: avoid duplication and set in …
+       dpa = &datadoghq.DatadogPodAutoscaler{
+           TypeMeta: podAutoscalerMeta,
+           ObjectMeta: metav1.ObjectMeta{
+               Name: name,
+               Namespace: ns,
+           },
+           Spec: spec,
+           Status: status,
+       }
+   } else {
+       dpa = &datadoghq.DatadogPodAutoscaler{
+           TypeMeta: podAutoscalerMeta,
+           ObjectMeta: metav1.ObjectMeta{
+               Name: name,
+               Namespace: ns,
+               Generation: gen,
+               UID: uuid.NewUUID(),
+               CreationTimestamp: metav1.NewTime(creationTimestamp),
+           },
+           Spec: spec,
+           Status: status,
+       }
+   }

    obj, err := autoscaling.ToUnstructured(dpa)
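One possible shape for the duplication nit above (an assumption about what the reviewer had in mind, and it presumes the same imports as the surrounding test file): build the shared fields once and only attach server-side metadata when the fixture is not the remote-owner case (gen == -1).

```go
// Sketch only; error handling via panic is illustrative for a test helper.
func newFakePodAutoscaler(ns, name string, gen int64, creationTimestamp time.Time, spec datadoghq.DatadogPodAutoscalerSpec, status datadoghq.DatadogPodAutoscalerStatus) (*unstructured.Unstructured, *datadoghq.DatadogPodAutoscaler) {
    dpa := &datadoghq.DatadogPodAutoscaler{
        TypeMeta: podAutoscalerMeta,
        ObjectMeta: metav1.ObjectMeta{
            Name:      name,
            Namespace: ns,
        },
        Spec:   spec,
        Status: status,
    }
    if gen != -1 {
        // Only non-remote fixtures carry server-side metadata.
        dpa.ObjectMeta.Generation = gen
        dpa.ObjectMeta.UID = uuid.NewUUID()
        dpa.ObjectMeta.CreationTimestamp = metav1.NewTime(creationTimestamp)
    }
    obj, err := autoscaling.ToUnstructured(dpa)
    if err != nil {
        panic(err)
    }
    return obj, dpa
}
```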
@@ -492,3 +504,209 @@ func TestPodAutoscalerLocalOwnerObjectsLimit(t *testing.T) {
    assert.Falsef(t, f.autoscalingHeap.Keys[dpa1ID], "Expected dpa-1 to not be in heap")
    assert.Truef(t, f.autoscalingHeap.Keys[dpa2ID], "Expected dpa-2 to be in heap")
}
+
+func TestPodAutoscalerRemoteOwnerObjectsLimit(t *testing.T) {
+   testTime := time.Now()
+   f := newFixture(t, testTime)
+
+   dpaSpec := datadoghq.DatadogPodAutoscalerSpec{
+       TargetRef: autoscalingv2.CrossVersionObjectReference{
+           Kind: "Deployment",
+           Name: "app-0",
+           APIVersion: "apps/v1",
+       },
+       // Remote owner means .Spec source of truth is Datadog App
+       Owner: datadoghq.DatadogPodAutoscalerRemoteOwner,
+   }
+
+   dpa1Spec := datadoghq.DatadogPodAutoscalerSpec{
+       TargetRef: autoscalingv2.CrossVersionObjectReference{
+           Kind: "Deployment",
+           Name: "app-1",
+           APIVersion: "apps/v1",
+       },
+       // Remote owner means .Spec source of truth is Datadog App
+       Owner: datadoghq.DatadogPodAutoscalerRemoteOwner,
+   }
+   dpa2Spec := datadoghq.DatadogPodAutoscalerSpec{
+       TargetRef: autoscalingv2.CrossVersionObjectReference{
+           Kind: "Deployment",
+           Name: "app-2",
+           APIVersion: "apps/v1",
+       },
+       // Remote owner means .Spec source of truth is Datadog App
+       Owner: datadoghq.DatadogPodAutoscalerRemoteOwner,
+   }
+
+   dpaInternal := model.FakePodAutoscalerInternal{
+       Namespace: "default",
+       Name: "dpa-0",
+       Spec: &dpaSpec,
+   }
+   f.store.Set("default/dpa-0", dpaInternal.Build(), controllerID)
+
+   dpaInternal1 := model.FakePodAutoscalerInternal{
+       Namespace: "default",
+       Name: "dpa-1",
+       Spec: &dpa1Spec,
+   }
+   f.store.Set("default/dpa-1", dpaInternal1.Build(), controllerID)
+
+   dpaInternal2 := model.FakePodAutoscalerInternal{
+       Namespace: "default",
+       Name: "dpa-2",
+       Spec: &dpa2Spec,
+   }
+   f.store.Set("default/dpa-2", dpaInternal2.Build(), controllerID)
+
+   // Should create object in Kubernetes
+   expectedStatus := datadoghq.DatadogPodAutoscalerStatus{
+       Conditions: []datadoghq.DatadogPodAutoscalerCondition{
+           {
+               Type: datadoghq.DatadogPodAutoscalerErrorCondition,
+               Status: corev1.ConditionFalse,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerActiveCondition,
+               Status: corev1.ConditionTrue,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerHorizontalAbleToRecommendCondition,
+               Status: corev1.ConditionUnknown,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerVerticalAbleToRecommendCondition,
+               Status: corev1.ConditionUnknown,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerHorizontalScalingLimitedCondition,
+               Status: corev1.ConditionFalse,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerHorizontalAbleToScaleCondition,
+               Status: corev1.ConditionUnknown,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerVerticalAbleToApply,
+               Status: corev1.ConditionUnknown,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+       },
+   }
+   expectedUnstructured, _ := newFakePodAutoscaler("default", "dpa-0", -1, time.Time{}, dpaSpec, expectedStatus)
+   f.ExpectCreateAction(expectedUnstructured)
+   f.RunControllerSync(true, "default/dpa-0")
+
+   expectedUnstructured1, _ := newFakePodAutoscaler("default", "dpa-1", -1, time.Time{}, dpa1Spec, expectedStatus)
+   f.Actions = nil
+   f.ExpectCreateAction(expectedUnstructured1)
+   f.RunControllerSync(true, "default/dpa-1")
+
+   expectedUnstructured2, _ := newFakePodAutoscaler("default", "dpa-2", -1, time.Time{}, dpa2Spec, expectedStatus)
+   f.Actions = nil
+   f.ExpectCreateAction(expectedUnstructured2)
+   f.RunControllerSync(true, "default/dpa-2")
+   assert.Len(t, f.store.GetAll(), 3)
+
+   dpaTime := testTime.Add(-1 * time.Hour)
+   dpa1Time := testTime
+   dpa2Time := testTime.Add(1 * time.Hour)
+
+   dpa, dpaTyped := newFakePodAutoscaler("default", "dpa-0", 1, dpaTime, dpaSpec, expectedStatus)
+   dpa1, dpaTyped1 := newFakePodAutoscaler("default", "dpa-1", 1, dpa1Time, dpaSpec, expectedStatus)
+   dpa2, dpaTyped2 := newFakePodAutoscaler("default", "dpa-2", 1, dpa2Time, dpaSpec, expectedStatus)
+
+   f.Actions = nil
+   f.InformerObjects = append(f.InformerObjects, dpa, dpa1, dpa2)
+   f.Objects = append(f.Objects, dpaTyped, dpaTyped1, dpaTyped2)
+
+   // Check that DatadogPodAutoscaler object is inserted into heap
+   f.RunControllerSync(true, "default/dpa-1")
+   assert.Equal(t, 1, f.autoscalingHeap.MaxHeap.Len())
+   assert.Equal(t, "default/dpa-1", f.autoscalingHeap.MaxHeap.Peek().Key)
+   assert.Truef(t, f.autoscalingHeap.Keys["default/dpa-1"], "Expected dpa-1 to be in heap")
+
+   // Check that multiple objects can be inserted with ordering preserved
+   f.RunControllerSync(true, "default/dpa-2")
+   assert.Equal(t, 2, f.autoscalingHeap.MaxHeap.Len())
+   assert.Equal(t, "default/dpa-2", f.autoscalingHeap.MaxHeap.Peek().Key)
+   assert.Truef(t, f.autoscalingHeap.Keys["default/dpa-1"], "Expected dpa-1 to be in heap")
+   assert.Truef(t, f.autoscalingHeap.Keys["default/dpa-2"], "Expected dpa-2 to be in heap")
+
+   // Check that heap ordering is preserved and limit is not exceeded
+   f.RunControllerSync(true, "default/dpa-0")
+   assert.Equal(t, 2, f.autoscalingHeap.MaxHeap.Len())
+   assert.Equal(t, "default/dpa-1", f.autoscalingHeap.MaxHeap.Peek().Key)
+   assert.Truef(t, f.autoscalingHeap.Keys["default/dpa-0"], "Expected dpa-0 to be in heap")
+   assert.Truef(t, f.autoscalingHeap.Keys["default/dpa-1"], "Expected dpa-1 to be in heap")
+   assert.Falsef(t, f.autoscalingHeap.Keys["default/dpa-2"], "Expected dpa-2 to not be in heap")
+
+   // Check that when object (dpa1) is deleted, heap is updated accordingly
+   dpaInternal1.Deleted = true
+   f.store.Set("default/dpa-1", dpaInternal1.Build(), controllerID)
+   f.ExpectDeleteAction("default", "dpa-1")
+   f.RunControllerSync(true, "default/dpa-1")
+   assert.Len(t, f.store.GetAll(), 3)
+
+   f.InformerObjects = nil
+   f.Objects = nil
+   f.Actions = nil
+
+   f.RunControllerSync(true, "default/dpa-1")
+
+   // dpa-2 status currently has an error, it will get resolved in next reconcile
+   errorStatus := datadoghq.DatadogPodAutoscalerStatus{
+       Conditions: []datadoghq.DatadogPodAutoscalerCondition{
+           {
+               Type: datadoghq.DatadogPodAutoscalerErrorCondition,
+               Status: corev1.ConditionTrue,
+               LastTransitionTime: metav1.NewTime(testTime),
+               Reason: "Autoscaler disabled as maximum number per cluster reached (100)",
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerActiveCondition,
+               Status: corev1.ConditionTrue,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerHorizontalAbleToRecommendCondition,
+               Status: corev1.ConditionUnknown,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerVerticalAbleToRecommendCondition,
+               Status: corev1.ConditionUnknown,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerHorizontalScalingLimitedCondition,
+               Status: corev1.ConditionFalse,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerHorizontalAbleToScaleCondition,
+               Status: corev1.ConditionUnknown,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+           {
+               Type: datadoghq.DatadogPodAutoscalerVerticalAbleToApply,
+               Status: corev1.ConditionUnknown,
+               LastTransitionTime: metav1.NewTime(testTime),
+           },
+       },
+   }
+   dpa2, dpaTyped2 = newFakePodAutoscaler("default", "dpa-2", 0, dpa2Time, dpaSpec, errorStatus)
+   f.InformerObjects = append(f.InformerObjects, dpa2)
+   f.Objects = append(f.Objects, dpaTyped2)
+   f.RunControllerSync(true, "default/dpa-2")
+   assert.Len(t, f.store.GetAll(), 2)
+   assert.Truef(t, f.autoscalingHeap.Keys["default/dpa-0"], "Expected dpa-0 to be in heap")
+   assert.Falsef(t, f.autoscalingHeap.Keys["default/dpa-1"], "Expected dpa-1 to not be in heap")
+   assert.Truef(t, f.autoscalingHeap.Keys["default/dpa-2"], "Expected dpa-2 to be in heap")
+}
Review comment: Is there any reason why we requeue here? I'd not expect it to be necessary (similar to the Generation update). nit: I'd move this code closer to the SetGeneration for consistency.
Reply: Originally set it to requeue to trigger a store update earlier and minimize the amount of time that a status may show an error state incorrectly (i.e. when the limit is not exceeded but the key hasn't yet been added to the heap because it had a zero creation timestamp). Moved this and removed the requeue for now as well!