diff --git a/pkg/clusteragent/autoscaling/workload/controller_test.go b/pkg/clusteragent/autoscaling/workload/controller_test.go index e07be01363cb5..8d45825119994 100755 --- a/pkg/clusteragent/autoscaling/workload/controller_test.go +++ b/pkg/clusteragent/autoscaling/workload/controller_test.go @@ -121,10 +121,11 @@ func TestLeaderCreateDeleteLocal(t *testing.T) { // Check internal store content expectedDPAInternal := model.FakePodAutoscalerInternal{ - Namespace: "default", - Name: "dpa-0", - Generation: 1, - Spec: &dpaSpec, + Namespace: "default", + Name: "dpa-0", + Generation: 1, + Spec: &dpaSpec, + CustomRecommenderConfiguration: &model.RecommenderConfiguration{}, } dpaInternal, found := f.store.Get("default/dpa-0") assert.True(t, found) diff --git a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go index a1dc2f722beb1..e34ea792d107c 100644 --- a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go +++ b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go @@ -8,14 +8,17 @@ package model import ( + "encoding/json" "errors" "fmt" "slices" "time" - "github.com/DataDog/datadog-agent/pkg/util/pointer" datadoghq "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1" + "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/DataDog/datadog-agent/pkg/util/pointer" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -28,6 +31,9 @@ const ( // statusRetainedActions is the number of horizontal actions kept in status statusRetainedActions = 5 + + // AnnotationsConfigurationKey is the key used to store custom recommender configuration in annotations + AnnotationsConfigurationKey = "autoscaling.datadoghq.com/custom-recommender" ) // PodAutoscalerInternal holds the necessary data to work with the `DatadogPodAutoscaler` CRD. 
@@ -53,9 +59,15 @@ type PodAutoscalerInternal struct { // (only if owner == remote) settingsTimestamp time.Time - // scalingValues represents the current target scaling values (retrieved from RC) + // scalingValues represents the active scaling values that should be used scalingValues ScalingValues + // mainScalingValues represents the scaling values retrieved from the main recommender (product, optionally a custom endpoint) + mainScalingValues ScalingValues + + // fallbackScalingValues represents the scaling values retrieved from the fallback + fallbackScalingValues ScalingValues + // horizontalLastActions is the last horizontal action successfully taken horizontalLastActions []datadoghq.DatadogPodAutoscalerHorizontalAction @@ -95,6 +107,10 @@ type PodAutoscalerInternal struct { // horizontalEventsRetention is the time to keep horizontal events in memory // based on scale policies horizontalEventsRetention time.Duration + + // customRecommenderConfiguration holds the configuration for custom recommenders, + // Parsed from annotations on the autoscaler + customRecommenderConfiguration *RecommenderConfiguration } // NewPodAutoscalerInternal creates a new PodAutoscalerInternal from a Kubernetes CR @@ -134,6 +150,8 @@ func (p *PodAutoscalerInternal) UpdateFromPodAutoscaler(podAutoscaler *datadoghq p.targetGVK = schema.GroupVersionKind{} // Compute the horizontal events retention again in case .Spec.Policy has changed p.horizontalEventsRetention = getHorizontalEventsRetention(podAutoscaler.Spec.Policy, longestScalingRulePeriodAllowed) + // Compute recommender configuration again in case .Annotations has changed + p.customRecommenderConfiguration = parseCustomConfigurationAnnotation(podAutoscaler.Annotations) } // UpdateFromSettings updates the PodAutoscalerInternal from a new settings @@ -148,16 +166,36 @@ func (p *PodAutoscalerInternal) UpdateFromSettings(podAutoscalerSpec *datadoghq. 
p.horizontalEventsRetention = getHorizontalEventsRetention(podAutoscalerSpec.Policy, longestScalingRulePeriodAllowed) } -// UpdateFromValues updates the PodAutoscalerInternal from a new scaling values +// UpdateFromValues updates the PodAutoscalerInternal scaling values func (p *PodAutoscalerInternal) UpdateFromValues(scalingValues ScalingValues) { p.scalingValues = scalingValues } +// UpdateFromMainValues updates the PodAutoscalerInternal from new main scaling values +func (p *PodAutoscalerInternal) UpdateFromMainValues(mainScalingValues ScalingValues) { + p.mainScalingValues = mainScalingValues +} + +// UpdateFromLocalValues updates the PodAutoscalerInternal from new local (fallback) scaling values +func (p *PodAutoscalerInternal) UpdateFromLocalValues(fallbackScalingValues ScalingValues) { + p.fallbackScalingValues = fallbackScalingValues +} + // RemoveValues clears autoscaling values data from the PodAutoscalerInternal as we stopped autoscaling func (p *PodAutoscalerInternal) RemoveValues() { p.scalingValues = ScalingValues{} } +// RemoveMainValues clears main autoscaling values data from the PodAutoscalerInternal as we stopped autoscaling +func (p *PodAutoscalerInternal) RemoveMainValues() { + p.mainScalingValues = ScalingValues{} +} + +// RemoveLocalValues clears local (fallback) autoscaling values data from the PodAutoscalerInternal as we stopped autoscaling +func (p *PodAutoscalerInternal) RemoveLocalValues() { + p.fallbackScalingValues = ScalingValues{} +} + // UpdateFromHorizontalAction updates the PodAutoscalerInternal from a new horizontal action func (p *PodAutoscalerInternal) UpdateFromHorizontalAction(action *datadoghq.DatadogPodAutoscalerHorizontalAction, err error) { if err != nil { @@ -325,6 +363,16 @@ func (p *PodAutoscalerInternal) ScalingValues() ScalingValues { return p.scalingValues } +// MainScalingValues returns the main scaling values of the PodAutoscaler +func (p *PodAutoscalerInternal) MainScalingValues() ScalingValues { + return p.mainScalingValues +} + +// 
FallbackScalingValues returns the fallback scaling values of the PodAutoscaler +func (p *PodAutoscalerInternal) FallbackScalingValues() ScalingValues { + return p.fallbackScalingValues +} + // HorizontalLastActions returns the last horizontal actions taken func (p *PodAutoscalerInternal) HorizontalLastActions() []datadoghq.DatadogPodAutoscalerHorizontalAction { return p.horizontalLastActions @@ -384,6 +432,11 @@ func (p *PodAutoscalerInternal) TargetGVK() (schema.GroupVersionKind, error) { return p.targetGVK, nil } +// CustomRecommenderConfiguration returns the configuration set on the autoscaler for a custom recommender +func (p *PodAutoscalerInternal) CustomRecommenderConfiguration() *RecommenderConfiguration { + return p.customRecommenderConfiguration +} + // // Helpers // @@ -618,3 +671,13 @@ func getLongestScalingRulesPeriod(rules []datadoghq.DatadogPodAutoscalerScalingR return longest } + +func parseCustomConfigurationAnnotation(annotations map[string]string) *RecommenderConfiguration { + customConfiguration := RecommenderConfiguration{} + + if err := json.Unmarshal([]byte(annotations[AnnotationsConfigurationKey]), &customConfiguration); err != nil { + log.Debugf("Failed to parse annotations for custom recommender configuration: %v", err) + } + + return &customConfiguration +} diff --git a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test.go b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test.go index 15dc2a99bea8d..ceb133afa5e63 100644 --- a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test.go +++ b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test.go @@ -76,3 +76,49 @@ func TestAddHorizontalAction(t *testing.T) { *addedAction2, }, horizontalLastActions) } + +func TestParseCustomConfigurationAnnotation(t *testing.T) { + tests := []struct { + name string + annotations map[string]string + expected RecommenderConfiguration + }{ + { + name: "Empty annotations", + annotations: map[string]string{}, + 
expected: RecommenderConfiguration{}, + }, + { + name: "URL annotation", + annotations: map[string]string{ + AnnotationsConfigurationKey: "{\"endpoint\": \"localhost:8080/test\"}", + }, + expected: RecommenderConfiguration{ + Endpoint: "localhost:8080/test", + }, + }, + { + name: "Settings annotation", + annotations: map[string]string{ + AnnotationsConfigurationKey: "{\"endpoint\": \"localhost:8080/test\", \"settings\": {\"key\": \"value\", \"number\": 1, \"bool\": true, \"array\": [1, 2, 3], \"object\": {\"key\": \"value\"}}}", + }, + expected: RecommenderConfiguration{ + Endpoint: "localhost:8080/test", + Settings: map[string]any{ + "key": "value", + "number": 1.0, + "bool": true, + "array": []interface{}{1.0, 2.0, 3.0}, + "object": map[string]interface{}{"key": "value"}, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + customConfiguration := *parseCustomConfigurationAnnotation(tt.annotations) + assert.Equal(t, tt.expected, customConfiguration) + }) + } +} diff --git a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test_utils.go b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test_utils.go index c05b15b20a9cc..5649d62cdd93d 100644 --- a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test_utils.go +++ b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test_utils.go @@ -24,47 +24,49 @@ import ( // FakePodAutoscalerInternal is a fake PodAutoscalerInternal object. 
type FakePodAutoscalerInternal struct { - Namespace string - Name string - Generation int64 - Spec *datadoghq.DatadogPodAutoscalerSpec - SettingsTimestamp time.Time - CreationTimestamp time.Time - ScalingValues ScalingValues - HorizontalLastActions []datadoghq.DatadogPodAutoscalerHorizontalAction - HorizontalLastLimitReason string - HorizontalLastActionError error - HorizontalEventsRetention time.Duration - VerticalLastAction *datadoghq.DatadogPodAutoscalerVerticalAction - VerticalLastActionError error - CurrentReplicas *int32 - ScaledReplicas *int32 - Error error - Deleted bool - TargetGVK schema.GroupVersionKind + Namespace string + Name string + Generation int64 + Spec *datadoghq.DatadogPodAutoscalerSpec + SettingsTimestamp time.Time + CreationTimestamp time.Time + ScalingValues ScalingValues + HorizontalLastActions []datadoghq.DatadogPodAutoscalerHorizontalAction + HorizontalLastLimitReason string + HorizontalLastActionError error + HorizontalEventsRetention time.Duration + VerticalLastAction *datadoghq.DatadogPodAutoscalerVerticalAction + VerticalLastActionError error + CurrentReplicas *int32 + ScaledReplicas *int32 + Error error + Deleted bool + TargetGVK schema.GroupVersionKind + CustomRecommenderConfiguration *RecommenderConfiguration } // Build creates a PodAutoscalerInternal object from the FakePodAutoscalerInternal. 
func (f FakePodAutoscalerInternal) Build() PodAutoscalerInternal { return PodAutoscalerInternal{ - namespace: f.Namespace, - name: f.Name, - generation: f.Generation, - spec: f.Spec, - settingsTimestamp: f.SettingsTimestamp, - creationTimestamp: f.CreationTimestamp, - scalingValues: f.ScalingValues, - horizontalLastActions: f.HorizontalLastActions, - horizontalLastLimitReason: f.HorizontalLastLimitReason, - horizontalLastActionError: f.HorizontalLastActionError, - horizontalEventsRetention: f.HorizontalEventsRetention, - verticalLastAction: f.VerticalLastAction, - verticalLastActionError: f.VerticalLastActionError, - currentReplicas: f.CurrentReplicas, - scaledReplicas: f.ScaledReplicas, - error: f.Error, - deleted: f.Deleted, - targetGVK: f.TargetGVK, + namespace: f.Namespace, + name: f.Name, + generation: f.Generation, + spec: f.Spec, + settingsTimestamp: f.SettingsTimestamp, + creationTimestamp: f.CreationTimestamp, + scalingValues: f.ScalingValues, + horizontalLastActions: f.HorizontalLastActions, + horizontalLastLimitReason: f.HorizontalLastLimitReason, + horizontalLastActionError: f.HorizontalLastActionError, + horizontalEventsRetention: f.HorizontalEventsRetention, + verticalLastAction: f.VerticalLastAction, + verticalLastActionError: f.VerticalLastActionError, + currentReplicas: f.CurrentReplicas, + scaledReplicas: f.ScaledReplicas, + error: f.Error, + deleted: f.Deleted, + targetGVK: f.TargetGVK, + customRecommenderConfiguration: f.CustomRecommenderConfiguration, } } @@ -110,7 +112,7 @@ func ComparePodAutoscalers(expected any, actual any) string { if fake, ok := x.(FakePodAutoscalerInternal); ok { return fake.Build() } - panic("filer failed - unexpected type") + panic("filter failed - unexpected type") }), ), cmp.FilterValues( diff --git a/pkg/clusteragent/autoscaling/workload/model/rc_schema.go b/pkg/clusteragent/autoscaling/workload/model/rc_schema.go index ee19c3339f590..1d219717c8a2d 100644 --- a/pkg/clusteragent/autoscaling/workload/model/rc_schema.go 
+++ b/pkg/clusteragent/autoscaling/workload/model/rc_schema.go @@ -8,10 +8,6 @@ package model import ( - "time" - - "k8s.io/apimachinery/pkg/api/resource" - kubeAutoscaling "github.com/DataDog/agent-payload/v5/autoscaling/kubernetes" datadoghq "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1" ) @@ -41,61 +37,3 @@ type AutoscalingSettings struct { // Spec is the full spec of the PodAutoscaler Spec *datadoghq.DatadogPodAutoscalerSpec `json:"spec"` } - -// ScalingValues represents the scaling values (horizontal and vertical) for a target -type ScalingValues struct { - // HorizontalError refers to an error encountered by Datadog while computing the horizontal scaling values - HorizontalError error - Horizontal *HorizontalScalingValues - - // VerticalError refers to an error encountered by Datadog while computing the vertical scaling values - VerticalError error - Vertical *VerticalScalingValues - - // Error refers to a general error encountered by Datadog while computing the scaling values - Error error -} - -// HorizontalScalingValues holds the horizontal scaling values for a target -type HorizontalScalingValues struct { - // Source is the source of the value - Source datadoghq.DatadogPodAutoscalerValueSource - - // Timestamp is the time at which the data was generated - Timestamp time.Time - - // Replicas is the desired number of replicas for the target - Replicas int32 -} - -// VerticalScalingValues holds the vertical scaling values for a target -type VerticalScalingValues struct { - // Source is the source of the value - Source datadoghq.DatadogPodAutoscalerValueSource - - // Timestamp is the time at which the data was generated - Timestamp time.Time - - // ResourcesHash is the hash of containerResources - ResourcesHash string - - // ContainerResources holds the resources for a container - ContainerResources []datadoghq.DatadogPodAutoscalerContainerResources -} - -// SumCPUMemoryRequests sums the CPU and memory requests of all containers -func (v 
*VerticalScalingValues) SumCPUMemoryRequests() (cpu, memory resource.Quantity) { - for _, container := range v.ContainerResources { - cpuReq := container.Requests.Cpu() - if cpuReq != nil { - cpu.Add(*cpuReq) - } - - memoryReq := container.Requests.Memory() - if memoryReq != nil { - memory.Add(*memoryReq) - } - } - - return -} diff --git a/pkg/clusteragent/autoscaling/workload/model/recommendations.go b/pkg/clusteragent/autoscaling/workload/model/recommendations.go new file mode 100644 index 0000000000000..5ab1057e93488 --- /dev/null +++ b/pkg/clusteragent/autoscaling/workload/model/recommendations.go @@ -0,0 +1,80 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +//go:build kubeapiserver + +package model + +import ( + "time" + + "k8s.io/apimachinery/pkg/api/resource" + + datadoghq "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1" +) + +// ScalingValues represents the scaling values (horizontal and vertical) for a target +type ScalingValues struct { + // HorizontalError refers to an error encountered by Datadog while computing the horizontal scaling values + HorizontalError error + Horizontal *HorizontalScalingValues + + // VerticalError refers to an error encountered by Datadog while computing the vertical scaling values + VerticalError error + Vertical *VerticalScalingValues + + // Error refers to a general error encountered by Datadog while computing the scaling values + Error error +} + +// HorizontalScalingValues holds the horizontal scaling values for a target +type HorizontalScalingValues struct { + // Source is the source of the value + Source datadoghq.DatadogPodAutoscalerValueSource + + // Timestamp is the time at which the data was generated + Timestamp time.Time + + // Replicas is the desired number of replicas for the target + Replicas 
int32 +} + +// VerticalScalingValues holds the vertical scaling values for a target +type VerticalScalingValues struct { + // Source is the source of the value + Source datadoghq.DatadogPodAutoscalerValueSource + + // Timestamp is the time at which the data was generated + Timestamp time.Time + + // ResourcesHash is the hash of containerResources + ResourcesHash string + + // ContainerResources holds the resources for a container + ContainerResources []datadoghq.DatadogPodAutoscalerContainerResources +} + +// RecommenderConfiguration holds the configuration for a custom recommender +type RecommenderConfiguration struct { + Endpoint string + Settings map[string]any +} + +// SumCPUMemoryRequests sums the CPU and memory requests of all containers +func (v *VerticalScalingValues) SumCPUMemoryRequests() (cpu, memory resource.Quantity) { + for _, container := range v.ContainerResources { + cpuReq := container.Requests.Cpu() + if cpuReq != nil { + cpu.Add(*cpuReq) + } + + memoryReq := container.Requests.Memory() + if memoryReq != nil { + memory.Add(*memoryReq) + } + } + + return +}