[clusteragent/autoscaling] Add local fallback fields to DatadogPodAutoscalerInternal #30776

Open · wants to merge 18 commits into base: main

Changes from 7 commits

Commits (18)
2346774
Add annotations and local recommendations to pod autoscaler internal
jennchenn Nov 1, 2024
bf45051
Extend scaling values with local recommendations
jennchenn Nov 4, 2024
c54a91e
Track previous horizontal recommendations
jennchenn Nov 4, 2024
0ca33ba
Fix linter errors
jennchenn Nov 5, 2024
2b491ac
fixup! Track previous horizontal recommendations
jennchenn Nov 5, 2024
1957a34
Define bool to determine when local fallback active
jennchenn Nov 5, 2024
f1305ff
Check target source prior to updating DPAI from existing status
jennchenn Nov 5, 2024
3e72df5
Define annotation structure and parse directly
jennchenn Nov 6, 2024
70ed026
Use scalingValues to represent active scaling values
jennchenn Nov 6, 2024
3afc059
Update function description comments
jennchenn Dec 4, 2024
5757b59
Parse annotations to new custom recommender config type
jennchenn Dec 5, 2024
92263a1
Rename type CustomRecommenderConfiguration to RecommenderConfiguration
jennchenn Dec 5, 2024
fffced9
Merge remote-tracking branch 'origin/main' into jenn/CASCL-102_adapt-…
jennchenn Dec 5, 2024
b69bcfb
Add recommender configuration type comment
jennchenn Dec 5, 2024
8424eb9
fixup! Merge remote-tracking branch 'origin/main' into jenn/CASCL-102…
jennchenn Dec 9, 2024
5b04388
Merge remote-tracking branch 'origin/main' into jenn/CASCL-102_adapt-…
jennchenn Dec 9, 2024
767066e
Add custom recommender configuration to fake DPAI for tests
jennchenn Dec 9, 2024
5dd8695
Set custom recommender configuration once
jennchenn Dec 9, 2024
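
For review context, here is a minimal sketch of how the new fields are expected to be fed and read. The caller below is hypothetical: nothing in this PR wires it up yet (UpdateFromLocalValues and RemoveLocalValues are still marked nolint:unused), the annotation key is made up, and the mechanism that flips isLocalFallbackActive is outside this diff.

// Hypothetical caller, illustration only. The model package sits behind the
// kubeapiserver build tag, so build with -tags kubeapiserver.
package main

import (
    "fmt"

    datadoghq "github.com/DataDog/datadog-operator/apis/datadoghq/v1alpha1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    "github.com/DataDog/datadog-agent/pkg/clusteragent/autoscaling/workload/model"
)

func main() {
    // Annotations on the CR are now carried into the internal model (the key below is invented).
    dpa := &datadoghq.DatadogPodAutoscaler{
        ObjectMeta: metav1.ObjectMeta{
            Namespace:   "default",
            Name:        "example",
            Annotations: map[string]string{"example.com/recommender": "local"},
        },
    }
    pai := model.NewPodAutoscalerInternal(dpa)

    // Main (remote) recommendations keep flowing through the existing setter.
    pai.UpdateFromValues(model.ScalingValues{
        Horizontal: &model.HorizontalScalingValues{Replicas: 5},
    })

    // Local fallback recommendations are tracked separately and do not overwrite the main values.
    pai.UpdateFromLocalValues(model.ScalingValues{
        Horizontal: &model.HorizontalScalingValues{Source: "Local", Replicas: 3},
    })

    // Status building reads whichever set is active: scalingValues by default,
    // localScalingValues once isLocalFallbackActive is set (see getActiveScalingValues).
    fmt.Println(pai.ScalingValues().Horizontal.Replicas)      // 5
    fmt.Println(pai.LocalScalingValues().Horizontal.Replicas) // 3
    fmt.Println(pai.Annotations())
}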
93 changes: 71 additions & 22 deletions pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go
@@ -38,6 +38,9 @@ type PodAutoscalerInternal struct {
// name is the name of the PodAutoscaler
name string

// annotations are the annotations of the PodAutoscaler
annotations map[string]string

// creationTimestamp is the time when the kubernetes object was created
// creationTimestamp is stored in .DatadogPodAutoscaler.CreationTimestamp
creationTimestamp time.Time
@@ -53,9 +56,15 @@ type PodAutoscalerInternal struct {
// (only if owner == remote)
settingsTimestamp time.Time

// scalingValues represents the current target scaling values (retrieved from RC)
// isLocalFallbackActive is true if the PodAutoscaler is using local fallback
isLocalFallbackActive bool

// scalingValues represents the main scaling values
scalingValues ScalingValues

// localScalingValues represents the local scaling values
localScalingValues ScalingValues

// horizontalLastActions is the last horizontal action successfully taken
horizontalLastActions []datadoghq.DatadogPodAutoscalerHorizontalAction

@@ -100,8 +109,9 @@ type PodAutoscalerInternal struct {
// NewPodAutoscalerInternal creates a new PodAutoscalerInternal from a Kubernetes CR
func NewPodAutoscalerInternal(podAutoscaler *datadoghq.DatadogPodAutoscaler) PodAutoscalerInternal {
pai := PodAutoscalerInternal{
namespace: podAutoscaler.Namespace,
name: podAutoscaler.Name,
namespace: podAutoscaler.Namespace,
name: podAutoscaler.Name,
annotations: podAutoscaler.Annotations,
}
pai.UpdateFromPodAutoscaler(podAutoscaler)
pai.UpdateFromStatus(&podAutoscaler.Status)
@@ -128,6 +138,7 @@ func NewPodAutoscalerFromSettings(ns, name string, podAutoscalerSpec *datadoghq.
func (p *PodAutoscalerInternal) UpdateFromPodAutoscaler(podAutoscaler *datadoghq.DatadogPodAutoscaler) {
p.creationTimestamp = podAutoscaler.CreationTimestamp.Time
p.generation = podAutoscaler.Generation
p.annotations = podAutoscaler.Annotations
p.spec = podAutoscaler.Spec.DeepCopy()
// Reset the target GVK as it might have changed
// Resolving the target GVK is done in the controller sync to ensure proper sync and error handling
@@ -153,11 +164,23 @@ func (p *PodAutoscalerInternal) UpdateFromValues(scalingValues ScalingValues) {
p.scalingValues = scalingValues
}

// UpdateFromLocalValues updates the PodAutoscalerInternal from new local scaling values
// nolint:unused
func (p *PodAutoscalerInternal) UpdateFromLocalValues(localScalingValues ScalingValues) {
p.localScalingValues = localScalingValues
}

// RemoveValues clears autoscaling values data from the PodAutoscalerInternal as we stopped autoscaling
func (p *PodAutoscalerInternal) RemoveValues() {
p.scalingValues = ScalingValues{}
}

// RemoveLocalValues clears local autoscaling values data from the PodAutoscalerInternal as we stopped autoscaling
// nolint:unused
func (p *PodAutoscalerInternal) RemoveLocalValues() {
p.localScalingValues = ScalingValues{}
}

// UpdateFromHorizontalAction updates the PodAutoscalerInternal from a new horizontal action
func (p *PodAutoscalerInternal) UpdateFromHorizontalAction(action *datadoghq.DatadogPodAutoscalerHorizontalAction, err error) {
if err != nil {
@@ -223,13 +246,19 @@ func (p *PodAutoscalerInternal) SetDeleted() {
// UpdateFromStatus updates the PodAutoscalerInternal from an existing status.
// It assumes the PodAutoscalerInternal is empty so it's not emptying existing data.
func (p *PodAutoscalerInternal) UpdateFromStatus(status *datadoghq.DatadogPodAutoscalerStatus) {
activeScalingValues := &p.scalingValues
if status.Horizontal != nil {
if status.Horizontal.Target != nil {
p.scalingValues.Horizontal = &HorizontalScalingValues{
if status.Horizontal.Target.Source == "Local" {
activeScalingValues = &p.localScalingValues
}

horizontalScalingValues := &HorizontalScalingValues{
Source: status.Horizontal.Target.Source,
Timestamp: status.Horizontal.Target.GeneratedAt.Time,
Replicas: status.Horizontal.Target.Replicas,
}
activeScalingValues.Horizontal = horizontalScalingValues
}

if len(status.Horizontal.LastActions) > 0 {
@@ -239,12 +268,13 @@ func (p *PodAutoscalerInternal) UpdateFromStatus(status *datadoghq.DatadogPodAut

if status.Vertical != nil {
if status.Vertical.Target != nil {
p.scalingValues.Vertical = &VerticalScalingValues{
verticalScalingValues := &VerticalScalingValues{
Source: status.Vertical.Target.Source,
Timestamp: status.Vertical.Target.GeneratedAt.Time,
ContainerResources: status.Vertical.Target.DesiredResources,
ResourcesHash: status.Vertical.Target.Version,
}
activeScalingValues.Vertical = verticalScalingValues
}

p.verticalLastAction = status.Vertical.LastAction
@@ -263,13 +293,13 @@ func (p *PodAutoscalerInternal) UpdateFromStatus(status *datadoghq.DatadogPodAut
// We're restoring this to error as it's the most generic
p.error = errors.New(cond.Reason)
case cond.Type == datadoghq.DatadogPodAutoscalerHorizontalAbleToRecommendCondition && cond.Status == corev1.ConditionFalse:
p.scalingValues.HorizontalError = errors.New(cond.Reason)
activeScalingValues.HorizontalError = errors.New(cond.Reason)
case cond.Type == datadoghq.DatadogPodAutoscalerHorizontalAbleToScaleCondition && cond.Status == corev1.ConditionFalse:
p.horizontalLastActionError = errors.New(cond.Reason)
case cond.Type == datadoghq.DatadogPodAutoscalerHorizontalScalingLimitedCondition && cond.Status == corev1.ConditionTrue:
p.horizontalLastLimitReason = cond.Reason
case cond.Type == datadoghq.DatadogPodAutoscalerVerticalAbleToRecommendCondition && cond.Status == corev1.ConditionFalse:
p.scalingValues.VerticalError = errors.New(cond.Reason)
activeScalingValues.VerticalError = errors.New(cond.Reason)
case cond.Type == datadoghq.DatadogPodAutoscalerVerticalAbleToApply && cond.Status == corev1.ConditionFalse:
p.verticalLastActionError = errors.New(cond.Reason)
}
@@ -295,6 +325,11 @@ func (p *PodAutoscalerInternal) Name() string {
return p.name
}

// Annotations returns the annotations on the PodAutoscaler
func (p *PodAutoscalerInternal) Annotations() map[string]string {
return p.annotations
}

// ID returns the functional identifier of the PodAutoscaler
func (p *PodAutoscalerInternal) ID() string {
return p.namespace + "/" + p.name
@@ -325,6 +360,11 @@ func (p *PodAutoscalerInternal) ScalingValues() ScalingValues {
return p.scalingValues
}

// LocalScalingValues returns the local scaling values of the PodAutoscaler
func (p *PodAutoscalerInternal) LocalScalingValues() ScalingValues {
return p.localScalingValues
}

// HorizontalLastActions returns the last horizontal actions taken
func (p *PodAutoscalerInternal) HorizontalLastActions() []datadoghq.DatadogPodAutoscalerHorizontalAction {
return p.horizontalLastActions
@@ -397,13 +437,15 @@ func (p *PodAutoscalerInternal) BuildStatus(currentTime metav1.Time, currentStat
status.CurrentReplicas = p.currentReplicas
}

activeScalingValues := p.getActiveScalingValues()

// Produce Horizontal status only if we have a desired number of replicas
if p.scalingValues.Horizontal != nil {
if activeScalingValues.Horizontal != nil {
status.Horizontal = &datadoghq.DatadogPodAutoscalerHorizontalStatus{
Target: &datadoghq.DatadogPodAutoscalerHorizontalTargetStatus{
Source: p.scalingValues.Horizontal.Source,
GeneratedAt: metav1.NewTime(p.scalingValues.Horizontal.Timestamp),
Replicas: p.scalingValues.Horizontal.Replicas,
Source: activeScalingValues.Horizontal.Source,
GeneratedAt: metav1.NewTime(activeScalingValues.Horizontal.Timestamp),
Replicas: activeScalingValues.Horizontal.Replicas,
},
}

@@ -418,15 +460,15 @@ func (p *PodAutoscalerInternal) BuildStatus(currentTime metav1.Time, currentStat
}

// Produce Vertical status only if we have a desired container resources
if p.scalingValues.Vertical != nil {
cpuReqSum, memReqSum := p.scalingValues.Vertical.SumCPUMemoryRequests()
if activeScalingValues.Vertical != nil {
cpuReqSum, memReqSum := activeScalingValues.Vertical.SumCPUMemoryRequests()

status.Vertical = &datadoghq.DatadogPodAutoscalerVerticalStatus{
Target: &datadoghq.DatadogPodAutoscalerVerticalTargetStatus{
Source: p.scalingValues.Vertical.Source,
GeneratedAt: metav1.NewTime(p.scalingValues.Vertical.Timestamp),
Version: p.scalingValues.Vertical.ResourcesHash,
DesiredResources: p.scalingValues.Vertical.ContainerResources,
Source: activeScalingValues.Vertical.Source,
GeneratedAt: metav1.NewTime(activeScalingValues.Vertical.Timestamp),
Version: activeScalingValues.Vertical.ResourcesHash,
DesiredResources: activeScalingValues.Vertical.ContainerResources,
Scaled: p.scaledReplicas,
PODCPURequest: cpuReqSum,
PODMemoryRequest: memReqSum,
@@ -458,7 +500,7 @@ func (p *PodAutoscalerInternal) BuildStatus(currentTime metav1.Time, currentStat
// Building global error condition
globalError := p.error
if p.error == nil {
globalError = p.scalingValues.Error
globalError = activeScalingValues.Error
}
status.Conditions = append(status.Conditions, newConditionFromError(true, currentTime, globalError, datadoghq.DatadogPodAutoscalerErrorCondition, existingConditions))

@@ -471,16 +513,16 @@ func (p *PodAutoscalerInternal) BuildStatus(currentTime metav1.Time, currentStat

// Building errors related to compute recommendations
var horizontalAbleToRecommend datadoghq.DatadogPodAutoscalerCondition
if p.scalingValues.HorizontalError != nil || p.scalingValues.Horizontal != nil {
horizontalAbleToRecommend = newConditionFromError(false, currentTime, p.scalingValues.HorizontalError, datadoghq.DatadogPodAutoscalerHorizontalAbleToRecommendCondition, existingConditions)
if activeScalingValues.HorizontalError != nil || activeScalingValues.Horizontal != nil {
horizontalAbleToRecommend = newConditionFromError(false, currentTime, activeScalingValues.HorizontalError, datadoghq.DatadogPodAutoscalerHorizontalAbleToRecommendCondition, existingConditions)
} else {
horizontalAbleToRecommend = newCondition(corev1.ConditionUnknown, "", currentTime, datadoghq.DatadogPodAutoscalerHorizontalAbleToRecommendCondition, existingConditions)
}
status.Conditions = append(status.Conditions, horizontalAbleToRecommend)

var verticalAbleToRecommend datadoghq.DatadogPodAutoscalerCondition
if p.scalingValues.VerticalError != nil || p.scalingValues.Vertical != nil {
verticalAbleToRecommend = newConditionFromError(false, currentTime, p.scalingValues.VerticalError, datadoghq.DatadogPodAutoscalerVerticalAbleToRecommendCondition, existingConditions)
if activeScalingValues.VerticalError != nil || activeScalingValues.Vertical != nil {
verticalAbleToRecommend = newConditionFromError(false, currentTime, activeScalingValues.VerticalError, datadoghq.DatadogPodAutoscalerVerticalAbleToRecommendCondition, existingConditions)
} else {
verticalAbleToRecommend = newCondition(corev1.ConditionUnknown, "", currentTime, datadoghq.DatadogPodAutoscalerVerticalAbleToRecommendCondition, existingConditions)
}
@@ -518,6 +560,13 @@ func (p *PodAutoscalerInternal) BuildStatus(currentTime metav1.Time, currentStat
}

// Private helpers
func (p *PodAutoscalerInternal) getActiveScalingValues() *ScalingValues {
if p.isLocalFallbackActive {
return &p.localScalingValues
}
return &p.scalingValues
}

func addHorizontalAction(currentTime time.Time, retention time.Duration, actions []datadoghq.DatadogPodAutoscalerHorizontalAction, action *datadoghq.DatadogPodAutoscalerHorizontalAction) []datadoghq.DatadogPodAutoscalerHorizontalAction {
if retention == 0 {
actions = actions[:0]
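
One behavioral note worth calling out: UpdateFromStatus now routes a restored target whose Source is "Local" into the local scaling values rather than the main ones, and the restored recommendation-error conditions follow that same active set. A rough sketch of the expected behavior, reusing the imports from the earlier example (the status literal is made up):

// Sketch only; assumes the same imports as the hypothetical caller above.
status := &datadoghq.DatadogPodAutoscalerStatus{
    Horizontal: &datadoghq.DatadogPodAutoscalerHorizontalStatus{
        Target: &datadoghq.DatadogPodAutoscalerHorizontalTargetStatus{
            Source:      "Local",
            GeneratedAt: metav1.Now(),
            Replicas:    3,
        },
    },
}

var pai model.PodAutoscalerInternal
pai.UpdateFromStatus(status)

// The restored horizontal target ends up on the local side...
fmt.Println(pai.LocalScalingValues().Horizontal != nil) // true
// ...while the main (remote) scaling values stay empty.
fmt.Println(pai.ScalingValues().Horizontal == nil) // true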
62 changes: 0 additions & 62 deletions pkg/clusteragent/autoscaling/workload/model/rc_schema.go
@@ -8,10 +8,6 @@
package model

import (
"time"

"k8s.io/apimachinery/pkg/api/resource"

kubeAutoscaling "github.com/DataDog/agent-payload/v5/autoscaling/kubernetes"
datadoghq "github.com/DataDog/datadog-operator/apis/datadoghq/v1alpha1"
)
@@ -41,61 +37,3 @@ type AutoscalingSettings struct {
// Spec is the full spec of the PodAutoscaler
Spec *datadoghq.DatadogPodAutoscalerSpec `json:"spec"`
}

// ScalingValues represents the scaling values (horizontal and vertical) for a target
type ScalingValues struct {
// HorizontalError refers to an error encountered by Datadog while computing the horizontal scaling values
HorizontalError error
Horizontal *HorizontalScalingValues

// VerticalError refers to an error encountered by Datadog while computing the vertical scaling values
VerticalError error
Vertical *VerticalScalingValues

// Error refers to a general error encountered by Datadog while computing the scaling values
Error error
}

// HorizontalScalingValues holds the horizontal scaling values for a target
type HorizontalScalingValues struct {
// Source is the source of the value
Source datadoghq.DatadogPodAutoscalerValueSource

// Timestamp is the time at which the data was generated
Timestamp time.Time

// Replicas is the desired number of replicas for the target
Replicas int32
}

// VerticalScalingValues holds the vertical scaling values for a target
type VerticalScalingValues struct {
// Source is the source of the value
Source datadoghq.DatadogPodAutoscalerValueSource

// Timestamp is the time at which the data was generated
Timestamp time.Time

// ResourcesHash is the hash of containerResources
ResourcesHash string

// ContainerResources holds the resources for a container
ContainerResources []datadoghq.DatadogPodAutoscalerContainerResources
}

// SumCPUMemoryRequests sums the CPU and memory requests of all containers
func (v *VerticalScalingValues) SumCPUMemoryRequests() (cpu, memory resource.Quantity) {
for _, container := range v.ContainerResources {
cpuReq := container.Requests.Cpu()
if cpuReq != nil {
cpu.Add(*cpuReq)
}

memoryReq := container.Requests.Memory()
if memoryReq != nil {
memory.Add(*memoryReq)
}
}

return
}
74 changes: 74 additions & 0 deletions pkg/clusteragent/autoscaling/workload/model/recommendations.go
@@ -0,0 +1,74 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024-present Datadog, Inc.

//go:build kubeapiserver

package model

import (
"time"

"k8s.io/apimachinery/pkg/api/resource"

datadoghq "github.com/DataDog/datadog-operator/apis/datadoghq/v1alpha1"
)

// ScalingValues represents the scaling values (horizontal and vertical) for a target
type ScalingValues struct {
// HorizontalError refers to an error encountered by Datadog while computing the horizontal scaling values
HorizontalError error
Horizontal *HorizontalScalingValues

// VerticalError refers to an error encountered by Datadog while computing the vertical scaling values
VerticalError error
Vertical *VerticalScalingValues

// Error refers to a general error encountered by Datadog while computing the scaling values
Error error
}

// HorizontalScalingValues holds the horizontal scaling values for a target
type HorizontalScalingValues struct {
// Source is the source of the value
Source datadoghq.DatadogPodAutoscalerValueSource

// Timestamp is the time at which the data was generated
Timestamp time.Time

// Replicas is the desired number of replicas for the target
Replicas int32
}

// VerticalScalingValues holds the vertical scaling values for a target
type VerticalScalingValues struct {
// Source is the source of the value
Source datadoghq.DatadogPodAutoscalerValueSource

// Timestamp is the time at which the data was generated
Timestamp time.Time

// ResourcesHash is the hash of containerResources
ResourcesHash string

// ContainerResources holds the resources for a container
ContainerResources []datadoghq.DatadogPodAutoscalerContainerResources
}

// SumCPUMemoryRequests sums the CPU and memory requests of all containers
func (v *VerticalScalingValues) SumCPUMemoryRequests() (cpu, memory resource.Quantity) {
for _, container := range v.ContainerResources {
cpuReq := container.Requests.Cpu()
if cpuReq != nil {
cpu.Add(*cpuReq)
}

memoryReq := container.Requests.Memory()
if memoryReq != nil {
memory.Add(*memoryReq)
}
}

return
}
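
recommendations.go is essentially a verbatim move of the ScalingValues types deleted from rc_schema.go above, so behavior should be unchanged. For reference, a small hypothetical use of SumCPUMemoryRequests — assuming the usual corev1 ("k8s.io/api/core/v1") and resource ("k8s.io/apimachinery/pkg/api/resource") imports, and that Requests is a corev1.ResourceList as the Cpu()/Memory() calls imply:

// Illustration only; the container values are invented.
v := model.VerticalScalingValues{
    ContainerResources: []datadoghq.DatadogPodAutoscalerContainerResources{
        {Requests: corev1.ResourceList{
            corev1.ResourceCPU:    resource.MustParse("250m"),
            corev1.ResourceMemory: resource.MustParse("256Mi"),
        }},
        {Requests: corev1.ResourceList{
            corev1.ResourceCPU:    resource.MustParse("100m"),
            corev1.ResourceMemory: resource.MustParse("64Mi"),
        }},
    },
}

// Requests are summed across all containers: 250m+100m CPU, 256Mi+64Mi memory.
cpu, mem := v.SumCPUMemoryRequests()
fmt.Println(cpu.String(), mem.String()) // expected: 350m 320Mi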