Skip to content

Commit

Permalink
feat: add support for Passive Health Checks (Outlier Detection) (#2556)
Browse files Browse the repository at this point in the history
* feat: add support for Passive Health Checks (Outlier Detection)

Signed-off-by: yeedove <yeedove@gmail.com>

* fix lint

Signed-off-by: yeedove <yeedove@gmail.com>

* fix gen

* use ptr type for the optional

Signed-off-by: yeedove <yeedove@gmail.com>

---------

Signed-off-by: yeedove <yeedove@gmail.com>
  • Loading branch information
deszhou authored Feb 12, 2024
1 parent 2c1b946 commit da092e0
Show file tree
Hide file tree
Showing 13 changed files with 716 additions and 228 deletions.
56 changes: 55 additions & 1 deletion api/v1alpha1/healthcheck_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,60 @@ type HealthCheck struct {
// Active health check configuration
// +optional
Active *ActiveHealthCheck `json:"active,omitempty"`

// Passive passive health check configuration
// +optional
Passive *PassiveHealthCheck `json:"passive,omitempty"`
}

// PassiveHealthCheck defines the configuration for passive health checks in the context of Envoy's Outlier Detection,
// see https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/outlier
//
// Every field is an optional pointer; when a field is omitted, the default
// declared by its kubebuilder default marker applies.
type PassiveHealthCheck struct {

// SplitExternalLocalOriginErrors enables splitting of errors between external and local origin.
// Defaults to false.
//
// +kubebuilder:default=false
// +optional
SplitExternalLocalOriginErrors *bool `json:"splitExternalLocalOriginErrors,omitempty"`

// Interval defines the time between passive health checks.
// Defaults to 3s.
//
// +kubebuilder:validation:Format=duration
// +kubebuilder:default="3s"
// +optional
Interval *metav1.Duration `json:"interval,omitempty"`

// ConsecutiveLocalOriginFailures sets the number of consecutive local origin failures triggering ejection.
// This parameter takes effect only when SplitExternalLocalOriginErrors is set to true.
// Defaults to 5.
//
// +kubebuilder:default=5
// +optional
ConsecutiveLocalOriginFailures *uint32 `json:"consecutiveLocalOriginFailures,omitempty"`

// ConsecutiveGatewayErrors sets the number of consecutive gateway errors triggering ejection.
// Defaults to 0.
//
// +kubebuilder:default=0
// +optional
ConsecutiveGatewayErrors *uint32 `json:"consecutiveGatewayErrors,omitempty"`

// Consecutive5xxErrors sets the number of consecutive 5xx errors triggering ejection.
// Defaults to 5.
//
// +kubebuilder:default=5
// +optional
Consecutive5xxErrors *uint32 `json:"consecutive5XxErrors,omitempty"`

// BaseEjectionTime defines the base duration for which a host will be ejected on consecutive failures.
// Defaults to 30s.
//
// +kubebuilder:validation:Format=duration
// +kubebuilder:default="30s"
// +optional
BaseEjectionTime *metav1.Duration `json:"baseEjectionTime,omitempty"`

// MaxEjectionPercent sets the maximum percentage of hosts in a cluster that can be ejected.
// Defaults to 10.
//
// +kubebuilder:default=10
// +optional
MaxEjectionPercent *int32 `json:"maxEjectionPercent,omitempty"`
}

// ActiveHealthCheck defines the active health check configuration.
Expand All @@ -29,7 +83,7 @@ type ActiveHealthCheck struct {
// +optional
Timeout *metav1.Duration `json:"timeout"`

// Interval defines the time between health checks.
// Interval defines the time between active health checks.
//
// +kubebuilder:validation:Format=duration
// +kubebuilder:default="3s"
Expand Down
55 changes: 55 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,8 @@ spec:
type: object
interval:
default: 3s
description: Interval defines the time between health checks.
description: Interval defines the time between active health
checks.
format: duration
type: string
tcp:
Expand Down Expand Up @@ -337,6 +338,53 @@ spec:
- message: If Health Checker type is TCP, tcp field needs to be
set.
rule: 'self.type == ''TCP'' ? has(self.tcp) : !has(self.tcp)'
passive:
description: Passive passive check configuration
properties:
baseEjectionTime:
default: 30s
description: BaseEjectionTime defines the base duration for
which a host will be ejected on consecutive failures.
format: duration
type: string
consecutive5XxErrors:
default: 5
description: Consecutive5xxErrors sets the number of consecutive
5xx errors triggering ejection.
format: int32
type: integer
consecutiveGatewayErrors:
default: 0
description: ConsecutiveGatewayErrors sets the number of consecutive
gateway errors triggering ejection.
format: int32
type: integer
consecutiveLocalOriginFailures:
default: 5
description: ConsecutiveLocalOriginFailures sets the number
of consecutive local origin failures triggering ejection.
Parameter takes effect only when split_external_local_origin_errors
is set to true.
format: int32
type: integer
interval:
default: 3s
description: Interval defines the time between passive health
checks.
format: duration
type: string
maxEjectionPercent:
default: 10
description: MaxEjectionPercent sets the maximum percentage
of hosts in a cluster that can be ejected.
format: int32
type: integer
splitExternalLocalOriginErrors:
default: false
description: SplitExternalLocalOriginErrors enables splitting
of errors between external and local origin.
type: boolean
type: object
type: object
loadBalancer:
description: LoadBalancer policy to apply when routing traffic from
Expand Down
34 changes: 33 additions & 1 deletion internal/gatewayapi/backendtrafficpolicy.go
Original file line number Diff line number Diff line change
Expand Up @@ -699,12 +699,44 @@ func (t *Translator) buildProxyProtocol(policy *egv1a1.BackendTrafficPolicy) *ir
}

// buildHealthCheck translates the HealthCheck section of a BackendTrafficPolicy
// into its IR form.
//
// It returns nil when the policy has no HealthCheck section. Otherwise it
// returns a non-nil IR health check whose Passive and Active members are
// populated only for the sub-sections present in the policy.
func (t *Translator) buildHealthCheck(policy *egv1a1.BackendTrafficPolicy) *ir.HealthCheck {
	spec := policy.Spec.HealthCheck
	if spec == nil {
		return nil
	}

	var result ir.HealthCheck
	if spec.Passive != nil {
		result.Passive = t.buildPassiveHealthCheck(policy)
	}
	if spec.Active != nil {
		result.Active = t.buildActiveHealthCheck(policy)
	}
	return &result
}

// buildPassiveHealthCheck translates the passive health check section of a
// BackendTrafficPolicy into the IR outlier-detection settings.
//
// It returns nil when the policy has no HealthCheck section or no Passive
// sub-section.
func (t *Translator) buildPassiveHealthCheck(policy *egv1a1.BackendTrafficPolicy) *ir.OutlierDetection {
	spec := policy.Spec.HealthCheck
	if spec == nil {
		return nil
	}
	passive := spec.Passive
	if passive == nil {
		return nil
	}

	// Each setting is handed to the IR unchanged; no defaulting or validation
	// is applied at this stage.
	return &ir.OutlierDetection{
		SplitExternalLocalOriginErrors: passive.SplitExternalLocalOriginErrors,
		Interval:                       passive.Interval,
		BaseEjectionTime:               passive.BaseEjectionTime,
		MaxEjectionPercent:             passive.MaxEjectionPercent,
		Consecutive5xxErrors:           passive.Consecutive5xxErrors,
		ConsecutiveGatewayErrors:       passive.ConsecutiveGatewayErrors,
		ConsecutiveLocalOriginFailures: passive.ConsecutiveLocalOriginFailures,
	}
}

func (t *Translator) buildActiveHealthCheck(policy *egv1a1.BackendTrafficPolicy) *ir.ActiveHealthCheck {
if policy.Spec.HealthCheck == nil || policy.Spec.HealthCheck.Active == nil {
return nil
}

hc := policy.Spec.HealthCheck.Active
irHC := &ir.HealthCheck{
irHC := &ir.ActiveHealthCheck{
Timeout: hc.Timeout,
Interval: hc.Interval,
UnhealthyThreshold: hc.UnhealthyThreshold,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ backendTrafficPolicies:
expectedResponse:
type: Binary
binary: RXZlcnl0aGluZyBPSw==
passive:
baseEjectionTime: 160s
interval: 2s
maxEjectionPercent: 100
- apiVersion: gateway.envoyproxy.io/v1alpha1
kind: BackendTrafficPolicy
metadata:
Expand Down Expand Up @@ -155,6 +159,10 @@ backendTrafficPolicies:
expectedResponse:
type: Text
text: pong
passive:
baseEjectionTime: 150s
interval: 1s
maxEjectionPercent: 100
- apiVersion: gateway.envoyproxy.io/v1alpha1
kind: BackendTrafficPolicy
metadata:
Expand All @@ -180,6 +188,10 @@ backendTrafficPolicies:
receive:
type: Text
text: pong
passive:
baseEjectionTime: 180s
interval: 1s
maxEjectionPercent: 100
- apiVersion: gateway.envoyproxy.io/v1alpha1
kind: BackendTrafficPolicy
metadata:
Expand All @@ -205,3 +217,7 @@ backendTrafficPolicies:
receive:
type: Binary
binary: RXZlcnl0aGluZyBPSw==
passive:
baseEjectionTime: 160s
interval: 8ms
maxEjectionPercent: 11
Loading

0 comments on commit da092e0

Please sign in to comment.