From d407e0ffb6516cbc623292c1aaedf2d0a16af134 Mon Sep 17 00:00:00 2001 From: Guy Daich Date: Thu, 22 Feb 2024 20:26:10 -0600 Subject: [PATCH] feat(translator): Implement Retry in BTP (#2678) * Implement Retries Signed-off-by: Guy Daich * fix linting Signed-off-by: Guy Daich * add tests Signed-off-by: Guy Daich --------- Signed-off-by: Guy Daich --- api/v1alpha1/retry_types.go | 1 + ....envoyproxy.io_backendtrafficpolicies.yaml | 3 +- internal/gatewayapi/backendtrafficpolicy.go | 99 ++++++ internal/gatewayapi/route.go | 1 + .../backendtrafficpolicy-with-retries.in.yaml | 110 ++++++ ...backendtrafficpolicy-with-retries.out.yaml | 333 ++++++++++++++++++ internal/ir/xds.go | 57 +++ internal/ir/zz_generated.deepcopy.go | 110 ++++++ internal/xds/translator/route.go | 122 +++++++ .../in/xds-ir/retry-partial-invalid.yaml | 53 +++ .../retry-partial-invalid.clusters.yaml | 14 + .../retry-partial-invalid.endpoints.yaml | 12 + .../retry-partial-invalid.listeners.yaml | 35 ++ .../xds-ir/retry-partial-invalid.routes.yaml | 39 ++ internal/xds/translator/translator_test.go | 3 + site/content/en/latest/api/extension_types.md | 2 +- 16 files changed, 992 insertions(+), 2 deletions(-) create mode 100644 internal/gatewayapi/testdata/backendtrafficpolicy-with-retries.in.yaml create mode 100644 internal/gatewayapi/testdata/backendtrafficpolicy-with-retries.out.yaml create mode 100644 internal/xds/translator/testdata/in/xds-ir/retry-partial-invalid.yaml create mode 100644 internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.clusters.yaml create mode 100644 internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.endpoints.yaml create mode 100644 internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.listeners.yaml create mode 100644 internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.routes.yaml diff --git a/api/v1alpha1/retry_types.go b/api/v1alpha1/retry_types.go index 1752806e830b..1fea2d6d93b4 100644 --- a/api/v1alpha1/retry_types.go +++ b/api/v1alpha1/retry_types.go @@ -37,6 +37,7 @@ type RetryOn struct { Triggers []TriggerEnum `json:"triggers,omitempty"` // HttpStatusCodes specifies the http status codes to be retried. + // The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. // // +optional HTTPStatusCodes []HTTPStatus `json:"httpStatusCodes,omitempty"` diff --git a/charts/gateway-helm/crds/generated/gateway.envoyproxy.io_backendtrafficpolicies.yaml b/charts/gateway-helm/crds/generated/gateway.envoyproxy.io_backendtrafficpolicies.yaml index 2ba4265aeecd..680f1b19e9fb 100644 --- a/charts/gateway-helm/crds/generated/gateway.envoyproxy.io_backendtrafficpolicies.yaml +++ b/charts/gateway-helm/crds/generated/gateway.envoyproxy.io_backendtrafficpolicies.yaml @@ -786,7 +786,8 @@ spec: properties: httpStatusCodes: description: HttpStatusCodes specifies the http status codes - to be retried. + to be retried. The retriable-status-codes trigger must also + be configured for these status codes to trigger a retry. items: description: HTTPStatus defines the http status code. exclusiveMaximum: true diff --git a/internal/gatewayapi/backendtrafficpolicy.go b/internal/gatewayapi/backendtrafficpolicy.go index d5d71b1fd38e..3f21a987c2fd 100644 --- a/internal/gatewayapi/backendtrafficpolicy.go +++ b/internal/gatewayapi/backendtrafficpolicy.go @@ -251,6 +251,7 @@ func (t *Translator) translateBackendTrafficPolicyForRoute(policy *egv1a1.Backen fi *ir.FaultInjection to *ir.Timeout ka *ir.TCPKeepalive + rt *ir.Retry ) // Build IR @@ -276,6 +277,9 @@ func (t *Translator) translateBackendTrafficPolicyForRoute(policy *egv1a1.Backen if policy.Spec.TCPKeepalive != nil { ka = t.buildTCPKeepAlive(policy) } + if policy.Spec.Retry != nil { + rt = t.buildRetry(policy) + } // Apply IR to all relevant routes prefix := irRoutePrefix(route) for _, ir := range xdsIR { @@ -290,6 +294,7 @@ func (t *Translator) translateBackendTrafficPolicyForRoute(policy *egv1a1.Backen r.CircuitBreaker = cb r.FaultInjection = fi r.TCPKeepalive = ka + r.Retry = rt // some timeout setting originate from the route if policy.Spec.Timeout != nil { @@ -313,6 +318,7 @@ func (t *Translator) translateBackendTrafficPolicyForGateway(policy *egv1a1.Back fi *ir.FaultInjection ct *ir.Timeout ka *ir.TCPKeepalive + rt *ir.Retry ) // Build IR @@ -337,6 +343,9 @@ func (t *Translator) translateBackendTrafficPolicyForGateway(policy *egv1a1.Back if policy.Spec.TCPKeepalive != nil { ka = t.buildTCPKeepAlive(policy) } + if policy.Spec.Retry != nil { + rt = t.buildRetry(policy) + } // Apply IR to all the routes within the specific Gateway // If the feature is already set, then skip it, since it must be have @@ -369,6 +378,9 @@ func (t *Translator) translateBackendTrafficPolicyForGateway(policy *egv1a1.Back if r.TCPKeepalive == nil { r.TCPKeepalive = ka } + if r.Retry == nil { + r.Retry = rt + } if policy.Spec.Timeout != nil { ct = t.buildTimeout(policy, r) @@ -1016,3 +1028,90 @@ func (t *Translator) buildTCPKeepAlive(policy *egv1a1.BackendTrafficPolicy) *ir. } return ka } + +func (t *Translator) buildRetry(policy *egv1a1.BackendTrafficPolicy) *ir.Retry { + var rt *ir.Retry + if policy.Spec.Retry != nil { + prt := policy.Spec.Retry + rt = &ir.Retry{} + + if prt.NumRetries != nil { + rt.NumRetries = ptr.To(uint32(*prt.NumRetries)) + } + + if prt.RetryOn != nil { + ro := &ir.RetryOn{} + bro := false + if prt.RetryOn.HTTPStatusCodes != nil { + ro.HTTPStatusCodes = makeIrStatusSet(prt.RetryOn.HTTPStatusCodes) + bro = true + } + + if prt.RetryOn.Triggers != nil { + ro.Triggers = makeIrTriggerSet(prt.RetryOn.Triggers) + bro = true + } + + if bro { + rt.RetryOn = ro + } + } + + if prt.PerRetry != nil { + pr := &ir.PerRetryPolicy{} + bpr := false + + if prt.PerRetry.Timeout != nil { + pr.Timeout = prt.PerRetry.Timeout + bpr = true + } + + if prt.PerRetry.BackOff != nil { + if prt.PerRetry.BackOff.MaxInterval != nil || prt.PerRetry.BackOff.BaseInterval != nil { + bop := &ir.BackOffPolicy{} + if prt.PerRetry.BackOff.MaxInterval != nil { + bop.MaxInterval = prt.PerRetry.BackOff.MaxInterval + } + + if prt.PerRetry.BackOff.BaseInterval != nil { + bop.BaseInterval = prt.PerRetry.BackOff.BaseInterval + } + pr.BackOff = bop + bpr = true + } + } + + if bpr { + rt.PerRetry = pr + } + } + } + + return rt +} + +func makeIrStatusSet(in []egv1a1.HTTPStatus) []ir.HTTPStatus { + var irStatuses []ir.HTTPStatus + // deduplicate http statuses + statusSet := make(map[egv1a1.HTTPStatus]bool, len(in)) + for _, r := range in { + if _, ok := statusSet[r]; !ok { + statusSet[r] = true + irStatuses = append(irStatuses, ir.HTTPStatus(r)) + } + } + return irStatuses +} + +func makeIrTriggerSet(in []egv1a1.TriggerEnum) []ir.TriggerEnum { + var irTriggers []ir.TriggerEnum + // deduplicate http statuses + triggerSet := make(map[egv1a1.TriggerEnum]bool, len(in)) + for _, r := range in { + if _, ok := triggerSet[r]; !ok { + triggerSet[r] = true + irTriggers = append(irTriggers, ir.TriggerEnum(r)) + } + } + return irTriggers +} diff --git a/internal/gatewayapi/route.go b/internal/gatewayapi/route.go index b7e082c849db..fc4dac08cf52 100644 --- a/internal/gatewayapi/route.go +++ b/internal/gatewayapi/route.go @@ -657,6 +657,7 @@ func (t *Translator) processHTTPRouteParentRefListener(route RouteContext, route Mirrors: routeRoute.Mirrors, ExtensionRefs: routeRoute.ExtensionRefs, Timeout: routeRoute.Timeout, + Retry: routeRoute.Retry, } // Don't bother copying over the weights unless the route has invalid backends. if routeRoute.BackendWeights.Invalid > 0 { diff --git a/internal/gatewayapi/testdata/backendtrafficpolicy-with-retries.in.yaml b/internal/gatewayapi/testdata/backendtrafficpolicy-with-retries.in.yaml new file mode 100644 index 000000000000..6ed617254cc6 --- /dev/null +++ b/internal/gatewayapi/testdata/backendtrafficpolicy-with-retries.in.yaml @@ -0,0 +1,110 @@ +gateways: +- apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + metadata: + namespace: envoy-gateway + name: gateway-1 + spec: + gatewayClassName: envoy-gateway-class + listeners: + - name: http + protocol: HTTP + port: 80 + allowedRoutes: + namespaces: + from: All +- apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + metadata: + namespace: envoy-gateway + name: gateway-2 + spec: + gatewayClassName: envoy-gateway-class + listeners: + - name: http + protocol: HTTP + port: 80 + allowedRoutes: + namespaces: + from: All +grpcRoutes: +- apiVersion: gateway.networking.k8s.io/v1alpha2 + kind: GRPCRoute + metadata: + namespace: default + name: grpcroute-1 + spec: + parentRefs: + - namespace: envoy-gateway + name: gateway-1 + sectionName: http + rules: + - backendRefs: + - name: service-1 + port: 8080 +httpRoutes: +- apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + namespace: default + name: httproute-1 + spec: + hostnames: + - gateway.envoyproxy.io + parentRefs: + - namespace: envoy-gateway + name: gateway-2 + sectionName: http + rules: + - matches: + - path: + value: "/" + backendRefs: + - name: service-1 + port: 8080 +backendTrafficPolicies: +- apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: BackendTrafficPolicy + metadata: + namespace: envoy-gateway + name: policy-for-gateway + spec: + targetRef: + group: gateway.networking.k8s.io + kind: Gateway + name: gateway-1 + namespace: envoy-gateway + retry: + retryOn: + triggers: + - cancelled + perRetry: + timeout: 250ms + backoff: + baseInterval: 100ms + maxInterval: 10s +- apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: BackendTrafficPolicy + metadata: + namespace: default + name: policy-for-route + spec: + targetRef: + group: gateway.networking.k8s.io + kind: HTTPRoute + name: httproute-1 + namespace: default + retry: + numRetries: 5 + retryOn: + httpStatusCodes: + - 429 + - 503 + triggers: + - connect-failure + - retriable-status-codes + perRetry: + timeout: 250ms + backoff: + baseInterval: 100ms + maxInterval: 10s diff --git a/internal/gatewayapi/testdata/backendtrafficpolicy-with-retries.out.yaml b/internal/gatewayapi/testdata/backendtrafficpolicy-with-retries.out.yaml new file mode 100644 index 000000000000..76347ce5b792 --- /dev/null +++ b/internal/gatewayapi/testdata/backendtrafficpolicy-with-retries.out.yaml @@ -0,0 +1,333 @@ +backendTrafficPolicies: +- apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: BackendTrafficPolicy + metadata: + creationTimestamp: null + name: policy-for-route + namespace: default + spec: + retry: + numRetries: 5 + perRetry: + backOff: + baseInterval: 100ms + maxInterval: 10s + timeout: 250ms + retryOn: + httpStatusCodes: + - 429 + - 503 + triggers: + - connect-failure + - retriable-status-codes + targetRef: + group: gateway.networking.k8s.io + kind: HTTPRoute + name: httproute-1 + namespace: default + status: + conditions: + - lastTransitionTime: null + message: BackendTrafficPolicy has been accepted. + reason: Accepted + status: "True" + type: Accepted +- apiVersion: gateway.envoyproxy.io/v1alpha1 + kind: BackendTrafficPolicy + metadata: + creationTimestamp: null + name: policy-for-gateway + namespace: envoy-gateway + spec: + retry: + perRetry: + backOff: + baseInterval: 100ms + maxInterval: 10s + timeout: 250ms + retryOn: + triggers: + - cancelled + targetRef: + group: gateway.networking.k8s.io + kind: Gateway + name: gateway-1 + namespace: envoy-gateway + status: + conditions: + - lastTransitionTime: null + message: BackendTrafficPolicy has been accepted. + reason: Accepted + status: "True" + type: Accepted +gateways: +- apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + metadata: + creationTimestamp: null + name: gateway-1 + namespace: envoy-gateway + spec: + gatewayClassName: envoy-gateway-class + listeners: + - allowedRoutes: + namespaces: + from: All + name: http + port: 80 + protocol: HTTP + status: + listeners: + - attachedRoutes: 1 + conditions: + - lastTransitionTime: null + message: Sending translated listener configuration to the data plane + reason: Programmed + status: "True" + type: Programmed + - lastTransitionTime: null + message: Listener has been successfully translated + reason: Accepted + status: "True" + type: Accepted + - lastTransitionTime: null + message: Listener references have been resolved + reason: ResolvedRefs + status: "True" + type: ResolvedRefs + name: http + supportedKinds: + - group: gateway.networking.k8s.io + kind: HTTPRoute + - group: gateway.networking.k8s.io + kind: GRPCRoute +- apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + metadata: + creationTimestamp: null + name: gateway-2 + namespace: envoy-gateway + spec: + gatewayClassName: envoy-gateway-class + listeners: + - allowedRoutes: + namespaces: + from: All + name: http + port: 80 + protocol: HTTP + status: + listeners: + - attachedRoutes: 1 + conditions: + - lastTransitionTime: null + message: Sending translated listener configuration to the data plane + reason: Programmed + status: "True" + type: Programmed + - lastTransitionTime: null + message: Listener has been successfully translated + reason: Accepted + status: "True" + type: Accepted + - lastTransitionTime: null + message: Listener references have been resolved + reason: ResolvedRefs + status: "True" + type: ResolvedRefs + name: http + supportedKinds: + - group: gateway.networking.k8s.io + kind: HTTPRoute + - group: gateway.networking.k8s.io + kind: GRPCRoute +grpcRoutes: +- apiVersion: gateway.networking.k8s.io/v1alpha2 + kind: GRPCRoute + metadata: + creationTimestamp: null + name: grpcroute-1 + namespace: default + spec: + parentRefs: + - name: gateway-1 + namespace: envoy-gateway + sectionName: http + rules: + - backendRefs: + - name: service-1 + port: 8080 + status: + parents: + - conditions: + - lastTransitionTime: null + message: Route is accepted + reason: Accepted + status: "True" + type: Accepted + - lastTransitionTime: null + message: Resolved all the Object references for the Route + reason: ResolvedRefs + status: "True" + type: ResolvedRefs + controllerName: gateway.envoyproxy.io/gatewayclass-controller + parentRef: + name: gateway-1 + namespace: envoy-gateway + sectionName: http +httpRoutes: +- apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + creationTimestamp: null + name: httproute-1 + namespace: default + spec: + hostnames: + - gateway.envoyproxy.io + parentRefs: + - name: gateway-2 + namespace: envoy-gateway + sectionName: http + rules: + - backendRefs: + - name: service-1 + port: 8080 + matches: + - path: + value: / + status: + parents: + - conditions: + - lastTransitionTime: null + message: Route is accepted + reason: Accepted + status: "True" + type: Accepted + - lastTransitionTime: null + message: Resolved all the Object references for the Route + reason: ResolvedRefs + status: "True" + type: ResolvedRefs + controllerName: gateway.envoyproxy.io/gatewayclass-controller + parentRef: + name: gateway-2 + namespace: envoy-gateway + sectionName: http +infraIR: + envoy-gateway/gateway-1: + proxy: + listeners: + - address: null + name: envoy-gateway/gateway-1/http + ports: + - containerPort: 10080 + name: http + protocol: HTTP + servicePort: 80 + metadata: + labels: + gateway.envoyproxy.io/owning-gateway-name: gateway-1 + gateway.envoyproxy.io/owning-gateway-namespace: envoy-gateway + name: envoy-gateway/gateway-1 + envoy-gateway/gateway-2: + proxy: + listeners: + - address: null + name: envoy-gateway/gateway-2/http + ports: + - containerPort: 10080 + name: http + protocol: HTTP + servicePort: 80 + metadata: + labels: + gateway.envoyproxy.io/owning-gateway-name: gateway-2 + gateway.envoyproxy.io/owning-gateway-namespace: envoy-gateway + name: envoy-gateway/gateway-2 +xdsIR: + envoy-gateway/gateway-1: + accessLog: + text: + - path: /dev/stdout + http: + - address: 0.0.0.0 + hostnames: + - '*' + isHTTP2: true + name: envoy-gateway/gateway-1/http + path: + escapedSlashesAction: UnescapeAndRedirect + mergeSlashes: true + port: 10080 + routes: + - backendWeights: + invalid: 0 + valid: 0 + destination: + name: grpcroute/default/grpcroute-1/rule/0 + settings: + - addressType: IP + endpoints: + - host: 7.7.7.7 + port: 8080 + protocol: GRPC + weight: 1 + hostname: '*' + name: grpcroute/default/grpcroute-1/rule/0/match/-1/* + retry: + perRetry: + backOff: + baseInterval: 100ms + maxInterval: 10s + timeout: 250ms + retryOn: + triggers: + - cancelled + envoy-gateway/gateway-2: + accessLog: + text: + - path: /dev/stdout + http: + - address: 0.0.0.0 + hostnames: + - '*' + isHTTP2: false + name: envoy-gateway/gateway-2/http + path: + escapedSlashesAction: UnescapeAndRedirect + mergeSlashes: true + port: 10080 + routes: + - backendWeights: + invalid: 0 + valid: 0 + destination: + name: httproute/default/httproute-1/rule/0 + settings: + - addressType: IP + endpoints: + - host: 7.7.7.7 + port: 8080 + protocol: HTTP + weight: 1 + hostname: gateway.envoyproxy.io + name: httproute/default/httproute-1/rule/0/match/0/gateway_envoyproxy_io + pathMatch: + distinct: false + name: "" + prefix: / + retry: + numRetries: 5 + perRetry: + backOff: + baseInterval: 100ms + maxInterval: 10s + timeout: 250ms + retryOn: + httpStatusCodes: + - 429 + - 503 + triggers: + - connect-failure + - retriable-status-codes diff --git a/internal/ir/xds.go b/internal/ir/xds.go index bb2a9d360965..b6894482cac2 100644 --- a/internal/ir/xds.go +++ b/internal/ir/xds.go @@ -468,6 +468,8 @@ type HTTPRoute struct { Timeout *Timeout `json:"timeout,omitempty" yaml:"timeout,omitempty"` // TcpKeepalive settings associated with the upstream client connection. TCPKeepalive *TCPKeepalive `json:"tcpKeepalive,omitempty" yaml:"tcpKeepalive,omitempty"` + // Retry settings + Retry *Retry `json:"retry,omitempty" yaml:"retry,omitempty"` } // UnstructuredRef holds unstructured data for an arbitrary k8s resource introduced by an extension @@ -1674,3 +1676,58 @@ type HTTPTimeout struct { // The maximum duration of an HTTP connection. MaxConnectionDuration *metav1.Duration `json:"maxConnectionDuration,omitempty" yaml:"maxConnectionDuration,omitempty"` } + +// Retry define the retry policy configuration. +// +k8s:deepcopy-gen=true +type Retry struct { + // NumRetries is the number of retries to be attempted. Defaults to 2. + NumRetries *uint32 `json:"numRetries,omitempty"` + + // RetryOn specifies the retry trigger condition. + RetryOn *RetryOn `json:"retryOn,omitempty"` + + // PerRetry is the retry policy to be applied per retry attempt. + PerRetry *PerRetryPolicy `json:"perRetry,omitempty"` +} + +type TriggerEnum egv1a1.TriggerEnum + +const ( + Error5XX = TriggerEnum(egv1a1.Error5XX) + GatewayError = TriggerEnum(egv1a1.GatewayError) + DisconnectRest = TriggerEnum(egv1a1.DisconnectRest) + ConnectFailure = TriggerEnum(egv1a1.ConnectFailure) + Retriable4XX = TriggerEnum(egv1a1.Retriable4XX) + RefusedStream = TriggerEnum(egv1a1.RefusedStream) + RetriableStatusCodes = TriggerEnum(egv1a1.RetriableStatusCodes) + Cancelled = TriggerEnum(egv1a1.Cancelled) + DeadlineExceeded = TriggerEnum(egv1a1.DeadlineExceeded) + Internal = TriggerEnum(egv1a1.Internal) + ResourceExhausted = TriggerEnum(egv1a1.ResourceExhausted) + Unavailable = TriggerEnum(egv1a1.Unavailable) +) + +// +k8s:deepcopy-gen=true +type RetryOn struct { + // Triggers specifies the retry trigger condition(Http/Grpc). + Triggers []TriggerEnum `json:"triggers,omitempty"` + + // HttpStatusCodes specifies the http status codes to be retried. + HTTPStatusCodes []HTTPStatus `json:"httpStatusCodes,omitempty"` +} + +// +k8s:deepcopy-gen=true +type PerRetryPolicy struct { + // Timeout is the timeout per retry attempt. + Timeout *metav1.Duration `json:"timeout,omitempty"` + // Backoff is the backoff policy to be applied per retry attempt. + BackOff *BackOffPolicy `json:"backOff,omitempty"` +} + +// +k8s:deepcopy-gen=true +type BackOffPolicy struct { + // BaseInterval is the base interval between retries. + BaseInterval *metav1.Duration `json:"baseInterval,omitempty"` + // MaxInterval is the maximum interval between retries. + MaxInterval *metav1.Duration `json:"maxInterval,omitempty"` +} diff --git a/internal/ir/zz_generated.deepcopy.go b/internal/ir/zz_generated.deepcopy.go index 1f07711beb17..52923b43f8ce 100644 --- a/internal/ir/zz_generated.deepcopy.go +++ b/internal/ir/zz_generated.deepcopy.go @@ -123,6 +123,31 @@ func (in *AddHeader) DeepCopy() *AddHeader { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *BackOffPolicy) DeepCopyInto(out *BackOffPolicy) { + *out = *in + if in.BaseInterval != nil { + in, out := &in.BaseInterval, &out.BaseInterval + *out = new(v1.Duration) + **out = **in + } + if in.MaxInterval != nil { + in, out := &in.MaxInterval, &out.MaxInterval + *out = new(v1.Duration) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BackOffPolicy. +func (in *BackOffPolicy) DeepCopy() *BackOffPolicy { + if in == nil { + return nil + } + out := new(BackOffPolicy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BasicAuth) DeepCopyInto(out *BasicAuth) { *out = *in @@ -920,6 +945,11 @@ func (in *HTTPRoute) DeepCopyInto(out *HTTPRoute) { *out = new(TCPKeepalive) (*in).DeepCopyInto(*out) } + if in.Retry != nil { + in, out := &in.Retry, &out.Retry + *out = new(Retry) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HTTPRoute. @@ -1398,6 +1428,31 @@ func (in *PathSettings) DeepCopy() *PathSettings { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PerRetryPolicy) DeepCopyInto(out *PerRetryPolicy) { + *out = *in + if in.Timeout != nil { + in, out := &in.Timeout, &out.Timeout + *out = new(v1.Duration) + **out = **in + } + if in.BackOff != nil { + in, out := &in.BackOff, &out.BackOff + *out = new(BackOffPolicy) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PerRetryPolicy. +func (in *PerRetryPolicy) DeepCopy() *PerRetryPolicy { + if in == nil { + return nil + } + out := new(PerRetryPolicy) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ProxyInfra) DeepCopyInto(out *ProxyInfra) { *out = *in @@ -1611,6 +1666,61 @@ func (in *Redirect) DeepCopy() *Redirect { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Retry) DeepCopyInto(out *Retry) { + *out = *in + if in.NumRetries != nil { + in, out := &in.NumRetries, &out.NumRetries + *out = new(uint32) + **out = **in + } + if in.RetryOn != nil { + in, out := &in.RetryOn, &out.RetryOn + *out = new(RetryOn) + (*in).DeepCopyInto(*out) + } + if in.PerRetry != nil { + in, out := &in.PerRetry, &out.PerRetry + *out = new(PerRetryPolicy) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Retry. +func (in *Retry) DeepCopy() *Retry { + if in == nil { + return nil + } + out := new(Retry) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RetryOn) DeepCopyInto(out *RetryOn) { + *out = *in + if in.Triggers != nil { + in, out := &in.Triggers, &out.Triggers + *out = make([]TriggerEnum, len(*in)) + copy(*out, *in) + } + if in.HTTPStatusCodes != nil { + in, out := &in.HTTPStatusCodes, &out.HTTPStatusCodes + *out = make([]HTTPStatus, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RetryOn. +func (in *RetryOn) DeepCopy() *RetryOn { + if in == nil { + return nil + } + out := new(RetryOn) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RoundRobin) DeepCopyInto(out *RoundRobin) { *out = *in diff --git a/internal/xds/translator/route.go b/internal/xds/translator/route.go index 5dd189ffa32f..c711e64daf5e 100644 --- a/internal/xds/translator/route.go +++ b/internal/xds/translator/route.go @@ -6,6 +6,7 @@ package translator import ( + "errors" "strings" corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" @@ -17,6 +18,12 @@ import ( "github.com/envoyproxy/gateway/internal/ir" ) +const ( + retryDefaultRetryOn = "connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes" + retryDefaultRetriableStatusCode = 503 + retryDefaultNumRetries = 2 +) + func buildXdsRoute(httpRoute *ir.HTTPRoute) (*routev3.Route, error) { router := &routev3.Route{ Name: httpRoute.Name, @@ -77,6 +84,15 @@ func buildXdsRoute(httpRoute *ir.HTTPRoute) (*routev3.Route, error) { router.GetRoute().Timeout = durationpb.New(httpRoute.Timeout.HTTP.RequestTimeout.Duration) } + // Retries + if router.GetRoute() != nil && httpRoute.Retry != nil { + if rp, err := buildRetryPolicy(httpRoute); err == nil { + router.GetRoute().RetryPolicy = rp + } else { + return nil, err + } + } + // Add per route filter configs to the route, if needed. if err := patchRouteWithPerRouteConfig(router, httpRoute); err != nil { return nil, err @@ -373,3 +389,109 @@ func buildHashPolicy(httpRoute *ir.HTTPRoute) []*routev3.RouteAction_HashPolicy return nil } + +func buildRetryPolicy(route *ir.HTTPRoute) (*routev3.RetryPolicy, error) { + if route.Retry != nil { + rr := route.Retry + rp := &routev3.RetryPolicy{ + RetryOn: retryDefaultRetryOn, + RetriableStatusCodes: []uint32{retryDefaultRetriableStatusCode}, + NumRetries: &wrapperspb.UInt32Value{Value: retryDefaultNumRetries}, + } + + if rr.NumRetries != nil { + rp.NumRetries = &wrapperspb.UInt32Value{Value: *rr.NumRetries} + } + + if rr.RetryOn != nil { + if rr.RetryOn.Triggers != nil && len(rr.RetryOn.Triggers) > 0 { + if ro, err := buildRetryOn(rr.RetryOn.Triggers); err == nil { + rp.RetryOn = ro + } else { + return nil, err + } + } + + if rr.RetryOn.HTTPStatusCodes != nil && len(rr.RetryOn.HTTPStatusCodes) > 0 { + rp.RetriableStatusCodes = buildRetryStatusCodes(rr.RetryOn.HTTPStatusCodes) + } + } + + if rr.PerRetry != nil { + if rr.PerRetry.Timeout != nil { + rp.PerTryTimeout = durationpb.New(rr.PerRetry.Timeout.Duration) + } + + if rr.PerRetry.BackOff != nil { + bbo := false + rbo := &routev3.RetryPolicy_RetryBackOff{} + if rr.PerRetry.BackOff.BaseInterval != nil { + rbo.BaseInterval = durationpb.New(rr.PerRetry.BackOff.BaseInterval.Duration) + bbo = true + } + + if rr.PerRetry.BackOff.MaxInterval != nil { + rbo.MaxInterval = durationpb.New(rr.PerRetry.BackOff.MaxInterval.Duration) + bbo = true + } + + if bbo { + rp.RetryBackOff = rbo + } + } + } + return rp, nil + } + + return nil, nil +} + +func buildRetryStatusCodes(codes []ir.HTTPStatus) []uint32 { + ret := make([]uint32, len(codes)) + for i, c := range codes { + ret[i] = uint32(c) + } + return ret +} + +// buildRetryOn concatenates triggers to a comma-delimited string. +// An error is returned if a trigger is not in the list of supported values (not likely, due to prior validations). +func buildRetryOn(triggers []ir.TriggerEnum) (string, error) { + if len(triggers) == 0 { + return "", nil + } + + lookup := map[ir.TriggerEnum]string{ + ir.Error5XX: "5xx", + ir.GatewayError: "gateway-error", + ir.DisconnectRest: "disconnect-reset", + ir.ConnectFailure: "connect-failure", + ir.Retriable4XX: "retriable-4xx", + ir.RefusedStream: "refused-stream", + ir.RetriableStatusCodes: "retriable-status-codes", + ir.Cancelled: "cancelled", + ir.DeadlineExceeded: "deadline-exceeded", + ir.Internal: "internal", + ir.ResourceExhausted: "resource-exhausted", + ir.Unavailable: "unavailable", + } + + var b strings.Builder + + if t, found := lookup[triggers[0]]; found { + b.WriteString(t) + } else { + return "", errors.New("unsupported RetryOn trigger") + } + + for _, v := range triggers[1:] { + if t, found := lookup[v]; found { + b.WriteString(",") + b.WriteString(t) + } else { + return "", errors.New("unsupported RetryOn trigger") + } + } + + return b.String(), nil +} diff --git a/internal/xds/translator/testdata/in/xds-ir/retry-partial-invalid.yaml b/internal/xds/translator/testdata/in/xds-ir/retry-partial-invalid.yaml new file mode 100644 index 000000000000..794df92c7f61 --- /dev/null +++ b/internal/xds/translator/testdata/in/xds-ir/retry-partial-invalid.yaml @@ -0,0 +1,53 @@ +http: +- name: "first-listener" + address: "0.0.0.0" + port: 10080 + hostnames: + - "*" + path: + mergeSlashes: true + escapedSlashesAction: UnescapeAndRedirect + routes: + - name: "first-route" + hostname: "*" + retry: + numRetries: 5 + retryOn: + httpStatusCodes: + - 429 + - 503 + triggers: + - connect-failure + - retriable-status-codes + perRetry: + timeout: 250ms + backoff: + baseInterval: 100ms + maxInterval: 10s + destination: + name: "first-route-dest" + settings: + - endpoints: + - host: "1.2.3.4" + port: 50000 + - name: "second-route-defaults" + hostname: "foo" + retry: {} + destination: + name: "first-route-dest" + settings: + - endpoints: + - host: "1.2.3.4" + port: 50000 + - name: "third-route-error" + hostname: "bar" + retry: + retryOn: + triggers: + - this-is-not-a-trigger + destination: + name: "first-route-dest" + settings: + - endpoints: + - host: "1.2.3.4" + port: 50000 diff --git a/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.clusters.yaml b/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.clusters.yaml new file mode 100644 index 000000000000..c8692b816025 --- /dev/null +++ b/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.clusters.yaml @@ -0,0 +1,14 @@ +- commonLbConfig: + localityWeightedLbConfig: {} + connectTimeout: 10s + dnsLookupFamily: V4_ONLY + edsClusterConfig: + edsConfig: + ads: {} + resourceApiVersion: V3 + serviceName: first-route-dest + lbPolicy: LEAST_REQUEST + name: first-route-dest + outlierDetection: {} + perConnectionBufferLimitBytes: 32768 + type: EDS diff --git a/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.endpoints.yaml b/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.endpoints.yaml new file mode 100644 index 000000000000..3b3f2d09076e --- /dev/null +++ b/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.endpoints.yaml @@ -0,0 +1,12 @@ +- clusterName: first-route-dest + endpoints: + - lbEndpoints: + - endpoint: + address: + socketAddress: + address: 1.2.3.4 + portValue: 50000 + loadBalancingWeight: 1 + loadBalancingWeight: 1 + locality: + region: first-route-dest/backend/0 diff --git a/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.listeners.yaml b/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.listeners.yaml new file mode 100644 index 000000000000..046589e0fd89 --- /dev/null +++ b/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.listeners.yaml @@ -0,0 +1,35 @@ +- address: + socketAddress: + address: 0.0.0.0 + portValue: 10080 + defaultFilterChain: + filters: + - name: envoy.filters.network.http_connection_manager + typedConfig: + '@type': type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + commonHttpProtocolOptions: + headersWithUnderscoresAction: REJECT_REQUEST + http2ProtocolOptions: + initialConnectionWindowSize: 1048576 + initialStreamWindowSize: 65536 + maxConcurrentStreams: 100 + httpFilters: + - name: envoy.filters.http.router + typedConfig: + '@type': type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + suppressEnvoyHeaders: true + mergeSlashes: true + normalizePath: true + pathWithEscapedSlashesAction: UNESCAPE_AND_REDIRECT + rds: + configSource: + ads: {} + resourceApiVersion: V3 + routeConfigName: first-listener + serverHeaderTransformation: PASS_THROUGH + statPrefix: http + upgradeConfigs: + - upgradeType: websocket + useRemoteAddress: true + name: first-listener + perConnectionBufferLimitBytes: 32768 diff --git a/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.routes.yaml b/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.routes.yaml new file mode 100644 index 000000000000..e71f0c41be5c --- /dev/null +++ b/internal/xds/translator/testdata/out/xds-ir/retry-partial-invalid.routes.yaml @@ -0,0 +1,39 @@ +- ignorePortInHostMatching: true + name: first-listener + virtualHosts: + - domains: + - '*' + name: first-listener/* + routes: + - match: + prefix: / + name: first-route + route: + cluster: first-route-dest + retryPolicy: + numRetries: 5 + perTryTimeout: 0.250s + retriableStatusCodes: + - 429 + - 503 + retryBackOff: + baseInterval: 0.100s + maxInterval: 10s + retryOn: connect-failure,retriable-status-codes + - domains: + - foo + name: first-listener/foo + routes: + - match: + prefix: / + name: second-route-defaults + route: + cluster: first-route-dest + retryPolicy: + numRetries: 2 + retriableStatusCodes: + - 503 + retryOn: connect-failure,refused-stream,unavailable,cancelled,retriable-status-codes + - domains: + - bar + name: first-listener/bar diff --git a/internal/xds/translator/translator_test.go b/internal/xds/translator/translator_test.go index 761b63456216..c3a5b609e585 100644 --- a/internal/xds/translator/translator_test.go +++ b/internal/xds/translator/translator_test.go @@ -273,6 +273,9 @@ func TestTranslateXds(t *testing.T) { { name: "client-timeout", }, + { + name: "retry-partial-invalid", + }, } for _, tc := range testCases { diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md index 07cfc0dc8ba1..d86bfa39c097 100644 --- a/site/content/en/latest/api/extension_types.md +++ b/site/content/en/latest/api/extension_types.md @@ -2193,7 +2193,7 @@ _Appears in:_ | Field | Type | Required | Description | | --- | --- | --- | --- | | `triggers` | _[TriggerEnum](#triggerenum) array_ | false | Triggers specifies the retry trigger condition(Http/Grpc). | -| `httpStatusCodes` | _[HTTPStatus](#httpstatus) array_ | false | HttpStatusCodes specifies the http status codes to be retried. | +| `httpStatusCodes` | _[HTTPStatus](#httpstatus) array_ | false | HttpStatusCodes specifies the http status codes to be retried. The retriable-status-codes trigger must also be configured for these status codes to trigger a retry. | #### SecurityPolicy