diff --git a/Makefile b/Makefile index 53aebec9..dfc51e7c 100644 --- a/Makefile +++ b/Makefile @@ -88,7 +88,7 @@ test-cel: envtest apigen format # To build for multiple platforms, set the GOOS_LIST and GOARCH_LIST variables. # # Example: -# - `make build.controler GOOS_LIST="linux darwin" GOARCH_LIST="amd64 arm64"` +# - `make build.controller GOOS_LIST="linux darwin" GOARCH_LIST="amd64 arm64"` GOOS_LIST ?= $(shell go env GOOS) GOARCH_LIST ?= $(shell go env GOARCH) .PHONY: build.% diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go index 0d5aa04f..8c92f2f4 100644 --- a/api/v1alpha1/api.go +++ b/api/v1alpha1/api.go @@ -123,3 +123,138 @@ const ( // https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html APISchemaAWSBedrock APISchema = "AWSBedrock" ) + +// +kubebuilder:object:root=true + +// LLMBackendTrafficPolicy controls the flow of traffic to the backend. +type LLMBackendTrafficPolicy struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + // Spec defines the details of the LLMBackend traffic policy. + Spec LLMBackendTrafficPolicySpec `json:"spec,omitempty"` +} + +// +kubebuilder:object:root=true + +// LLMBackendTrafficPolicyList contains a list of LLMBackendTrafficPolicy +type LLMBackendTrafficPolicyList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []LLMBackendTrafficPolicy `json:"items"` +} + +// LLMBackendTrafficPolicySpec defines the details of llm backend traffic policy +// like rateLimit, timeout etc. +type LLMBackendTrafficPolicySpec struct { + // BackendRefs lists the LLMBackends that this traffic policy will apply + // The namespace is "local", i.e. the same namespace as the LLMRoute. + // + BackendRef LLMBackendLocalRef `json:"backendRef,omitempty"` + // RateLimit defines the rate limit policy. 
+ RateLimit *LLMTrafficPolicyRateLimit `json:"rateLimit,omitempty"` +} + +type LLMTrafficPolicyRateLimit struct { + // Rules defines the rate limit rules. + Rules []LLMTrafficPolicyRateLimitRule `json:"rules,omitempty"` +} + +// LLMTrafficPolicyRateLimitRule defines the details of the rate limit policy. +type LLMTrafficPolicyRateLimitRule struct { + // Headers is a list of request headers to match. Multiple header values are ANDed together, + // meaning, a request MUST match all the specified headers. + // At least one of headers or sourceCIDR condition must be specified. + Headers []LLMPolicyRateLimitHeaderMatch `json:"headers,omitempty"` + // +kubebuilder:validation:MinItems=1 + Limits []LLMPolicyRateLimitValue `json:"limits"` +} + +// LLMPolicyRateLimitHeaderMatch defines the match attributes within the HTTP Headers of the request. +type LLMPolicyRateLimitHeaderMatch struct { + // Type specifies how to match against the value of the header. + Type LLMPolicyRateLimitStringMatchType `json:"type"` + + // Name of the HTTP header. + // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=256 + Name string `json:"name"` + + // Value within the HTTP header. Due to the + // case-insensitivity of header names, "foo" and "Foo" are considered equivalent. + // Do not set this field when Type="Distinct", implying matching on any/all unique + // values within the header. + // + // +optional + // +kubebuilder:validation:MaxLength=1024 + Value *string `json:"value,omitempty"` +} + +// LLMPolicyRateLimitStringMatchType specifies the semantics of how string values should be compared. +// Valid LLMPolicyRateLimitStringMatchType values are "Exact", "RegularExpression", and "Distinct". +// +// +kubebuilder:validation:Enum=Exact;RegularExpression;Distinct +type LLMPolicyRateLimitStringMatchType string + +// LLMPolicyRateLimitStringMatchType constants. 
+const ( + // LLMPolicyRateLimitStringMatchHeaderMatchExact matches the exact value of the Value field against the value of + // the specified HTTP Header. + LLMPolicyRateLimitStringMatchHeaderMatchExact LLMPolicyRateLimitStringMatchType = "Exact" + // HeaderMatchRegularExpression matches a regular expression against the value of the + // specified HTTP Header. The regex string must adhere to the syntax documented in + // https://github.com/google/re2/wiki/Syntax. + HeaderMatchRegularExpression LLMPolicyRateLimitStringMatchType = "RegularExpression" + // LLMPolicyRateLimitStringMatchHeaderMatchDistinct matches any and all possible unique values encountered in the + // specified HTTP Header. Note that each unique value will receive its own rate limit + // bucket. + // Note: This is only supported for Global Rate Limits. + LLMPolicyRateLimitStringMatchHeaderMatchDistinct LLMPolicyRateLimitStringMatchType = "Distinct" +) + +// LLMPolicyRateLimitValue defines the limits for rate limiting. +type LLMPolicyRateLimitValue struct { + // Type specifies the type of rate limit. + // + // +kubebuilder:default=Token + Type LLMPolicyRateLimitType `json:"type,omitempty"` + // Quantity specifies the number of requests or tokens allowed in the given interval. + Quantity uint `json:"quantity"` + // Unit specifies the interval for the rate limit. + // + // +kubebuilder:default=Minute + Unit LLMPolicyRateLimitUnit `json:"unit,omitempty"` +} + +// LLMPolicyRateLimitType specifies the type of rate limit. +// Valid LLMPolicyRateLimitType values are "Request" and "Token". +// +// +kubebuilder:validation:Enum=Request;Token +type LLMPolicyRateLimitType string + +const ( + // LLMPolicyRateLimitTypeRequest specifies the rate limit to be based on the number of requests. + LLMPolicyRateLimitTypeRequest LLMPolicyRateLimitType = "Request" + // LLMPolicyRateLimitTypeToken specifies the rate limit to be based on the number of tokens. 
+ LLMPolicyRateLimitTypeToken LLMPolicyRateLimitType = "Token" +) + +// LLMPolicyRateLimitUnit specifies the intervals for setting rate limits. +// Valid LLMPolicyRateLimitUnit values are "Second", "Minute", "Hour", and "Day". +// +// +kubebuilder:validation:Enum=Second;Minute;Hour;Day +type LLMPolicyRateLimitUnit string + +// LLMPolicyRateLimitUnit constants. +const ( + // LLMPolicyRateLimitUnitSecond specifies the rate limit interval to be 1 second. + LLMPolicyRateLimitUnitSecond LLMPolicyRateLimitUnit = "Second" + + // LLMPolicyRateLimitUnitMinute specifies the rate limit interval to be 1 minute. + LLMPolicyRateLimitUnitMinute LLMPolicyRateLimitUnit = "Minute" + + // LLMPolicyRateLimitUnitHour specifies the rate limit interval to be 1 hour. + LLMPolicyRateLimitUnitHour LLMPolicyRateLimitUnit = "Hour" + + // LLMPolicyRateLimitUnitDay specifies the rate limit interval to be 1 day. + LLMPolicyRateLimitUnitDay LLMPolicyRateLimitUnit = "Day" +) diff --git a/api/v1alpha1/registry.go b/api/v1alpha1/registry.go index 7a64bc29..b32a2c3d 100644 --- a/api/v1alpha1/registry.go +++ b/api/v1alpha1/registry.go @@ -8,6 +8,7 @@ import ( func init() { SchemeBuilder.Register(&LLMRoute{}, &LLMRouteList{}) SchemeBuilder.Register(&LLMBackend{}, &LLMBackendList{}) + SchemeBuilder.Register(&LLMBackendTrafficPolicy{}, &LLMBackendTrafficPolicyList{}) } const GroupName = "aigateway.envoyproxy.io" diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index b95cd193..cd012aaf 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -113,6 +113,120 @@ func (in *LLMBackendSpec) DeepCopy() *LLMBackendSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *LLMBackendTrafficPolicy) DeepCopyInto(out *LLMBackendTrafficPolicy) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMBackendTrafficPolicy. +func (in *LLMBackendTrafficPolicy) DeepCopy() *LLMBackendTrafficPolicy { + if in == nil { + return nil + } + out := new(LLMBackendTrafficPolicy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *LLMBackendTrafficPolicy) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LLMBackendTrafficPolicyList) DeepCopyInto(out *LLMBackendTrafficPolicyList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]LLMBackendTrafficPolicy, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMBackendTrafficPolicyList. +func (in *LLMBackendTrafficPolicyList) DeepCopy() *LLMBackendTrafficPolicyList { + if in == nil { + return nil + } + out := new(LLMBackendTrafficPolicyList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *LLMBackendTrafficPolicyList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *LLMBackendTrafficPolicySpec) DeepCopyInto(out *LLMBackendTrafficPolicySpec) { + *out = *in + out.BackendRef = in.BackendRef + if in.RateLimit != nil { + in, out := &in.RateLimit, &out.RateLimit + *out = new(LLMTrafficPolicyRateLimit) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMBackendTrafficPolicySpec. +func (in *LLMBackendTrafficPolicySpec) DeepCopy() *LLMBackendTrafficPolicySpec { + if in == nil { + return nil + } + out := new(LLMBackendTrafficPolicySpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LLMPolicyRateLimitHeaderMatch) DeepCopyInto(out *LLMPolicyRateLimitHeaderMatch) { + *out = *in + if in.Value != nil { + in, out := &in.Value, &out.Value + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMPolicyRateLimitHeaderMatch. +func (in *LLMPolicyRateLimitHeaderMatch) DeepCopy() *LLMPolicyRateLimitHeaderMatch { + if in == nil { + return nil + } + out := new(LLMPolicyRateLimitHeaderMatch) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LLMPolicyRateLimitValue) DeepCopyInto(out *LLMPolicyRateLimitValue) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMPolicyRateLimitValue. +func (in *LLMPolicyRateLimitValue) DeepCopy() *LLMPolicyRateLimitValue { + if in == nil { + return nil + } + out := new(LLMPolicyRateLimitValue) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *LLMRoute) DeepCopyInto(out *LLMRoute) { *out = *in @@ -198,3 +312,52 @@ func (in *LLMRouteSpec) DeepCopy() *LLMRouteSpec { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LLMTrafficPolicyRateLimit) DeepCopyInto(out *LLMTrafficPolicyRateLimit) { + *out = *in + if in.Rules != nil { + in, out := &in.Rules, &out.Rules + *out = make([]LLMTrafficPolicyRateLimitRule, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMTrafficPolicyRateLimit. +func (in *LLMTrafficPolicyRateLimit) DeepCopy() *LLMTrafficPolicyRateLimit { + if in == nil { + return nil + } + out := new(LLMTrafficPolicyRateLimit) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LLMTrafficPolicyRateLimitRule) DeepCopyInto(out *LLMTrafficPolicyRateLimitRule) { + *out = *in + if in.Headers != nil { + in, out := &in.Headers, &out.Headers + *out = make([]LLMPolicyRateLimitHeaderMatch, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Limits != nil { + in, out := &in.Limits, &out.Limits + *out = make([]LLMPolicyRateLimitValue, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMTrafficPolicyRateLimitRule. 
+func (in *LLMTrafficPolicyRateLimitRule) DeepCopy() *LLMTrafficPolicyRateLimitRule { + if in == nil { + return nil + } + out := new(LLMTrafficPolicyRateLimitRule) + in.DeepCopyInto(out) + return out +} diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml new file mode 100644 index 00000000..1a85fbcb --- /dev/null +++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml @@ -0,0 +1,137 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.2 + name: llmbackendtrafficpolicies.aigateway.envoyproxy.io +spec: + group: aigateway.envoyproxy.io + names: + kind: LLMBackendTrafficPolicy + listKind: LLMBackendTrafficPolicyList + plural: llmbackendtrafficpolicies + singular: llmbackendtrafficpolicy + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: LLMBackendTrafficPolicy controls the flow of traffic to the backend. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: Spec defines the details of the LLMBackend traffic policy. 
+ properties: + backendRef: + description: |- + BackendRefs lists the LLMBackends that this traffic policy will apply + The namespace is "local", i.e. the same namespace as the LLMRoute. + properties: + name: + description: Name is the name of the LLMBackend in the same namespace + as the LLMRoute. + type: string + required: + - name + type: object + rateLimit: + description: RateLimit defines the rate limit policy. + properties: + rules: + description: Rules defines the rate limit rules. + items: + description: LLMTrafficPolicyRateLimitRule defines the details + of the rate limit policy. + properties: + headers: + description: |- + Headers is a list of request headers to match. Multiple header values are ANDed together, + meaning, a request MUST match all the specified headers. + At least one of headers or sourceCIDR condition must be specified. + items: + description: LLMPolicyRateLimitHeaderMatch defines the + match attributes within the HTTP Headers of the request. + properties: + name: + description: Name of the HTTP header. + maxLength: 256 + minLength: 1 + type: string + type: + description: Type specifies how to match against the + value of the header. + enum: + - Exact + - RegularExpression + - Distinct + type: string + value: + description: |- + Value within the HTTP header. Due to the + case-insensitivity of header names, "foo" and "Foo" are considered equivalent. + Do not set this field when Type="Distinct", implying matching on any/all unique + values within the header. + maxLength: 1024 + type: string + required: + - name + - type + type: object + type: array + limits: + items: + description: LLMPolicyRateLimitValue defines the limits + for rate limiting. + properties: + quantity: + description: Quantity specifies the number of requests + or tokens allowed in the given interval. + type: integer + type: + default: Token + description: Type specifies the type of rate limit. 
+ enum: + - Request + - Token + type: string + unit: + default: Minute + description: Unit specifies the interval for the rate + limit. + enum: + - Second + - Minute + - Hour + - Day + type: string + required: + - quantity + type: object + minItems: 1 + type: array + required: + - limits + type: object + type: array + type: object + type: object + type: object + served: true + storage: true diff --git a/tests/cel-validation/main_test.go b/tests/cel-validation/main_test.go index d6e6e6c6..dbe7a988 100644 --- a/tests/cel-validation/main_test.go +++ b/tests/cel-validation/main_test.go @@ -37,6 +37,7 @@ func runTest(m *testing.M) int { for _, crd := range []string{ "aigateway.envoyproxy.io_llmroutes.yaml", "aigateway.envoyproxy.io_llmbackends.yaml", + "aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml", } { crds = append(crds, filepath.Join(base, crd)) } @@ -133,3 +134,39 @@ func TestLLMBackends(t *testing.T) { }) } } + +func TestLLMBackendTrafficPolicy(t *testing.T) { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(30*time.Second)) + defer cancel() + + for _, tc := range []struct { + name string + expErr string + }{ + {name: "basic.yaml"}, + { + name: "unknown_ratelimit_type.yaml", + expErr: "spec.rateLimit.rules[0].limits[0].type: Unsupported value: \"Foo\": supported values: \"Request\", \"Token\"", + }, + { + name: "unknown_ratelimit_unit.yaml", + expErr: "spec.rateLimit.rules[0].limits[0].unit: Unsupported value: \"Foo\": supported values: \"Second\", \"Minute\", \"Hour\", \"Day\"", + }, + } { + t.Run(tc.name, func(t *testing.T) { + data, err := tests.ReadFile(path.Join("testdata/llmbackendtrafficpolicies", tc.name)) + require.NoError(t, err) + + llmBackendTrafficPolicy := &aigv1a1.LLMBackendTrafficPolicy{} + err = yaml.UnmarshalStrict(data, llmBackendTrafficPolicy) + require.NoError(t, err) + + if tc.expErr != "" { + require.ErrorContains(t, c.Create(ctx, llmBackendTrafficPolicy), tc.expErr) + } else { + require.NoError(t, 
c.Create(ctx, llmBackendTrafficPolicy)) + require.NoError(t, c.Delete(ctx, llmBackendTrafficPolicy)) + } + }) + } +} diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml new file mode 100644 index 00000000..40a6af25 --- /dev/null +++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml @@ -0,0 +1,36 @@ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: LLMBackendTrafficPolicy +metadata: + name: dog-backend-traffic-policy + namespace: default +spec: + backendRef: + name: dog + rateLimit: + rules: + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: gpt-4o-mini + - name: x-user-id + type: Distinct + limits: + - type: Request + quantity: 10 + unit: Minute + - type: Token + quantity: 500 + unit: Minute + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: llama-3-8b + limits: + - quantity: 500 + unit: Hour + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: llama-3-70b + limits: + - quantity: 500 diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml new file mode 100644 index 00000000..fd9cad2c --- /dev/null +++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml @@ -0,0 +1,20 @@ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: LLMBackendTrafficPolicy +metadata: + name: dog-backend-traffic-policy + namespace: default +spec: + backendRef: + name: dog + rateLimit: + rules: + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: gpt-4o-mini + - name: x-user-id + type: Distinct + limits: + - type: Foo + quantity: 10 + unit: Minute diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml new 
file mode 100644 index 00000000..044e9bf5 --- /dev/null +++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml @@ -0,0 +1,20 @@ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: LLMBackendTrafficPolicy +metadata: + name: dog-backend-traffic-policy + namespace: default +spec: + backendRef: + name: dog + rateLimit: + rules: + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: gpt-4o-mini + - name: x-user-id + type: Distinct + limits: + - type: Token + quantity: 10 + unit: Foo