envoyproxy · wengyao04 · Dec 6, 2024 · Dec 6, 2024 · Dec 6, 2024 · Dec 6, 2024
@@ -88,7 +88,7 @@ test-cel: envtest apigen format
 # To build for multiple platforms, set the GOOS_LIST and GOARCH_LIST variables.
 #
 # Example:
-# - `make build.controler GOOS_LIST="linux darwin" GOARCH_LIST="amd64 arm64"`
+# - `make build.controller GOOS_LIST="linux darwin" GOARCH_LIST="amd64 arm64"`
 GOOS_LIST ?= $(shell go env GOOS)
 GOARCH_LIST ?= $(shell go env GOARCH)
 .PHONY: build.%

@@ -123,3 +123,204 @@ const (
 	// https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html
 	APISchemaAWSBedrock APISchema = "AWSBedrock"
 )
+
+// +kubebuilder:object:root=true
+
+// LLMBackendTrafficPolicy controls the flow of traffic to the backend referenced by its spec, for example by rate limiting.
+type LLMBackendTrafficPolicy struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+	// Spec defines the details of the LLMBackend traffic policy: the backend it applies to and its rate limit.
+	Spec LLMBackendTrafficPolicySpec `json:"spec,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// LLMBackendTrafficPolicyList contains a list of LLMBackendTrafficPolicy objects, carried in Items.
+type LLMBackendTrafficPolicyList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []LLMBackendTrafficPolicy `json:"items"`
+}
+
+// LLMBackendTrafficPolicySpec defines the details of the LLM backend traffic policy.
+// Rate limiting is the only traffic control defined on it so far.
+type LLMBackendTrafficPolicySpec struct {
+	// BackendRef references the LLMBackend that this traffic policy applies to.
+	// The namespace is "local", i.e. the same namespace as the LLMRoute.
+	// Exactly one backend is referenced; this field is not a list.
+	BackendRef LLMBackendLocalRef `json:"backendRef,omitempty"`
+	// RateLimit defines the rate limit policy.
+	RateLimit *LLMTrafficPolicyRateLimit `json:"rateLimit,omitempty"`
+}
+
+// LLMTrafficPolicyRateLimit defines the rate limit policy as a list of rules.
+type LLMTrafficPolicyRateLimit struct {
+	// Rules defines the rate limit rules.
+	Rules []LLMTrafficPolicyRateLimitRule `json:"rules,omitempty"`
+}
+
+// LLMTrafficPolicyRateLimitRule defines the details of the rate limit policy.
+type LLMTrafficPolicyRateLimitRule struct {
+	// Headers is a list of request headers to match. Multiple header values are ANDed together,
+	// meaning, a request MUST match all the specified headers.
+	// NOTE(review): presumably at least one of headers or metadata is expected (this rule has no sourceCIDR selector) — TODO confirm; no validation marker here enforces it.
+	Headers []LLMPolicyRateLimitHeaderMatch `json:"headers,omitempty"`
+	// Metadata is a list of metadata to match. Multiple metadata values are ANDed together.
+	Metadata []LLMPolicyRateLimitMetadataMatch `json:"metadata,omitempty"`
+	// Limits holds the rate limit values. At least one limit is required.
+	// This limit is applied for traffic flows when the selectors
+	// compute to True, causing the request to be counted towards the limit.
+	// The limit is enforced and the request is ratelimited, i.e. a response with
+	// 429 HTTP status code is sent back to the client when
+	// the selected requests have reached the limit.
+	//
+	// +kubebuilder:validation:MinItems=1
+	Limits []LLMPolicyRateLimitValue `json:"limits"`
+}
+
+// LLMPolicyRateLimitModelNameMatch defines the match attributes for the model name.
+type LLMPolicyRateLimitModelNameMatch struct {
+	// Type specifies how to match against the value of the model name.
+	// Only "Exact" and "Distinct" are supported.
+	// +kubebuilder:validation:Enum=Exact;Distinct
+	Type LLMPolicyRateLimitStringMatchType `json:"type"`
+	// Value specifies the value of the model name based on the match Type.
+	// It is ignored if the match Type is "Distinct".
+	// The field is optional, so an unset value is omitted from the serialized
+	// object rather than emitted as null.
+	//
+	// +optional
+	// +kubebuilder:validation:MaxLength=1024
+	Value *string `json:"value,omitempty"`
+}
+
+// LLMPolicyRateLimitHeaderMatch defines the match attributes within the HTTP Headers of the request.
+type LLMPolicyRateLimitHeaderMatch struct {
+	// Type specifies how to match against the value of the header.
+	Type LLMPolicyRateLimitStringMatchType `json:"type"`
+
+	// Name of the HTTP header. Due to the case-insensitivity of header names,
+	// "foo" and "Foo" are considered equivalent.
+	// +kubebuilder:validation:MinLength=1
+	// +kubebuilder:validation:MaxLength=256
+	Name string `json:"name"`
+
+	// Value within the HTTP header.
+	// Do not set this field when Type="Distinct", implying matching on any/all unique
+	// values within the header.
+	//
+	// +optional
+	// +kubebuilder:validation:MaxLength=1024
+	Value *string `json:"value,omitempty"`
+}
+
+// LLMPolicyRateLimitStringMatchType specifies the semantics of how string values (header values, model names) should be compared.
+// Valid LLMPolicyRateLimitStringMatchType values are "Exact", "RegularExpression", and "Distinct".
+//
+// +kubebuilder:validation:Enum=Exact;RegularExpression;Distinct
+type LLMPolicyRateLimitStringMatchType string
+
+// LLMPolicyRateLimitStringMatchType constants.
+const (
+	// HeaderMatchExact matches the exact value of the Value field against the value of
+	// the specified HTTP Header.
+	HeaderMatchExact LLMPolicyRateLimitStringMatchType = "Exact"
+	// HeaderMatchRegularExpression matches a regular expression against the value of the
+	// specified HTTP Header. The regex string must adhere to the syntax documented in
+	// https://github.com/google/re2/wiki/Syntax.
+	HeaderMatchRegularExpression LLMPolicyRateLimitStringMatchType = "RegularExpression"
+	// HeaderMatchDistinct matches any and all possible unique values encountered in the
+	// specified HTTP Header. Note that each unique value will receive its own rate limit
+	// bucket.
+	// Note: This is only supported for Global Rate Limits.
+	HeaderMatchDistinct LLMPolicyRateLimitStringMatchType = "Distinct"
+)
+
+// LLMPolicyRateLimitMetadataMatch defines the match attributes within the metadata from dynamic or route entry.
+// The match will be ignored if the metadata is not present.
+type LLMPolicyRateLimitMetadataMatch struct {
+	// Type specifies the type of metadata to match. It defaults to "Dynamic".
+	//
+	// +kubebuilder:default=Dynamic
+	Type LLMPolicyRateLimitMetadataMatchMetadataType `json:"type"`
+	// Name specifies the key of the metadata to match.
+	Name string `json:"name"`
+	// Paths specifies the value of the metadata to match (presumably the keys leading to it — TODO confirm); at most 32 entries.
+	// +optional
+	// +kubebuilder:validation:MaxItems=32
+	Paths []string `json:"paths,omitempty"`
+	// DefaultValue specifies an optional value to use if the metadata is empty.
+	// Default value is "unknown" (no default marker sets it here; presumably applied by the consumer — TODO confirm).
+	//
+	// +optional
+	DefaultValue *string `json:"defaultValue,omitempty"`
+}
+
+// LLMPolicyRateLimitMetadataMatchMetadataType specifies the type of metadata to match.
+// Valid LLMPolicyRateLimitMetadataMatchMetadataType values are "Dynamic" and "RouteEntry".
+//
+// +kubebuilder:validation:Enum=Dynamic;RouteEntry
+type LLMPolicyRateLimitMetadataMatchMetadataType string
+
+// LLMPolicyRateLimitMetadataMatchMetadataType constants.
+const (
+	// MetadataTypeDynamic specifies that the source of metadata is dynamic.
+	MetadataTypeDynamic LLMPolicyRateLimitMetadataMatchMetadataType = "Dynamic"
+	// MetadataTypeRouteEntry specifies that the source of metadata is the route entry.
+	// It names the "RouteEntry" value that the type's enum validation admits.
+	MetadataTypeRouteEntry LLMPolicyRateLimitMetadataMatchMetadataType = "RouteEntry"
+)
+
+// LLMPolicyRateLimitValue defines the limits for rate limiting.
+type LLMPolicyRateLimitValue struct {
+	// Type specifies the type of rate limit. It defaults to "Request".
+	//
+	// +kubebuilder:default=Request
+	Type LLMPolicyRateLimitType `json:"type"`
+	// Quantity specifies the number of requests or tokens allowed in the given interval.
+	Quantity uint `json:"quantity"`
+	// Unit specifies the interval for the rate limit. It defaults to "Minute".
+	//
+	// +kubebuilder:default=Minute
+	Unit LLMPolicyRateLimitUnit `json:"unit"`
+}
+
+// LLMPolicyRateLimitType specifies the type of rate limit.
+// Valid LLMPolicyRateLimitType values are "Request" and "Token".
+//
+// +kubebuilder:validation:Enum=Request;Token
+type LLMPolicyRateLimitType string
+
+const (
+	// RateLimitTypeRequest specifies that the rate limit is based on the number of requests.
+	RateLimitTypeRequest LLMPolicyRateLimitType = "Request"
+	// RateLimitTypeToken specifies that the rate limit is based on the number of tokens.
+	RateLimitTypeToken LLMPolicyRateLimitType = "Token"
+)
+
+// LLMPolicyRateLimitUnit specifies the intervals for setting rate limits.
+// Valid LLMPolicyRateLimitUnit values are "Second", "Minute", "Hour", and "Day".
+//
+// +kubebuilder:validation:Enum=Second;Minute;Hour;Day
+type LLMPolicyRateLimitUnit string
+
+// LLMPolicyRateLimitUnit constants.
+const (
+	// RateLimitUnitSecond specifies the rate limit interval to be 1 second.
+	RateLimitUnitSecond LLMPolicyRateLimitUnit = "Second"
+
+	// RateLimitUnitMinute specifies the rate limit interval to be 1 minute.
+	RateLimitUnitMinute LLMPolicyRateLimitUnit = "Minute"
+
+	// RateLimitUnitHour specifies the rate limit interval to be 1 hour.
+	RateLimitUnitHour LLMPolicyRateLimitUnit = "Hour"
+
+	// RateLimitUnitDay specifies the rate limit interval to be 1 day.
+	RateLimitUnitDay LLMPolicyRateLimitUnit = "Day"
+)
+
+// TargetSelector selects targeted resources by group, kind, and labels.
+//
+// +kubebuilder:validation:XValidation:rule="has(self.group) ? self.group == 'gateway.networking.k8s.io' : true ", message="group must be gateway.networking.k8s.io"
+type TargetSelector struct {
+	// Group is the group that this selector targets. Defaults to gateway.networking.k8s.io
+	//
+	// +kubebuilder:default:="gateway.networking.k8s.io"
+	Group *gwapiv1a2.Group `json:"group,omitempty"`
+
+	// Kind is the resource kind that this selector targets.
+	Kind gwapiv1a2.Kind `json:"kind"`
+
+	// MatchLabels are the set of label selectors for identifying the targeted resource.
+	MatchLabels map[string]string `json:"matchLabels"`
+}