-
Notifications
You must be signed in to change notification settings - Fork 15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add LLMBackendTrafficPolicy #35
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -123,3 +123,204 @@ const ( | |
// https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html | ||
APISchemaAWSBedrock APISchema = "AWSBedrock" | ||
) | ||
|
||
// +kubebuilder:object:root=true | ||
|
||
// LLMBackendTrafficPolicy controls the flow of traffic to the backend. | ||
// Its Spec references an LLMBackend and carries the rate limit policy for it. | ||
type LLMBackendTrafficPolicy struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ObjectMeta `json:"metadata,omitempty"` | ||
// Spec defines the details of the LLMBackend traffic policy. | ||
Spec LLMBackendTrafficPolicySpec `json:"spec,omitempty"` | ||
} | ||
|
||
// +kubebuilder:object:root=true | ||
|
||
// LLMBackendTrafficPolicyList contains a list of LLMBackendTrafficPolicy. | ||
type LLMBackendTrafficPolicyList struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ListMeta `json:"metadata,omitempty"` | ||
// Items holds the LLMBackendTrafficPolicy objects in the list. | ||
Items []LLMBackendTrafficPolicy `json:"items"` | ||
} | ||
|
||
// LLMBackendTrafficPolicySpec defines the details of llm backend traffic policy | ||
// like rateLimit, timeout etc. | ||
type LLMBackendTrafficPolicySpec struct { | ||
// BackendRef is the LLMBackend that this traffic policy applies to. | ||
// The namespace is "local", i.e. the same namespace as the LLMRoute. | ||
// | ||
BackendRef LLMBackendLocalRef `json:"backendRef,omitempty"` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The description states "backendrefs lists the llmbackends" which implies that this variable should be updated to:
Do we want a one (traffic policy) to many (backends) relationship? I think it makes sense to have that in the case where we have very similar models that we want to have the same rules for |
||
// RateLimit defines the rate limit policy. | ||
RateLimit *LLMTrafficPolicyRateLimit `json:"rateLimit,omitempty"` | ||
} | ||
|
||
// LLMTrafficPolicyRateLimit defines the rate limit policy of an LLMBackendTrafficPolicySpec. | ||
type LLMTrafficPolicyRateLimit struct { | ||
// Rules defines the rate limit rules. | ||
Rules []LLMTrafficPolicyRateLimitRule `json:"rules,omitempty"` | ||
} | ||
|
||
// LLMTrafficPolicyRateLimitRule defines the details of the rate limit policy. | ||
type LLMTrafficPolicyRateLimitRule struct { | ||
// Headers is a list of request headers to match. Multiple header values are ANDed together, | ||
// meaning, a request MUST match all the specified headers. | ||
// At least one of headers or metadata conditions must be specified. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is not matching by sourceCIDR here, we can also document the canonical header such as |
||
Headers []LLMPolicyRateLimitHeaderMatch `json:"headers,omitempty"` | ||
// Metadata is a list of metadata to match. Multiple metadata values are ANDed together. | ||
Metadata []LLMPolicyRateLimitMetadataMatch `json:"metadata,omitempty"` | ||
// Limits holds the rate limit values. | ||
// This limit is applied for traffic flows when the selectors | ||
// compute to True, causing the request to be counted towards the limit. | ||
// The limit is enforced and the request is ratelimited, i.e. a response with | ||
// 429 HTTP status code is sent back to the client when | ||
// the selected requests have reached the limit. | ||
// | ||
// +kubebuilder:validation:MinItems=1 | ||
Limits []LLMPolicyRateLimitValue `json:"limits"` | ||
} | ||
|
||
type LLMPolicyRateLimitModelNameMatch struct { | ||
// Type specifies how to match against the value of the model name. | ||
// Only "Exact" and "Distinct" are supported. | ||
// +kubebuilder:validation:Enum=Exact;Distinct | ||
Type LLMPolicyRateLimitStringMatchType `json:"type"` | ||
// Value specifies the value of the model name base on the match Type. | ||
// It is ignored if the match Type is "Distinct". | ||
// | ||
// +optional | ||
// +kubebuilder:validation:MaxLength=1024 | ||
Value *string `json:"value"` | ||
} | ||
|
||
// LLMPolicyRateLimitHeaderMatch defines the match attributes within the HTTP Headers of the request. | ||
type LLMPolicyRateLimitHeaderMatch struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we reuse the generic envoy gateway headerMatch type? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I would like to reuse EG native type as much as possible |
||
// Type specifies how to match against the value of the header. | ||
Type LLMPolicyRateLimitStringMatchType `json:"type"` | ||
mathetake marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Name of the HTTP header. | ||
// +kubebuilder:validation:MinLength=1 | ||
// +kubebuilder:validation:MaxLength=256 | ||
Name string `json:"name"` | ||
|
||
// Value within the HTTP header. Due to the | ||
// case-insensitivity of header names, "foo" and "Foo" are considered equivalent. | ||
// Do not set this field when Type="Distinct", implying matching on any/all unique | ||
// values within the header. | ||
// | ||
// +optional | ||
// +kubebuilder:validation:MaxLength=1024 | ||
Value *string `json:"value,omitempty"` | ||
} | ||
|
||
// LLMPolicyRateLimitStringMatchType specifies the semantics of how string values should be compared. | ||
// Valid LLMPolicyRateLimitStringMatchType values are "Exact", "RegularExpression", and "Distinct". | ||
// Individual fields of this type may narrow the accepted values with their own Enum marker. | ||
// | ||
// +kubebuilder:validation:Enum=Exact;RegularExpression;Distinct | ||
type LLMPolicyRateLimitStringMatchType string | ||
|
||
// LLMPolicyRateLimitStringMatchType constants. | ||
const ( | ||
// HeaderMatchExact matches the exact value of the Value field against the value of | ||
// the specified HTTP Header. | ||
HeaderMatchExact LLMPolicyRateLimitStringMatchType = "Exact" | ||
wengyao04 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// HeaderMatchRegularExpression matches a regular expression against the value of the | ||
// specified HTTP Header. The regex string must adhere to the syntax documented in | ||
// https://github.com/google/re2/wiki/Syntax. | ||
HeaderMatchRegularExpression LLMPolicyRateLimitStringMatchType = "RegularExpression" | ||
// HeaderMatchDistinct matches any and all possible unique values encountered in the | ||
// specified HTTP Header. Note that each unique value will receive its own rate limit | ||
// bucket. | ||
// Note: This is only supported for Global Rate Limits. | ||
HeaderMatchDistinct LLMPolicyRateLimitStringMatchType = "Distinct" | ||
) | ||
|
||
// LLMPolicyRateLimitMetadataMatch defines the match attributes within dynamic or route entry metadata. | ||
// The match will be ignored if the metadata is not present. | ||
type LLMPolicyRateLimitMetadataMatch struct { | ||
// Type specifies the type of metadata to match. | ||
// | ||
// +kubebuilder:default=Dynamic | ||
Type LLMPolicyRateLimitMetadataMatchMetadataType `json:"type"` | ||
// Name specifies the key of the metadata to match. | ||
Name string `json:"name"` | ||
// Paths specifies the value of the metadata to match. | ||
// At most 32 paths may be listed. | ||
// +optional | ||
// +kubebuilder:validation:MaxItems=32 | ||
Paths []string `json:"paths,omitempty"` | ||
// DefaultValue specifies an optional value to use if the metadata is empty. | ||
// Default value is "unknown". | ||
// | ||
// +optional | ||
DefaultValue *string `json:"defaultValue,omitempty"` | ||
} | ||
|
||
// LLMPolicyRateLimitMetadataMatchMetadataType specifies the type of metadata to match.
// Valid LLMPolicyRateLimitMetadataMatchMetadataType values are "Dynamic" and "RouteEntry".
//
// +kubebuilder:validation:Enum=Dynamic;RouteEntry
type LLMPolicyRateLimitMetadataMatchMetadataType string

// LLMPolicyRateLimitMetadataMatchMetadataType constants.
// Every value accepted by the Enum marker above has a named constant here.
const (
	// MetadataTypeDynamic specifies that the source of metadata is dynamic.
	MetadataTypeDynamic LLMPolicyRateLimitMetadataMatchMetadataType = "Dynamic"

	// MetadataTypeRouteEntry specifies that the source of metadata is the route entry.
	MetadataTypeRouteEntry LLMPolicyRateLimitMetadataMatchMetadataType = "RouteEntry"
)
|
||
// LLMPolicyRateLimitValue defines the limits for rate limiting. | ||
type LLMPolicyRateLimitValue struct { | ||
// Type specifies the type of rate limit. | ||
// | ||
// +kubebuilder:default=Request | ||
Type LLMPolicyRateLimitType `json:"type"` | ||
wengyao04 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Quantity specifies the number of requests or tokens allowed in the given interval. | ||
Quantity uint `json:"quantity"` | ||
// Unit specifies the interval for the rate limit. | ||
// | ||
// +kubebuilder:default=Minute | ||
Unit LLMPolicyRateLimitUnit `json:"unit"` | ||
} | ||
|
||
// LLMPolicyRateLimitType specifies the type of rate limit. | ||
// Valid LLMPolicyRateLimitType values are "Request" and "Token". | ||
// | ||
// +kubebuilder:validation:Enum=Request;Token | ||
type LLMPolicyRateLimitType string | ||
|
||
const ( | ||
// RateLimitTypeRequest specifies the rate limit to be based on the number of requests. | ||
RateLimitTypeRequest LLMPolicyRateLimitType = "Request" | ||
wengyao04 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// RateLimitTypeToken specifies the rate limit to be based on the number of tokens. | ||
RateLimitTypeToken LLMPolicyRateLimitType = "Token" | ||
) | ||
|
||
// LLMPolicyRateLimitUnit specifies the intervals for setting rate limits. | ||
// Valid LLMPolicyRateLimitUnit values are "Second", "Minute", "Hour", and "Day". | ||
// | ||
// +kubebuilder:validation:Enum=Second;Minute;Hour;Day | ||
type LLMPolicyRateLimitUnit string | ||
|
||
// LLMPolicyRateLimitUnit constants. | ||
const ( | ||
// RateLimitUnitSecond specifies the rate limit interval to be 1 second. | ||
RateLimitUnitSecond LLMPolicyRateLimitUnit = "Second" | ||
wengyao04 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// RateLimitUnitMinute specifies the rate limit interval to be 1 minute. | ||
RateLimitUnitMinute LLMPolicyRateLimitUnit = "Minute" | ||
|
||
// RateLimitUnitHour specifies the rate limit interval to be 1 hour. | ||
RateLimitUnitHour LLMPolicyRateLimitUnit = "Hour" | ||
|
||
// RateLimitUnitDay specifies the rate limit interval to be 1 day. | ||
RateLimitUnitDay LLMPolicyRateLimitUnit = "Day" | ||
) | ||
|
||
// TargetSelector selects target resources by API group, kind, and labels. | ||
// When Group is set it must be "gateway.networking.k8s.io". | ||
// | ||
// +kubebuilder:validation:XValidation:rule="has(self.group) ? self.group == 'gateway.networking.k8s.io' : true ", message="group must be gateway.networking.k8s.io" | ||
type TargetSelector struct { | ||
wengyao04 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Group is the group that this selector targets. Defaults to gateway.networking.k8s.io | ||
// | ||
// +kubebuilder:default:="gateway.networking.k8s.io" | ||
Group *gwapiv1a2.Group `json:"group,omitempty"` | ||
|
||
// Kind is the resource kind that this selector targets. | ||
Kind gwapiv1a2.Kind `json:"kind"` | ||
|
||
// MatchLabels are the set of label selectors for identifying the targeted resource. | ||
MatchLabels map[string]string `json:"matchLabels"` | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you add a bit more documentation here like for example this is used to setup rate limit etc.