From 3deaf80a89ef3b2d5eacb560bc13ac6c05fe9413 Mon Sep 17 00:00:00 2001 From: yweng14 Date: Fri, 6 Dec 2024 15:17:11 -0500 Subject: [PATCH] add cel test and address comments Signed-off-by: yweng14 --- api/v1alpha1/api.go | 104 ++++-------------- api/v1alpha1/registry.go | 1 + api/v1alpha1/zz_generated.deepcopy.go | 80 -------------- ...voyproxy.io_llmbackendtrafficpolicies.yaml | 48 +------- tests/cel-validation/main_test.go | 37 +++++++ .../llmbackendtrafficpolicies/basic.yaml | 36 ++++++ .../unknown_ratelimit_type.yaml | 20 ++++ .../unknown_ratelimit_unit.yaml | 20 ++++ 8 files changed, 134 insertions(+), 212 deletions(-) create mode 100644 tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml create mode 100644 tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml create mode 100644 tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go index 800df8e0..8c92f2f4 100644 --- a/api/v1alpha1/api.go +++ b/api/v1alpha1/api.go @@ -165,32 +165,10 @@ type LLMTrafficPolicyRateLimitRule struct { // meaning, a request MUST match all the specified headers. // At least one of headers or sourceCIDR condition must be specified. Headers []LLMPolicyRateLimitHeaderMatch `json:"headers,omitempty"` - // Metadata is a list of metadata to match. Multiple metadata values are ANDed together, - Metadata []LLMPolicyRateLimitMetadataMatch `json:"metadata,omitempty"` - // Limits holds the rate limit values. - // This limit is applied for traffic flows when the selectors - // compute to True, causing the request to be counted towards the limit. - // The limit is enforced and the request is ratelimited, i.e. a response with - // 429 HTTP status code is sent back to the client when - // the selected requests have reached the limit. - // // +kubebuilder:validation:MinItems=1 Limits []LLMPolicyRateLimitValue `json:"limits"` } -type LLMPolicyRateLimitModelNameMatch struct { - // Type specifies how to match against the value of the model name. - // Only "Exact" and "Distinct" are supported. - // +kubebuilder:validation:Enum=Exact;Distinct - Type LLMPolicyRateLimitStringMatchType `json:"type"` - // Value specifies the value of the model name base on the match Type. - // It is ignored if the match Type is "Distinct". - // - // +optional - // +kubebuilder:validation:MaxLength=1024 - Value *string `json:"value"` -} - // LLMPolicyRateLimitHeaderMatch defines the match attributes within the HTTP Headers of the request. type LLMPolicyRateLimitHeaderMatch struct { // Type specifies how to match against the value of the header. @@ -219,62 +197,32 @@ type LLMPolicyRateLimitStringMatchType string // HeaderMatchType constants. const ( - // HeaderMatchExact matches the exact value of the Value field against the value of + // LLMPolicyRateLimitStringMatchHeaderMatchExact matches the exact value of the Value field against the value of // the specified HTTP Header. - HeaderMatchExact LLMPolicyRateLimitStringMatchType = "Exact" + LLMPolicyRateLimitStringMatchHeaderMatchExact LLMPolicyRateLimitStringMatchType = "Exact" // HeaderMatchRegularExpression matches a regular expression against the value of the // specified HTTP Header. The regex string must adhere to the syntax documented in // https://github.com/google/re2/wiki/Syntax. HeaderMatchRegularExpression LLMPolicyRateLimitStringMatchType = "RegularExpression" - // HeaderMatchDistinct matches any and all possible unique values encountered in the + // LLMPolicyRateLimitStringMatchHeaderMatchDistinct matches any and all possible unique values encountered in the // specified HTTP Header. Note that each unique value will receive its own rate limit // bucket. // Note: This is only supported for Global Rate Limits. - HeaderMatchDistinct LLMPolicyRateLimitStringMatchType = "Distinct" -) - -// LLMPolicyRateLimitMetadataMatch defines the match attributes within the metadata from dynamic or route entry. -// The match will be ignored if the metadata is not present. -type LLMPolicyRateLimitMetadataMatch struct { - // Type specifies the type of metadata to match. - // - // +kubebuilder:default=Dynamic - Type LLMPolicyRateLimitMetadataMatchMetadataType `json:"type"` - // Name specifies the key of the metadata to match. - Name string `json:"name"` - // Paths specifies the value of the metadata to match. - // +optional - // +kubebuilder:validation:MaxItems=32 - Paths []string `json:"paths,omitempty"` - // DefaultValue specifies an optional value to use if “metadata“ is empty. - // Default value is "unknown". - // - // +optional - DefaultValue *string `json:"defaultValue,omitempty"` -} - -// LLMPolicyRateLimitMetadataMatchMetadataType specifies the type of metadata to match. -// -// +kubebuilder:validation:Enum=Dynamic;RouteEntry -type LLMPolicyRateLimitMetadataMatchMetadataType string - -const ( - // MetadataTypeDynamic specifies that the source of metadata is dynamic. - MetadataTypeDynamic LLMPolicyRateLimitMetadataMatchMetadataType = "Dynamic" + LLMPolicyRateLimitStringMatchHeaderMatchDistinct LLMPolicyRateLimitStringMatchType = "Distinct" ) // LLMPolicyRateLimitValue defines the limits for rate limiting. type LLMPolicyRateLimitValue struct { // Type specifies the type of rate limit. // - // +kubebuilder:default=Request - Type LLMPolicyRateLimitType `json:"type"` + // +kubebuilder:default=Token + Type LLMPolicyRateLimitType `json:"type,omitempty"` // Quantity specifies the number of requests or tokens allowed in the given interval. Quantity uint `json:"quantity"` // Unit specifies the interval for the rate limit. // // +kubebuilder:default=Minute - Unit LLMPolicyRateLimitUnit `json:"unit"` + Unit LLMPolicyRateLimitUnit `json:"unit,omitempty"` } // LLMPolicyRateLimitType specifies the type of rate limit. @@ -284,10 +232,10 @@ type LLMPolicyRateLimitValue struct { type LLMPolicyRateLimitType string const ( - // RateLimitTypeRequest specifies the rate limit to be based on the number of requests. - RateLimitTypeRequest LLMPolicyRateLimitType = "Request" - // RateLimitTypeToken specifies the rate limit to be based on the number of tokens. - RateLimitTypeToken LLMPolicyRateLimitType = "Token" + // LLMPolicyRateLimitTypeRequest specifies the rate limit to be based on the number of requests. + LLMPolicyRateLimitTypeRequest LLMPolicyRateLimitType = "Request" + // LLMPolicyRateLimitTypeToken specifies the rate limit to be based on the number of tokens. + LLMPolicyRateLimitTypeToken LLMPolicyRateLimitType = "Token" ) // LLMPolicyRateLimitUnit specifies the intervals for setting rate limits. @@ -298,29 +246,15 @@ type LLMPolicyRateLimitUnit string // RateLimitUnit constants. const ( - // RateLimitUnitSecond specifies the rate limit interval to be 1 second. - RateLimitUnitSecond LLMPolicyRateLimitUnit = "Second" + // LLMPolicyRateLimitUnitSecond specifies the rate limit interval to be 1 second. + LLMPolicyRateLimitUnitSecond LLMPolicyRateLimitUnit = "Second" - // RateLimitUnitMinute specifies the rate limit interval to be 1 minute. - RateLimitUnitMinute LLMPolicyRateLimitUnit = "Minute" + // LLMPolicyRateLimitUnitMinute specifies the rate limit interval to be 1 minute. + LLMPolicyRateLimitUnitMinute LLMPolicyRateLimitUnit = "Minute" - // RateLimitUnitHour specifies the rate limit interval to be 1 hour. - RateLimitUnitHour LLMPolicyRateLimitUnit = "Hour" + // LLMPolicyRateLimitUnitHour specifies the rate limit interval to be 1 hour. + LLMPolicyRateLimitUnitHour LLMPolicyRateLimitUnit = "Hour" - // RateLimitUnitDay specifies the rate limit interval to be 1 day. - RateLimitUnitDay LLMPolicyRateLimitUnit = "Day" + // LLMPolicyRateLimitUnitDay specifies the rate limit interval to be 1 day. + LLMPolicyRateLimitUnitDay LLMPolicyRateLimitUnit = "Day" ) - -// +kubebuilder:validation:XValidation:rule="has(self.group) ? self.group == 'gateway.networking.k8s.io' : true ", message="group must be gateway.networking.k8s.io" -type TargetSelector struct { - // Group is the group that this selector targets. Defaults to gateway.networking.k8s.io - // - // +kubebuilder:default:="gateway.networking.k8s.io" - Group *gwapiv1a2.Group `json:"group,omitempty"` - - // Kind is the resource kind that this selector targets. - Kind gwapiv1a2.Kind `json:"kind"` - - // MatchLabels are the set of label selectors for identifying the targeted resource - MatchLabels map[string]string `json:"matchLabels"` -} diff --git a/api/v1alpha1/registry.go b/api/v1alpha1/registry.go index 7a64bc29..b32a2c3d 100644 --- a/api/v1alpha1/registry.go +++ b/api/v1alpha1/registry.go @@ -8,6 +8,7 @@ import ( func init() { SchemeBuilder.Register(&LLMRoute{}, &LLMRouteList{}) SchemeBuilder.Register(&LLMBackend{}, &LLMBackendList{}) + SchemeBuilder.Register(&LLMBackendTrafficPolicy{}, &LLMBackendTrafficPolicyList{}) } const GroupName = "aigateway.envoyproxy.io" diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 0e033c28..cd012aaf 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -6,7 +6,6 @@ package v1alpha1 import ( runtime "k8s.io/apimachinery/pkg/runtime" - v1 "sigs.k8s.io/gateway-api/apis/v1" "sigs.k8s.io/gateway-api/apis/v1alpha2" ) @@ -213,51 +212,6 @@ func (in *LLMPolicyRateLimitHeaderMatch) DeepCopy() *LLMPolicyRateLimitHeaderMat return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *LLMPolicyRateLimitMetadataMatch) DeepCopyInto(out *LLMPolicyRateLimitMetadataMatch) { - *out = *in - if in.Paths != nil { - in, out := &in.Paths, &out.Paths - *out = make([]string, len(*in)) - copy(*out, *in) - } - if in.DefaultValue != nil { - in, out := &in.DefaultValue, &out.DefaultValue - *out = new(string) - **out = **in - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMPolicyRateLimitMetadataMatch. -func (in *LLMPolicyRateLimitMetadataMatch) DeepCopy() *LLMPolicyRateLimitMetadataMatch { - if in == nil { - return nil - } - out := new(LLMPolicyRateLimitMetadataMatch) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *LLMPolicyRateLimitModelNameMatch) DeepCopyInto(out *LLMPolicyRateLimitModelNameMatch) { - *out = *in - if in.Value != nil { - in, out := &in.Value, &out.Value - *out = new(string) - **out = **in - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMPolicyRateLimitModelNameMatch. -func (in *LLMPolicyRateLimitModelNameMatch) DeepCopy() *LLMPolicyRateLimitModelNameMatch { - if in == nil { - return nil - } - out := new(LLMPolicyRateLimitModelNameMatch) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *LLMPolicyRateLimitValue) DeepCopyInto(out *LLMPolicyRateLimitValue) { *out = *in @@ -391,13 +345,6 @@ func (in *LLMTrafficPolicyRateLimitRule) DeepCopyInto(out *LLMTrafficPolicyRateL (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.Metadata != nil { - in, out := &in.Metadata, &out.Metadata - *out = make([]LLMPolicyRateLimitMetadataMatch, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } if in.Limits != nil { in, out := &in.Limits, &out.Limits *out = make([]LLMPolicyRateLimitValue, len(*in)) @@ -414,30 +361,3 @@ func (in *LLMTrafficPolicyRateLimitRule) DeepCopy() *LLMTrafficPolicyRateLimitRu in.DeepCopyInto(out) return out } - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *TargetSelector) DeepCopyInto(out *TargetSelector) { - *out = *in - if in.Group != nil { - in, out := &in.Group, &out.Group - *out = new(v1.Group) - **out = **in - } - if in.MatchLabels != nil { - in, out := &in.MatchLabels, &out.MatchLabels - *out = make(map[string]string, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TargetSelector. -func (in *TargetSelector) DeepCopy() *TargetSelector { - if in == nil { - return nil - } - out := new(TargetSelector) - in.DeepCopyInto(out) - return out -} diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml index 97727c1f..1a85fbcb 100644 --- a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml +++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml @@ -96,13 +96,6 @@ spec: type: object type: array limits: - description: |- - Limits holds the rate limit values. - This limit is applied for traffic flows when the selectors - compute to True, causing the request to be counted towards the limit. - The limit is enforced and the request is ratelimited, i.e. a response with - 429 HTTP status code is sent back to the client when - the selected requests have reached the limit. items: description: LLMPolicyRateLimitValue defines the limits for rate limiting. @@ -112,7 +105,7 @@ spec: or tokens allowed in the given interval. type: integer type: - default: Request + default: Token description: Type specifies the type of rate limit. enum: - Request @@ -130,48 +123,9 @@ spec: type: string required: - quantity - - type - - unit type: object minItems: 1 type: array - metadata: - description: Metadata is a list of metadata to match. Multiple - metadata values are ANDed together, - items: - description: |- - LLMPolicyRateLimitMetadataMatch defines the match attributes within the metadata from dynamic or route entry. - The match will be ignored if the metadata is not present. - properties: - defaultValue: - description: |- - DefaultValue specifies an optional value to use if “metadata“ is empty. - Default value is "unknown". - type: string - name: - description: Name specifies the key of the metadata - to match. - type: string - paths: - description: Paths specifies the value of the metadata - to match. - items: - type: string - maxItems: 32 - type: array - type: - default: Dynamic - description: Type specifies the type of metadata to - match. - enum: - - Dynamic - - RouteEntry - type: string - required: - - name - - type - type: object - type: array required: - limits type: object diff --git a/tests/cel-validation/main_test.go b/tests/cel-validation/main_test.go index d6e6e6c6..dbe7a988 100644 --- a/tests/cel-validation/main_test.go +++ b/tests/cel-validation/main_test.go @@ -37,6 +37,7 @@ func runTest(m *testing.M) int { for _, crd := range []string{ "aigateway.envoyproxy.io_llmroutes.yaml", "aigateway.envoyproxy.io_llmbackends.yaml", + "aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml", } { crds = append(crds, filepath.Join(base, crd)) } @@ -133,3 +134,39 @@ func TestLLMBackends(t *testing.T) { }) } } + +func TestLLMBackendTrafficPolicy(t *testing.T) { + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(30*time.Second)) + defer cancel() + + for _, tc := range []struct { + name string + expErr string + }{ + {name: "basic.yaml"}, + { + name: "unknown_ratelimit_type.yaml", + expErr: "spec.rateLimit.rules[0].limits[0].type: Unsupported value: \"Foo\": supported values: \"Request\", \"Token\"", + }, + { + name: "unknown_ratelimit_unit.yaml", + expErr: "spec.rateLimit.rules[0].limits[0].unit: Unsupported value: \"Foo\": supported values: \"Second\", \"Minute\", \"Hour\", \"Day\"", + }, + } { + t.Run(tc.name, func(t *testing.T) { + data, err := tests.ReadFile(path.Join("testdata/llmbackendtrafficpolicies", tc.name)) + require.NoError(t, err) + + llmBackendTrafficPolicy := &aigv1a1.LLMBackendTrafficPolicy{} + err = yaml.UnmarshalStrict(data, llmBackendTrafficPolicy) + require.NoError(t, err) + + if tc.expErr != "" { + require.ErrorContains(t, c.Create(ctx, llmBackendTrafficPolicy), tc.expErr) + } else { + require.NoError(t, c.Create(ctx, llmBackendTrafficPolicy)) + require.NoError(t, c.Delete(ctx, llmBackendTrafficPolicy)) + } + }) + } +} diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml new file mode 100644 index 00000000..40a6af25 --- /dev/null +++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml @@ -0,0 +1,36 @@ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: LLMBackendTrafficPolicy +metadata: + name: dog-backend-traffic-policy + namespace: default +spec: + backendRef: + name: dog + rateLimit: + rules: + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: gpt-4o-mini + - name: x-user-id + type: Distinct + limits: + - type: Request + quantity: 10 + unit: Minute + - type: Token + quantity: 500 + unit: Minute + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: llama-3-8b + limits: + - quantity: 500 + unit: Hour + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: llama-3-70b + limits: + - quantity: 500 diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml new file mode 100644 index 00000000..fd9cad2c --- /dev/null +++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml @@ -0,0 +1,20 @@ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: LLMBackendTrafficPolicy +metadata: + name: dog-backend-traffic-policy + namespace: default +spec: + backendRef: + name: dog + rateLimit: + rules: + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: gpt-4o-mini + - name: x-user-id + type: Distinct + limits: + - type: Foo + quantity: 10 + unit: Minute diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml new file mode 100644 index 00000000..044e9bf5 --- /dev/null +++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml @@ -0,0 +1,20 @@ +apiVersion: aigateway.envoyproxy.io/v1alpha1 +kind: LLMBackendTrafficPolicy +metadata: + name: dog-backend-traffic-policy + namespace: default +spec: + backendRef: + name: dog + rateLimit: + rules: + - headers: + - name: x-ai-gateway-llm-model-name + type: Exact + value: gpt-4o-mini + - name: x-user-id + type: Distinct + limits: + - type: Token + quantity: 10 + unit: Foo