From 3deaf80a89ef3b2d5eacb560bc13ac6c05fe9413 Mon Sep 17 00:00:00 2001
From: yweng14 <yweng14@bloomberg.net>
Date: Fri, 6 Dec 2024 15:17:11 -0500
Subject: [PATCH] add cel test and address comments

Signed-off-by: yweng14 <yweng14@bloomberg.net>
---
 api/v1alpha1/api.go                           | 104 ++++--------------
 api/v1alpha1/registry.go                      |   1 +
 api/v1alpha1/zz_generated.deepcopy.go         |  80 --------------
 ...voyproxy.io_llmbackendtrafficpolicies.yaml |  48 +-------
 tests/cel-validation/main_test.go             |  37 +++++++
 .../llmbackendtrafficpolicies/basic.yaml      |  36 ++++++
 .../unknown_ratelimit_type.yaml               |  20 ++++
 .../unknown_ratelimit_unit.yaml               |  20 ++++
 8 files changed, 134 insertions(+), 212 deletions(-)
 create mode 100644 tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml
 create mode 100644 tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml
 create mode 100644 tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml

diff --git a/api/v1alpha1/api.go b/api/v1alpha1/api.go
index 800df8e0..8c92f2f4 100644
--- a/api/v1alpha1/api.go
+++ b/api/v1alpha1/api.go
@@ -165,32 +165,10 @@ type LLMTrafficPolicyRateLimitRule struct {
 	// meaning, a request MUST match all the specified headers.
 	// At least one of headers or sourceCIDR condition must be specified.
 	Headers []LLMPolicyRateLimitHeaderMatch `json:"headers,omitempty"`
-	// Metadata is a list of metadata to match. Multiple metadata values are ANDed together,
-	Metadata []LLMPolicyRateLimitMetadataMatch `json:"metadata,omitempty"`
-	// Limits holds the rate limit values.
-	// This limit is applied for traffic flows when the selectors
-	// compute to True, causing the request to be counted towards the limit.
-	// The limit is enforced and the request is ratelimited, i.e. a response with
-	// 429 HTTP status code is sent back to the client when
-	// the selected requests have reached the limit.
-	//
 	// +kubebuilder:validation:MinItems=1
 	Limits []LLMPolicyRateLimitValue `json:"limits"`
 }
 
-type LLMPolicyRateLimitModelNameMatch struct {
-	// Type specifies how to match against the value of the model name.
-	// Only "Exact" and "Distinct" are supported.
-	// +kubebuilder:validation:Enum=Exact;Distinct
-	Type LLMPolicyRateLimitStringMatchType `json:"type"`
-	// Value specifies the value of the model name base on the match Type.
-	// It is ignored if the match Type is "Distinct".
-	//
-	// +optional
-	// +kubebuilder:validation:MaxLength=1024
-	Value *string `json:"value"`
-}
-
 // LLMPolicyRateLimitHeaderMatch defines the match attributes within the HTTP Headers of the request.
 type LLMPolicyRateLimitHeaderMatch struct {
 	// Type specifies how to match against the value of the header.
@@ -219,62 +197,32 @@ type LLMPolicyRateLimitStringMatchType string
 
 // HeaderMatchType constants.
 const (
-	// HeaderMatchExact matches the exact value of the Value field against the value of
+	// LLMPolicyRateLimitStringMatchHeaderMatchExact matches the exact value of the Value field against the value of
 	// the specified HTTP Header.
-	HeaderMatchExact LLMPolicyRateLimitStringMatchType = "Exact"
+	LLMPolicyRateLimitStringMatchHeaderMatchExact LLMPolicyRateLimitStringMatchType = "Exact"
 	// HeaderMatchRegularExpression matches a regular expression against the value of the
 	// specified HTTP Header. The regex string must adhere to the syntax documented in
 	// https://github.com/google/re2/wiki/Syntax.
 	HeaderMatchRegularExpression LLMPolicyRateLimitStringMatchType = "RegularExpression"
-	// HeaderMatchDistinct matches any and all possible unique values encountered in the
+	// LLMPolicyRateLimitStringMatchHeaderMatchDistinct matches any and all possible unique values encountered in the
 	// specified HTTP Header. Note that each unique value will receive its own rate limit
 	// bucket.
 	// Note: This is only supported for Global Rate Limits.
-	HeaderMatchDistinct LLMPolicyRateLimitStringMatchType = "Distinct"
-)
-
-// LLMPolicyRateLimitMetadataMatch defines the match attributes within the metadata from dynamic or route entry.
-// The match will be ignored if the metadata is not present.
-type LLMPolicyRateLimitMetadataMatch struct {
-	// Type specifies the type of metadata to match.
-	//
-	// +kubebuilder:default=Dynamic
-	Type LLMPolicyRateLimitMetadataMatchMetadataType `json:"type"`
-	// Name specifies the key of the metadata to match.
-	Name string `json:"name"`
-	// Paths specifies the value of the metadata to match.
-	// +optional
-	// +kubebuilder:validation:MaxItems=32
-	Paths []string `json:"paths,omitempty"`
-	// DefaultValue specifies an optional value to use if “metadata“ is empty.
-	// Default value is "unknown".
-	//
-	// +optional
-	DefaultValue *string `json:"defaultValue,omitempty"`
-}
-
-// LLMPolicyRateLimitMetadataMatchMetadataType specifies the type of metadata to match.
-//
-// +kubebuilder:validation:Enum=Dynamic;RouteEntry
-type LLMPolicyRateLimitMetadataMatchMetadataType string
-
-const (
-	// MetadataTypeDynamic specifies that the source of metadata is dynamic.
-	MetadataTypeDynamic LLMPolicyRateLimitMetadataMatchMetadataType = "Dynamic"
+	LLMPolicyRateLimitStringMatchHeaderMatchDistinct LLMPolicyRateLimitStringMatchType = "Distinct"
 )
 
 // LLMPolicyRateLimitValue defines the limits for rate limiting.
 type LLMPolicyRateLimitValue struct {
 	// Type specifies the type of rate limit.
 	//
-	// +kubebuilder:default=Request
-	Type LLMPolicyRateLimitType `json:"type"`
+	// +kubebuilder:default=Token
+	Type LLMPolicyRateLimitType `json:"type,omitempty"`
 	// Quantity specifies the number of requests or tokens allowed in the given interval.
 	Quantity uint `json:"quantity"`
 	// Unit specifies the interval for the rate limit.
 	//
 	// +kubebuilder:default=Minute
-	Unit LLMPolicyRateLimitUnit `json:"unit"`
+	Unit LLMPolicyRateLimitUnit `json:"unit,omitempty"`
 }
 
 // LLMPolicyRateLimitType specifies the type of rate limit.
@@ -284,10 +232,10 @@ type LLMPolicyRateLimitValue struct {
 type LLMPolicyRateLimitType string
 
 const (
-	// RateLimitTypeRequest specifies the rate limit to be based on the number of requests.
-	RateLimitTypeRequest LLMPolicyRateLimitType = "Request"
-	// RateLimitTypeToken specifies the rate limit to be based on the number of tokens.
-	RateLimitTypeToken LLMPolicyRateLimitType = "Token"
+	// LLMPolicyRateLimitTypeRequest specifies the rate limit to be based on the number of requests.
+	LLMPolicyRateLimitTypeRequest LLMPolicyRateLimitType = "Request"
+	// LLMPolicyRateLimitTypeToken specifies the rate limit to be based on the number of tokens.
+	LLMPolicyRateLimitTypeToken LLMPolicyRateLimitType = "Token"
 )
 
 // LLMPolicyRateLimitUnit specifies the intervals for setting rate limits.
@@ -298,29 +246,15 @@ type LLMPolicyRateLimitUnit string
 
 // RateLimitUnit constants.
 const (
-	// RateLimitUnitSecond specifies the rate limit interval to be 1 second.
-	RateLimitUnitSecond LLMPolicyRateLimitUnit = "Second"
+	// LLMPolicyRateLimitUnitSecond specifies the rate limit interval to be 1 second.
+	LLMPolicyRateLimitUnitSecond LLMPolicyRateLimitUnit = "Second"
 
-	// RateLimitUnitMinute specifies the rate limit interval to be 1 minute.
-	RateLimitUnitMinute LLMPolicyRateLimitUnit = "Minute"
+	// LLMPolicyRateLimitUnitMinute specifies the rate limit interval to be 1 minute.
+	LLMPolicyRateLimitUnitMinute LLMPolicyRateLimitUnit = "Minute"
 
-	// RateLimitUnitHour specifies the rate limit interval to be 1 hour.
-	RateLimitUnitHour LLMPolicyRateLimitUnit = "Hour"
+	// LLMPolicyRateLimitUnitHour specifies the rate limit interval to be 1 hour.
+	LLMPolicyRateLimitUnitHour LLMPolicyRateLimitUnit = "Hour"
 
-	// RateLimitUnitDay specifies the rate limit interval to be 1 day.
-	RateLimitUnitDay LLMPolicyRateLimitUnit = "Day"
+	// LLMPolicyRateLimitUnitDay specifies the rate limit interval to be 1 day.
+	LLMPolicyRateLimitUnitDay LLMPolicyRateLimitUnit = "Day"
 )
-
-// +kubebuilder:validation:XValidation:rule="has(self.group) ? self.group == 'gateway.networking.k8s.io' : true ", message="group must be gateway.networking.k8s.io"
-type TargetSelector struct {
-	// Group is the group that this selector targets. Defaults to gateway.networking.k8s.io
-	//
-	// +kubebuilder:default:="gateway.networking.k8s.io"
-	Group *gwapiv1a2.Group `json:"group,omitempty"`
-
-	// Kind is the resource kind that this selector targets.
-	Kind gwapiv1a2.Kind `json:"kind"`
-
-	// MatchLabels are the set of label selectors for identifying the targeted resource
-	MatchLabels map[string]string `json:"matchLabels"`
-}
diff --git a/api/v1alpha1/registry.go b/api/v1alpha1/registry.go
index 7a64bc29..b32a2c3d 100644
--- a/api/v1alpha1/registry.go
+++ b/api/v1alpha1/registry.go
@@ -8,6 +8,7 @@ import (
 func init() {
 	SchemeBuilder.Register(&LLMRoute{}, &LLMRouteList{})
 	SchemeBuilder.Register(&LLMBackend{}, &LLMBackendList{})
+	SchemeBuilder.Register(&LLMBackendTrafficPolicy{}, &LLMBackendTrafficPolicyList{})
 }
 
 const GroupName = "aigateway.envoyproxy.io"
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 0e033c28..cd012aaf 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -6,7 +6,6 @@ package v1alpha1
 
 import (
 	runtime "k8s.io/apimachinery/pkg/runtime"
-	v1 "sigs.k8s.io/gateway-api/apis/v1"
 	"sigs.k8s.io/gateway-api/apis/v1alpha2"
 )
 
@@ -213,51 +212,6 @@ func (in *LLMPolicyRateLimitHeaderMatch) DeepCopy() *LLMPolicyRateLimitHeaderMat
 	return out
 }
 
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *LLMPolicyRateLimitMetadataMatch) DeepCopyInto(out *LLMPolicyRateLimitMetadataMatch) {
-	*out = *in
-	if in.Paths != nil {
-		in, out := &in.Paths, &out.Paths
-		*out = make([]string, len(*in))
-		copy(*out, *in)
-	}
-	if in.DefaultValue != nil {
-		in, out := &in.DefaultValue, &out.DefaultValue
-		*out = new(string)
-		**out = **in
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMPolicyRateLimitMetadataMatch.
-func (in *LLMPolicyRateLimitMetadataMatch) DeepCopy() *LLMPolicyRateLimitMetadataMatch {
-	if in == nil {
-		return nil
-	}
-	out := new(LLMPolicyRateLimitMetadataMatch)
-	in.DeepCopyInto(out)
-	return out
-}
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *LLMPolicyRateLimitModelNameMatch) DeepCopyInto(out *LLMPolicyRateLimitModelNameMatch) {
-	*out = *in
-	if in.Value != nil {
-		in, out := &in.Value, &out.Value
-		*out = new(string)
-		**out = **in
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LLMPolicyRateLimitModelNameMatch.
-func (in *LLMPolicyRateLimitModelNameMatch) DeepCopy() *LLMPolicyRateLimitModelNameMatch {
-	if in == nil {
-		return nil
-	}
-	out := new(LLMPolicyRateLimitModelNameMatch)
-	in.DeepCopyInto(out)
-	return out
-}
-
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *LLMPolicyRateLimitValue) DeepCopyInto(out *LLMPolicyRateLimitValue) {
 	*out = *in
@@ -391,13 +345,6 @@ func (in *LLMTrafficPolicyRateLimitRule) DeepCopyInto(out *LLMTrafficPolicyRateL
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
-	if in.Metadata != nil {
-		in, out := &in.Metadata, &out.Metadata
-		*out = make([]LLMPolicyRateLimitMetadataMatch, len(*in))
-		for i := range *in {
-			(*in)[i].DeepCopyInto(&(*out)[i])
-		}
-	}
 	if in.Limits != nil {
 		in, out := &in.Limits, &out.Limits
 		*out = make([]LLMPolicyRateLimitValue, len(*in))
@@ -414,30 +361,3 @@ func (in *LLMTrafficPolicyRateLimitRule) DeepCopy() *LLMTrafficPolicyRateLimitRu
 	in.DeepCopyInto(out)
 	return out
 }
-
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
-func (in *TargetSelector) DeepCopyInto(out *TargetSelector) {
-	*out = *in
-	if in.Group != nil {
-		in, out := &in.Group, &out.Group
-		*out = new(v1.Group)
-		**out = **in
-	}
-	if in.MatchLabels != nil {
-		in, out := &in.MatchLabels, &out.MatchLabels
-		*out = make(map[string]string, len(*in))
-		for key, val := range *in {
-			(*out)[key] = val
-		}
-	}
-}
-
-// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TargetSelector.
-func (in *TargetSelector) DeepCopy() *TargetSelector {
-	if in == nil {
-		return nil
-	}
-	out := new(TargetSelector)
-	in.DeepCopyInto(out)
-	return out
-}
diff --git a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml
index 97727c1f..1a85fbcb 100644
--- a/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml
+++ b/manifests/charts/ai-gateway-helm/crds/aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml
@@ -96,13 +96,6 @@ spec:
                             type: object
                           type: array
                         limits:
-                          description: |-
-                            Limits holds the rate limit values.
-                            This limit is applied for traffic flows when the selectors
-                            compute to True, causing the request to be counted towards the limit.
-                            The limit is enforced and the request is ratelimited, i.e. a response with
-                            429 HTTP status code is sent back to the client when
-                            the selected requests have reached the limit.
                           items:
                             description: LLMPolicyRateLimitValue defines the limits
                               for rate limiting.
@@ -112,7 +105,7 @@ spec:
                                   or tokens allowed in the given interval.
                                 type: integer
                               type:
-                                default: Request
+                                default: Token
                                 description: Type specifies the type of rate limit.
                                 enum:
                                 - Request
@@ -130,48 +123,9 @@ spec:
                                 type: string
                             required:
                             - quantity
-                            - type
-                            - unit
                             type: object
                           minItems: 1
                           type: array
-                        metadata:
-                          description: Metadata is a list of metadata to match. Multiple
-                            metadata values are ANDed together,
-                          items:
-                            description: |-
-                              LLMPolicyRateLimitMetadataMatch defines the match attributes within the metadata from dynamic or route entry.
-                              The match will be ignored if the metadata is not present.
-                            properties:
-                              defaultValue:
-                                description: |-
-                                  DefaultValue specifies an optional value to use if “metadata“ is empty.
-                                  Default value is "unknown".
-                                type: string
-                              name:
-                                description: Name specifies the key of the metadata
-                                  to match.
-                                type: string
-                              paths:
-                                description: Paths specifies the value of the metadata
-                                  to match.
-                                items:
-                                  type: string
-                                maxItems: 32
-                                type: array
-                              type:
-                                default: Dynamic
-                                description: Type specifies the type of metadata to
-                                  match.
-                                enum:
-                                - Dynamic
-                                - RouteEntry
-                                type: string
-                            required:
-                            - name
-                            - type
-                            type: object
-                          type: array
                       required:
                       - limits
                       type: object
diff --git a/tests/cel-validation/main_test.go b/tests/cel-validation/main_test.go
index d6e6e6c6..dbe7a988 100644
--- a/tests/cel-validation/main_test.go
+++ b/tests/cel-validation/main_test.go
@@ -37,6 +37,7 @@ func runTest(m *testing.M) int {
 	for _, crd := range []string{
 		"aigateway.envoyproxy.io_llmroutes.yaml",
 		"aigateway.envoyproxy.io_llmbackends.yaml",
+		"aigateway.envoyproxy.io_llmbackendtrafficpolicies.yaml",
 	} {
 		crds = append(crds, filepath.Join(base, crd))
 	}
@@ -133,3 +134,39 @@ func TestLLMBackends(t *testing.T) {
 		})
 	}
 }
+
+func TestLLMBackendTrafficPolicy(t *testing.T) {
+	ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(30*time.Second))
+	defer cancel()
+
+	for _, tc := range []struct {
+		name   string
+		expErr string
+	}{
+		{name: "basic.yaml"},
+		{
+			name:   "unknown_ratelimit_type.yaml",
+			expErr: "spec.rateLimit.rules[0].limits[0].type: Unsupported value: \"Foo\": supported values: \"Request\", \"Token\"",
+		},
+		{
+			name:   "unknown_ratelimit_unit.yaml",
+			expErr: "spec.rateLimit.rules[0].limits[0].unit: Unsupported value: \"Foo\": supported values: \"Second\", \"Minute\", \"Hour\", \"Day\"",
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			data, err := tests.ReadFile(path.Join("testdata/llmbackendtrafficpolicies", tc.name))
+			require.NoError(t, err)
+
+			llmBackendTrafficPolicy := &aigv1a1.LLMBackendTrafficPolicy{}
+			err = yaml.UnmarshalStrict(data, llmBackendTrafficPolicy)
+			require.NoError(t, err)
+
+			if tc.expErr != "" {
+				require.ErrorContains(t, c.Create(ctx, llmBackendTrafficPolicy), tc.expErr)
+			} else {
+				require.NoError(t, c.Create(ctx, llmBackendTrafficPolicy))
+				require.NoError(t, c.Delete(ctx, llmBackendTrafficPolicy))
+			}
+		})
+	}
+}
diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml
new file mode 100644
index 00000000..40a6af25
--- /dev/null
+++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/basic.yaml
@@ -0,0 +1,36 @@
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: LLMBackendTrafficPolicy
+metadata:
+  name: dog-backend-traffic-policy
+  namespace: default
+spec:
+  backendRef:
+    name: dog
+  rateLimit:
+    rules:
+      - headers:
+          - name: x-ai-gateway-llm-model-name
+            type: Exact
+            value: gpt-4o-mini
+          - name: x-user-id
+            type: Distinct
+        limits:
+          - type: Request
+            quantity: 10
+            unit: Minute
+          - type: Token
+            quantity: 500
+            unit: Minute
+      - headers:
+          - name: x-ai-gateway-llm-model-name
+            type: Exact
+            value: llama-3-8b
+        limits:
+          - quantity: 500
+            unit: Hour
+      - headers:
+          - name: x-ai-gateway-llm-model-name
+            type: Exact
+            value: llama-3-70b
+        limits:
+          - quantity: 500
diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml
new file mode 100644
index 00000000..fd9cad2c
--- /dev/null
+++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_type.yaml
@@ -0,0 +1,20 @@
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: LLMBackendTrafficPolicy
+metadata:
+  name: dog-backend-traffic-policy
+  namespace: default
+spec:
+  backendRef:
+    name: dog
+  rateLimit:
+    rules:
+      - headers:
+          - name: x-ai-gateway-llm-model-name
+            type: Exact
+            value: gpt-4o-mini
+          - name: x-user-id
+            type: Distinct
+        limits:
+          - type: Foo
+            quantity: 10
+            unit: Minute
diff --git a/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml
new file mode 100644
index 00000000..044e9bf5
--- /dev/null
+++ b/tests/cel-validation/testdata/llmbackendtrafficpolicies/unknown_ratelimit_unit.yaml
@@ -0,0 +1,20 @@
+apiVersion: aigateway.envoyproxy.io/v1alpha1
+kind: LLMBackendTrafficPolicy
+metadata:
+  name: dog-backend-traffic-policy
+  namespace: default
+spec:
+  backendRef:
+    name: dog
+  rateLimit:
+    rules:
+      - headers:
+          - name: x-ai-gateway-llm-model-name
+            type: Exact
+            value: gpt-4o-mini
+          - name: x-user-id
+            type: Distinct
+        limits:
+          - type: Token
+            quantity: 10
+            unit: Foo