Skip to content

Commit

Permalink
Read model name from request body (#290)
Browse files Browse the repository at this point in the history
* Use model name from request body

* rename dummy to reserved router
  • Loading branch information
varungup90 authored Oct 11, 2024
1 parent 8a067d2 commit 7fd4be4
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 150 deletions.
21 changes: 21 additions & 0 deletions config/gateway/gateway-plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,24 @@ spec:
fieldRef:
fieldPath: metadata.namespace
serviceAccountName: aibrix-gateway-plugin
---
# This is a dummy (reserved) route for incoming requests. After it matches,
# the request is either routed to an HTTPRoute selected by model name, or
# routed based on the target chosen for that model's service.
# TODO (varun): check if this dummy route can be removed in future
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
name: reserved-router
namespace: aibrix-system
spec:
parentRefs:
- name: aibrix-eg
rules:
- matches:
- path:
type: PathPrefix
value: /
backendRefs:
- name: aibrix-gateway-plugins
port: 50052
13 changes: 4 additions & 9 deletions config/gateway/gateway.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,10 @@ spec:
match:
prefix: "/"
headers:
- name: "routing-strategy"
string_match:
exact: "random"
- name: "least-request"
string_match:
exact: "random"
- name: "routing-strategy"
string_match:
exact: "throughput"
- name: "routing-strategy"
string_match:
safe_regex:
regex: .*
route:
cluster: original_destination_cluster
timeout: 1000s # Increase route timeout
Expand Down
23 changes: 16 additions & 7 deletions docs/development/app/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,32 +67,42 @@ kubectl -n envoy-gateway-system port-forward service/envoy-aibrix-system-aibrix-
# Add rpm/tpm config
```shell
# Note: creating a user is not mandatory to access the gateway API.

kubectl -n aibrix-system port-forward svc/aibrix-gateway-users 8090:8090 &

curl http://localhost:8090/CreateUser \
-H "Content-Type: application/json" \
-d '{"name": "your-user-name","rpm": 100,"tpm": 1000}'
```
Test request (ensure header model name matches with deployment's model name for routing)
Test request
```shell
curl -v http://localhost:8888/v1/chat/completions \
-H "user: your-user-name" \
-H "model: llama2-70b" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-d '{
"model": "llama2-70b",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}' &
}'

curl -v http://localhost:8888/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-H "routing-strategy: random" \
-d '{
"model": "llama2-70b",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'


# least-request based
for i in {1..10}; do
curl -v http://localhost:8888/v1/chat/completions \
-H "user: your-user-name" \
-H "routing-strategy: least-request" \
-H "model: llama2-70b" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-d '{
Expand All @@ -106,8 +116,7 @@ done
for i in {1..10}; do
curl -v http://localhost:8888/v1/chat/completions \
-H "user: your-user-name" \
-H "routing-strategy: throughput" \
-H "model: llama2-70b" \
-H "routing-strategy: random" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer any_key" \
-d '{
Expand Down
2 changes: 1 addition & 1 deletion pkg/plugins/gateway/algorithms/least_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,5 +77,5 @@ func (r leastRequestRouter) Route(ctx context.Context, pods map[string]*v1.Pod)
}
}

return targetPodIP, nil
return targetPodIP + ":" + podPort, nil
}
2 changes: 1 addition & 1 deletion pkg/plugins/gateway/algorithms/random.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,5 @@ func (r randomRouter) Route(ctx context.Context, pods map[string]*v1.Pod) (strin
k--
}

return selectedPod.Status.PodIP, nil
return selectedPod.Status.PodIP + ":" + podPort, nil
}
2 changes: 1 addition & 1 deletion pkg/plugins/gateway/algorithms/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ const (
throughput_generation = "avg_generation_throughput_toks_per_s"
latency = "e2e_request_latency_seconds_sum"

podPort = 8000
podPort = "8000"
)

type Router interface {
Expand Down
2 changes: 1 addition & 1 deletion pkg/plugins/gateway/algorithms/throughput.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,5 @@ func (r throughputRouter) Route(ctx context.Context, pods map[string]*v1.Pod) (s
}
}

return targetPodIP, nil
return targetPodIP + ":" + podPort, nil
}
Loading

0 comments on commit 7fd4be4

Please sign in to comment.