vllm-project · varungup90 · Feb 28, 2025 · Jeffwan · Mar 1, 2025
diff --git a/pkg/plugins/gateway/algorithms/prefix_cache.go b/pkg/plugins/gateway/algorithms/prefix_cache.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"math/rand"
 	"strconv"
+	"strings"
 
 	"github.com/vllm-project/aibrix/pkg/plugins/gateway/prefixcacheindexer"
 	"github.com/vllm-project/aibrix/pkg/utils"
@@ -72,10 +73,7 @@ func (p prefixCacheRouter) Route(ctx context.Context, pods map[string]*v1.Pod, m
 		}
 	}
 
-	tokens, err := utils.TokenizeInputText(message)
-	if err != nil {
-		return "", err
-	}
+	tokens := strings.Split(message, " ")
 
 	var targetPod *v1.Pod
 	matchedTokens, unMatchedTokens, matchedPods := p.prefixCacheIndexer.MatchPrefix(tokens, model, readyPods)
@@ -96,7 +94,7 @@ func (p prefixCacheRouter) Route(ctx context.Context, pods map[string]*v1.Pod, m
 	for _, p := range readyPods {
 		readyPodNames = append(readyPodNames, p.Status.PodIP)
 	}
-	klog.InfoS("prefix cache route",
+	klog.V(4).InfoS("prefix cache route",
 		"message", message,
 		"tokens", tokens,
 		"matched_tokens", matchedTokens,

diff --git a/pkg/plugins/gateway/prefixcacheindexer/hash.go b/pkg/plugins/gateway/prefixcacheindexer/hash.go
@@ -17,8 +17,6 @@ limitations under the License.
 package prefixcacheindexer
 
 import (
-	"bytes"
-	"encoding/binary"
 	"math/rand"
 	"strconv"
 	"sync"
@@ -121,7 +119,7 @@ func NewPrefixHashTable() PrefixCacheIndexer {
 
 // returns matchedTokens, unMatchedTokens, matchedPods
 // TODO: add an interface with multiple implementations such as hash or radix tree
-func (c *PrefixHashTable) MatchPrefix(tokens []int, model string, pods []*v1.Pod) ([]int, []int, []*v1.Pod) {
+func (c *PrefixHashTable) MatchPrefix(tokens []string, model string, pods []*v1.Pod) ([]string, []string, []*v1.Pod) {
 	c.mu.RLock()
 	defer c.mu.RUnlock()
 	var block, lastMatchedBlock Block
@@ -134,8 +132,9 @@ func (c *PrefixHashTable) MatchPrefix(tokens []int, model string, pods []*v1.Pod
 			end = len(tokens)
 		}
 
-		chunk := tokens[i:end]
-		_, _ = c.hash.Write(IntArrayToByteArray(chunk))
+		for _, b := range stringArrayToByteArray(tokens[i:end]) {
+			_, _ = c.hash.Write(b)
+		}
 		prefixHash := c.hash.Sum64()
 		c.hash.ResetWithSeed(c.seed)
 		block, ok = c.blocks[prefixHash]
@@ -164,7 +163,7 @@ func (c *PrefixHashTable) MatchPrefix(tokens []int, model string, pods []*v1.Pod
 	return matchedTokens, unMatchedTokens, matchedPods
 }
 
-func (c *PrefixHashTable) AddPrefix(unMatchedTokens []int, model, pod string) {
+func (c *PrefixHashTable) AddPrefix(unMatchedTokens []string, model, pod string) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
@@ -174,8 +173,9 @@ func (c *PrefixHashTable) AddPrefix(unMatchedTokens []int, model, pod string) {
 			end = len(unMatchedTokens)
 		}
 
-		chunk := unMatchedTokens[i:end]
-		_, _ = c.hash.Write(IntArrayToByteArray(chunk))
+		for _, b := range stringArrayToByteArray(unMatchedTokens[i:end]) {
+			_, _ = c.hash.Write(b)
+		}
 		prefixHash := c.hash.Sum64()
 		c.hash.ResetWithSeed(c.seed)
 		block, ok := c.blocks[prefixHash]
@@ -209,13 +209,11 @@ func (c *PrefixHashTable) Evict(now time.Time) {
 	}
 }
 
-func IntArrayToByteArray(intArray []int) []byte {
-	buf := new(bytes.Buffer)
-	for _, val := range intArray {
-		err := binary.Write(buf, binary.LittleEndian, int32(val))
-		if err != nil {
-			panic(err)
-		}
+// stringArrayToByteArray returns one byte slice per input string, in order; it does not join them.
+func stringArrayToByteArray(stringArray []string) [][]byte {
+	byteArray := make([][]byte, len(stringArray))
+	for i, str := range stringArray {
+		byteArray[i] = []byte(str)
 	}
-	return buf.Bytes()
+	return byteArray
 }
diff --git a/pkg/plugins/gateway/prefixcacheindexer/hash_test.go b/pkg/plugins/gateway/prefixcacheindexer/hash_test.go
@@ -17,14 +17,13 @@ limitations under the License.
 package prefixcacheindexer
 
 import (
-	"fmt"
 	"math/rand"
+	"strings"
 	"testing"
 	"time"
 
 	"github.com/cespare/xxhash/v2"
 	"github.com/stretchr/testify/assert"
-	"github.com/vllm-project/aibrix/pkg/utils"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -42,29 +41,28 @@ func Test_PrefixHashTableE2E(t *testing.T) {
 		{ObjectMeta: metav1.ObjectMeta{Name: "p2"}},
 	}
 
-	inputText := "Hello World! What a Good Day! Good Morning! 你好世界！多么美好的一天啊！早上好！"
-	tokens, err := utils.TokenizeInputText(inputText)
-	assert.Equal(t, nil, err)
+	inputText := "Hello World! What a Good Day! Good day to code and learn new things in LLM!! 你好世界！ 多么美好的一天啊！"
+	tokens := strings.Split(inputText, " ")
 
 	matchedTokens, unMatchedTokens, matchPods := cache.MatchPrefix(tokens, "m1", pods)
 	assert.Equal(t, 0, len(matchedTokens))
 	assert.Equal(t,
-		[]int{9906, 4435, 0, 3639, 264, 7839, 6187, 0, 7839, 29084, 0, 220, 57668, 53901, 3574, 244, 98220, 6447, 43240, 82696, 58666, 53901, 9554, 15120, 36827, 28308, 232, 6447, 6079, 102, 17905, 53901, 6447},
+		[]string{"Hello", "World!", "What", "a", "Good", "Day!", "Good", "day", "to", "code", "and", "learn", "new", "things", "in", "LLM!!", "你好世界！", "多么美好的一天啊！"},
 		unMatchedTokens)
 	assert.Equal(t, 0, len(matchPods))
 
 	cache.AddPrefix(unMatchedTokens, "m1", "p1")
 	matchedTokens, unMatchedTokens, matchPods = cache.MatchPrefix(tokens, "m1", pods)
 	assert.Equal(t,
-		[]int{9906, 4435, 0, 3639, 264, 7839, 6187, 0, 7839, 29084, 0, 220, 57668, 53901, 3574, 244, 98220, 6447, 43240, 82696, 58666, 53901, 9554, 15120, 36827, 28308, 232, 6447, 6079, 102, 17905, 53901, 6447},
+		[]string{"Hello", "World!", "What", "a", "Good", "Day!", "Good", "day", "to", "code", "and", "learn", "new", "things", "in", "LLM!!", "你好世界！", "多么美好的一天啊！"},
 		matchedTokens)
 	assert.Equal(t, 0, len(unMatchedTokens))
 	assert.Equal(t, "p1", matchPods[0].Name)
 
 	cache.Evict(time.Now().Add(60 * time.Minute))
 	_, unMatchedTokens, matchPods = cache.MatchPrefix(tokens, "m1", pods)
 	assert.Equal(t,
-		[]int{9906, 4435, 0, 3639, 264, 7839, 6187, 0, 7839, 29084, 0, 220, 57668, 53901, 3574, 244, 98220, 6447, 43240, 82696, 58666, 53901, 9554, 15120, 36827, 28308, 232, 6447, 6079, 102, 17905, 53901, 6447},
+		[]string{"Hello", "World!", "What", "a", "Good", "Day!", "Good", "day", "to", "code", "and", "learn", "new", "things", "in", "LLM!!", "你好世界！", "多么美好的一天啊！"},
 		unMatchedTokens)
 	assert.Equal(t, 0, len(matchPods))
 }
@@ -78,8 +76,8 @@ func Test_MatchPrefix(t *testing.T) {
 		cache         PrefixHashTable
 		model         string
 		pods          []*v1.Pod
-		matchTokens   []int
-		unMatchTokens []int
+		matchTokens   []string
+		unMatchTokens []string
 		matchPods     []*v1.Pod
 	}{
 		{
@@ -95,16 +93,16 @@ func Test_MatchPrefix(t *testing.T) {
 				{ObjectMeta: metav1.ObjectMeta{Name: "p1"}},
 				{ObjectMeta: metav1.ObjectMeta{Name: "p2"}},
 			},
-			matchTokens:   []int{},
-			unMatchTokens: []int{9906, 4435, 0, 3639, 264, 7839, 6187, 0, 220, 57668, 53901, 3574, 244, 98220, 6447, 43240, 82696, 58666, 53901, 9554, 15120, 36827, 28308, 232, 6447},
+			matchTokens:   []string{},
+			unMatchTokens: []string{"Hello", "World!", "What", "a", "Good", "Day!", "你好世界！多么美好的一天啊！"},
 			matchPods:     nil,
 		},
 		{
 			name:      "token length more than prefix block size, one prefix block exist in the cache",
-			inputText: "Hello World! What a Good Day! 你好世界！多么美好的一天啊！",
+			inputText: "Hello World! What a Good Day! Good day to code and learn new things in LLM!! 你好世界！多么美好的一天啊！",
 			cache: PrefixHashTable{
 				blocks: map[uint64]Block{
-					8954089069687757318: {
+					8439316938363978324: {
 						modelToPods: map[string]map[string]time.Time{
 							"m1": {
 								"p1": time.Now(),
@@ -121,24 +119,19 @@ func Test_MatchPrefix(t *testing.T) {
 				{ObjectMeta: metav1.ObjectMeta{Name: "p1"}},
 				{ObjectMeta: metav1.ObjectMeta{Name: "p2"}},
 			},
-			matchTokens:   []int{9906, 4435, 0, 3639, 264, 7839, 6187, 0, 220, 57668, 53901, 3574, 244, 98220, 6447, 43240},
-			unMatchTokens: []int{82696, 58666, 53901, 9554, 15120, 36827, 28308, 232, 6447},
+			matchTokens:   []string{"Hello", "World!", "What", "a", "Good", "Day!", "Good", "day", "to", "code", "and", "learn", "new", "things", "in", "LLM!!"},
+			unMatchTokens: []string{"你好世界！多么美好的一天啊！"},
 			matchPods: []*v1.Pod{
 				{ObjectMeta: metav1.ObjectMeta{Name: "p1"}},
 			},
 		},
 	}
 
 	for _, tt := range tests {
-		tokens, err := utils.TokenizeInputText(tt.inputText)
-		assert.Equal(t, nil, err)
-		fmt.Println(len(tokens))
-		fmt.Println(tokens)
-
-		matchTokens, unMatchTokens, matchPods := tt.cache.MatchPrefix(tokens, tt.model, tt.pods)
+		matchTokens, unMatchTokens, matchPods := tt.cache.MatchPrefix(strings.Split(tt.inputText, " "), tt.model, tt.pods)
 
-		assert.Equal(t, tt.matchTokens, matchTokens)
-		assert.Equal(t, tt.unMatchTokens, unMatchTokens)
-		assert.Equal(t, tt.matchPods, matchPods)
+		assert.Equal(t, tt.matchTokens, matchTokens, tt.name)
+		assert.Equal(t, tt.unMatchTokens, unMatchTokens, tt.name)
+		assert.Equal(t, tt.matchPods, matchPods, tt.name)
 	}
 }
diff --git a/pkg/plugins/gateway/prefixcacheindexer/indexer.go b/pkg/plugins/gateway/prefixcacheindexer/indexer.go
@@ -25,10 +25,10 @@ import (
 type PrefixCacheIndexer interface {
 	// MatchPrefix matches the longest prefix sequence for input request (passed as input tokens)
 	// and returns matched prefix (as tokens), remaining unmatched input request (as tokens) and pods matching the prefix
-	MatchPrefix(inputTokens []int, model string, pods []*v1.Pod) (matchedTokens []int, unMatchedTokens []int, matchedPods []*v1.Pod)
+	MatchPrefix(inputTokens []string, model string, pods []*v1.Pod) (matchedTokens []string, unMatchedTokens []string, matchedPods []*v1.Pod)
 
 	// AddPrefix adds tokens in internal prefix cache indexer to be used by future requests
-	AddPrefix(tokens []int, model, pod string)
+	AddPrefix(tokens []string, model, pod string)
 
 	// Evict is invoked at fixed internal to clean up expired tokens from prefix cache.
 	// TODO: Add max blocks to cache, add LRU policy along with TTL and add performance benchmark tests.