From d6148a4eb8eec087114777c631a2608bdb88774d Mon Sep 17 00:00:00 2001 From: Ning <113055713+nwangfw@users.noreply.github.com> Date: Fri, 28 Feb 2025 21:54:46 -0800 Subject: [PATCH] Support benchmarking script by using real application trace (#737) Signed-off-by: Ning Wang Co-authored-by: Ning --- build/container/Dockerfile.runtime | 2 +- .../optimizer/profiling/benchmark.sh | 98 +++++- .../profiling/gen_benchmark_prompt.py | 330 ++++++++++++++++++ .../optimizer/profiling/gpu_benchmark.py | 38 +- python/aibrix/poetry.lock | 159 ++++++++- python/aibrix/pyproject.toml | 2 +- 6 files changed, 601 insertions(+), 28 deletions(-) create mode 100644 python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_benchmark_prompt.py diff --git a/build/container/Dockerfile.runtime b/build/container/Dockerfile.runtime index 80b18774..1f63e8b2 100644 --- a/build/container/Dockerfile.runtime +++ b/build/container/Dockerfile.runtime @@ -34,7 +34,7 @@ COPY --from=builder /app/dist/*.whl ./ # Install build dependencies and clean up in one step (avoiding creating another new layer) RUN apt-get update \ - && apt-get install -y --no-install-recommends gcc python3-dev \ + && apt-get install -y --no-install-recommends gcc python3-dev mawk \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* \ && pip install --no-cache-dir ./*.whl \ diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh index 1e20c786..ab028707 100755 --- a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh +++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh @@ -18,8 +18,9 @@ PATH_PREFIX=`dirname "$0"` OUTPUT_FILE= MODEL="llama2-7b" -TOTAL=100 -# TODO: Set your preferred request sizes and rates here. +TEMPERATURE=0.0 + +TOTAL=100 # Set your preferred request sizes and rates here. 
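+# The profiling loops at the bottom of this script sweep each dimension
+# geometrically, doubling input length, output length, and request rate
+# from the *_start values up to the *_limit values.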
 input_start=4
 input_limit=$((2**12)) # 4K
 output_start=4
@@ -29,10 +30,39 @@
 rate_limit=$((2**6)) # 64
 workload=
 dry_run=0
+
+# Function to generate a workload file for a specific input/output length pair
+generate_workload() {
+    local input_len=$1
+    local output_len=$2
+    local api_key=$3
+    local num_prompts=$4
+    local model=$5
+    local workload_path=$6
+    local output_dir=$7
+
+    local prompt_path
+    # $workload_path expands to "--workload_dataset_file <path>"; it is left
+    # unquoted so the flag and its value split into separate arguments.
+    prompt_path=$(python $PATH_PREFIX/gen_benchmark_prompt.py \
+        $workload_path \
+        --input-tokens "$input_len" \
+        --min-output-tokens "$output_len" \
+        --tolerance "0.2" \
+        --qps "2.0" \
+        --host "localhost" \
+        --port "8010" \
+        --api-key "$api_key" \
+        --total-prompts "$num_prompts" \
+        --model "$model" \
+        --temperature "$TEMPERATURE" \
+        --output-dir "$output_dir" 2>&1 | tail -n 1)
+
+    echo "$prompt_path"
+}
+
 while [[ $# -gt 0 ]]; do
   case "$1" in
     -m|--model)
-      MODEL="$2"
+      MODEL=$2
       shift 2
       ;;
     -o|--output)
@@ -71,6 +101,10 @@
       LLM_API_KEY=$2
       shift 2
       ;;
+    --temperature)
+      TEMPERATURE=$2
+      shift 2
+      ;;
    --workload)
      workload="--workload_dataset_file $2"
      shift 2
      ;;
@@ -82,31 +116,69 @@
   esac
 done
-# Make sure the directory exists and clear output file
+
 if [[ -z "$OUTPUT_FILE" ]]; then
+    echo "Use default output path"
    OUTPUT_FILE="${PATH_PREFIX}/result/${MODEL}.jsonl"
 fi
-mkdir -p `dirname "$OUTPUT_FILE"`
+
+dir_path=$(dirname "$OUTPUT_FILE")
+PROMPT_DIR="${dir_path}/prompts"
+
+mkdir -p "$(dirname "$OUTPUT_FILE")"
+mkdir -p "$PROMPT_DIR"
+
+# Clear the workload directory
+echo "Clearing workload directory: $PROMPT_DIR"
+rm -rf "$PROMPT_DIR"/*
+
+# Results append to $OUTPUT_FILE across runs; uncomment the line below to
+# truncate the output file at the start of every run instead.
+# > "$OUTPUT_FILE"
 # Print the arguments (or use them in your script logic)
-echo "Start benchmark $MODEL, input tokens:[$input_start:$input_limit], output tokens:[$output_start:$output_limit], rates:[$rate_start:$rate_limit], save as: $OUTPUT_FILE"
+echo "Start benchmark $MODEL, input tokens:[$input_start:$input_limit], output tokens:[$output_start:$output_limit], rates:[$rate_start:$rate_limit], save as: $OUTPUT_FILE, workload: $workload"
+
+
 if [[ $dry_run == 1 ]]; then
-    echo "Dru run enabled, skip profiling."
+    echo "Dry run enabled, skip profiling."
     exit
 fi
+# Run the benchmark for each combination
+echo "Starting benchmark..."
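+# For each (input_len, output_len) pair: when a workload trace is supplied,
+# first sample a real prompt of matching size via gen_benchmark_prompt.py,
+# then sweep the request rates; otherwise fall back to fixed synthetic prompts.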
 input_len=$input_start
 while [[ $input_len -le $input_limit ]]; do
     output_len=$output_start
     while [[ $output_len -le $output_limit ]]; do
-        req_rate=$rate_start
-        while [[ $req_rate -le $rate_limit ]]; do
-            python $PATH_PREFIX/gpu_benchmark.py --backend=vllm --port 8010 --model=$MODEL --request-rate=$req_rate --num-prompts=$TOTAL --input-len $input_len --output-len $output_len --api-key "$LLM_API_KEY" --stream $workload 1>>${OUTPUT_FILE}
+
+        if [[ -n "$workload" ]]; then
+            # Arguments must be passed to generate_workload in this exact order
+            WORKLOAD_FILE=$(generate_workload "$input_len" "$output_len" "$LLM_API_KEY" "$TOTAL" "$MODEL" "$workload" "$dir_path")
+            echo "Workload file: $WORKLOAD_FILE"
+        else
+            echo "Skipping workload pattern generation, benchmarking with fixed prompts"
+        fi
+
+        # Scale the rates by 1000 and round to integers, because [[ -le ]] cannot compare floats
+        req_rate=$(awk -v r="$rate_start" 'BEGIN { printf "%.0f", r * 1000 }')
+        rate_limit_scaled=$(awk -v r="$rate_limit" 'BEGIN { printf "%.0f", r * 1000 }')
+        while [[ $req_rate -le $rate_limit_scaled ]]; do
+            actual_rate=$(echo "$req_rate" | awk '{ printf "%.3f", $1 / 1000 }')
+            if [[ -n "$workload" ]]; then
+                if [[ -f "$WORKLOAD_FILE" ]]; then
+                    echo "run benchmark with workload file: $WORKLOAD_FILE"
+                    # The workload file exists, so benchmark with the sampled prompts
+                    python $PATH_PREFIX/gpu_benchmark.py --backend=vllm --port 8010 --model=$MODEL --request-rate=$actual_rate --num-prompts=$TOTAL --input-len $input_len --output-len $output_len --api-key "$LLM_API_KEY" --temperature "$TEMPERATURE" --workload_dataset_file "$WORKLOAD_FILE" --stream >> "$OUTPUT_FILE"
+                fi
+                # If no workload file was generated, this combination is skipped
+            else
+                echo "run benchmark with fixed prompts: input=$input_len, output=$output_len, rate=$actual_rate"
+                python $PATH_PREFIX/gpu_benchmark.py --backend=vllm --port 8010 --model=$MODEL --request-rate=$actual_rate --num-prompts=$TOTAL --input-len $input_len --output-len $output_len --api-key "$LLM_API_KEY" --temperature "$TEMPERATURE" --stream >> "$OUTPUT_FILE"
+            fi
             req_rate=$((req_rate * 2))
         done
         output_len=$((output_len * 2))
     done
-    input_len=$((input_len * 2))
+    input_len=$((input_len * 2))
 done
-
-echo "Profiling finished."
\ No newline at end of file
+echo "Benchmarking finished."
\ No newline at end of file
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_benchmark_prompt.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_benchmark_prompt.py
new file mode 100644
index 00000000..20f9ed28
--- /dev/null
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_benchmark_prompt.py
@@ -0,0 +1,330 @@
+import argparse
+import json
+import os
+import threading
+import time
+from typing import Any, Dict, List, Optional, Tuple
+
+import requests
+import tiktoken
+
+
+def get_tokenizer(pretrained_model_name_or_path: str, trust_remote_code: bool) -> Any:
+    """Get a tiktoken tokenizer (both arguments are currently unused)."""
+    try:
+        # Use cl100k_base for ChatGPT-style models
+        return tiktoken.get_encoding("cl100k_base")
+    except Exception as e:
+        print(f"Error loading cl100k_base tokenizer: {e}")
+        # Fall back to p50k_base (GPT-3 style)
+        return tiktoken.get_encoding("p50k_base")
+
+
+class RateLimiter:
+    def __init__(self, qps: float):
+        self.interval = 1.0 / qps
+        self.last_request_time = 0
+        self.lock = threading.Lock()
+
+    def wait(self):
+        """Wait if necessary to maintain the desired QPS."""
+        with self.lock:
+            current_time = time.time()
+            time_since_last = current_time - self.last_request_time
+            if time_since_last < self.interval:
+                sleep_time = self.interval - time_since_last
+                time.sleep(sleep_time)
+            self.last_request_time = time.time()
+
+
+class PromptSelector:
+    def __init__(
+        self,
+        trace_file: str,
+        model_endpoint: str = "http://localhost:8888/v1/chat/completions",
+        model: str = "deepseek-coder-7b",
+        qps: float = 2.0,
+        temperature: float = 0.0,
+        api_key: str = "any_key",
+        total_prompts: int = 1,
+        output_dir: str = ".",
+    ):
+        self.trace_file = trace_file
+        self.model_endpoint = model_endpoint
+        self.model = model
+        self.tokenizer = get_tokenizer("", False)
+        self.rate_limiter = RateLimiter(qps)
+        self.temperature = temperature
+        self.api_key = api_key
+        self.total_prompts = total_prompts
+        self.output_dir = output_dir
+
+    def count_tokens(self, text: str) -> int:
+        """Estimate token count using the tiktoken tokenizer."""
+        return len(self.tokenizer.encode(text))
+
+    def get_completion_tokens(self, prompt: str) -> Tuple[Optional[int], Dict]:
+        """Get actual completion tokens by querying the model with rate limiting."""
+        self.rate_limiter.wait()
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}",
+        }
+
+        data = {
+            "model": self.model,
+            "messages": [{"role": "user", "content": prompt}],
+            "temperature": self.temperature,
+        }
+
+        try:
+            response = requests.post(self.model_endpoint, headers=headers, json=data)
+            response.raise_for_status()
+            response_data = response.json()
+            completion_tokens = response_data.get("usage", {}).get("completion_tokens")
+            return completion_tokens, response_data
+        except Exception as e:
+            print(f"Error querying model: {e}")
+            return None, {}
+
+    def find_matching_prompts(
+        self,
+        target_input_tokens: int,
+        min_output_tokens: int,
+        input_tolerance: float = 0.1,
+        max_candidates: Optional[int] = None,
+    ) -> Optional[str]:
+        """Find prompts matching the token criteria and save results to a file.
+        Returns the path of the saved results file, or None if no prompt matched."""
+        matching_prompts = []
+        candidates = []
+
+        input_min = int(target_input_tokens * (1 - input_tolerance))
+        input_max = int(target_input_tokens * (1 + input_tolerance))
+
+        print("Scanning trace file for candidates...")
+        print(f"Input token range: {input_min} - {input_max}")
+
+        # First pass: collect all candidates based on input length
+        with open(self.trace_file, "r") as f:
+            for line in f:
+                try:
+                    data = json.loads(line)
+                    messages = data.get("messages", [])
+                    prompt = "\n".join(msg.get("content", "") for msg in messages)
+                    input_tokens = self.count_tokens(prompt)
+
+                    if input_min <= input_tokens <= input_max:
+                        input_diff = abs(input_tokens - target_input_tokens)
+                        candidates.append((prompt, input_tokens, input_diff))
+
+                except Exception:
+                    # Skip unparsable or otherwise malformed trace lines
+                    continue
+
+        # Sort candidates by input difference
+        candidates.sort(key=lambda x: x[2])
+
+        # If max_candidates is specified, keep only the closest candidates;
+        # otherwise query all of them
+        if max_candidates is not None:
+            candidates = candidates[:max_candidates]
+
+        print(f"Found {len(candidates)} candidates. Querying the model until one matches...")
+        print("-" * 80)
+
+        for prompt, input_tokens, _ in candidates:
+            output_tokens, response_data = self.get_completion_tokens(prompt)
+
+            if output_tokens and output_tokens >= min_output_tokens:
+                matching_prompts.append(
+                    (prompt, input_tokens, output_tokens, response_data)
+                )
+                break  # Stop after the first prompt that meets the output requirement
+
+        print("-" * 80)
+
+        filename = self.save_results(
+            matching_prompts, target_input_tokens, min_output_tokens
+        )
+        return filename
+
+    def save_results(
+        self,
+        matching_prompts: List[Tuple[str, int, int, Dict]],
+        target_input_tokens: int,
+        min_output_tokens: int,
+    ) -> Optional[str]:
+        """Save matching prompts to a JSON file and return the file path."""
+        # Only proceed if there are matching prompts to save
+        if not matching_prompts:
+            print("\nNo matching prompts found, skipping file creation.")
+            return None
+
+        # Save prompts under the configured output directory
+        prompts_dir = os.path.join(self.output_dir, "prompts")
+        os.makedirs(prompts_dir, exist_ok=True)
+
+        filename = os.path.join(
+            prompts_dir, f"prompt_in{target_input_tokens}_out{min_output_tokens}.json"
+        )
+
+        benchmark_format = []
+        base_timestamp = 1000
+
+        # Create the benchmark-compatible format
+        for prompt, input_tokens, output_tokens, response_data in matching_prompts:
+            for i in range(self.total_prompts):
+                benchmark_format.append(
+                    {
+                        "Timestamp": base_timestamp + (i * 1000),
+                        "Requests": [
+                            {
+                                "Prompt": prompt,
+                                "Prompt Length": input_tokens,
+                                "Output Length": output_tokens,
+                                "Metadata": {
+                                    "model_response": response_data,
+                                    "temperature": self.temperature,
+                                },
+                            }
+                        ],
+                    }
+                )
+
+        # Write the formatted data
+        with open(filename, "w", encoding="utf-8") as f:
+            json.dump(benchmark_format, f, indent=2)
+
+        print(f"\nResults saved to: {filename}")
+        return filename
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="Find prompts matching specific token criteria"
+    )
+    parser.add_argument(
+        "--workload_dataset_file",
+        type=str,
+        required=True,
+        help="Path to the workload dataset file",
+    )
+    parser.add_argument(
+        "--host",
+        type=str,
+        default="localhost",
+        help="Model endpoint host (default: localhost)",
+    )
+    parser.add_argument(
+        "--port", type=int, default=8010, help="Model endpoint port (default: 8010)"
+    )
+    parser.add_argument(
+        "--input-tokens", type=int, required=True, help="Target input token count"
+    )
+    parser.add_argument(
+        "--min-output-tokens",
+        type=int,
+        required=True,
+        help="Minimum output token count",
+    )
+    parser.add_argument(
+        "--tolerance",
+        type=float,
+        default=0.1,
+        help="Tolerance for input token matching (default: 0.1)",
+    )
+    parser.add_argument(
+        "--qps",
+        type=float,
+        default=2.0,
+        help="Queries per second rate limit (default: 2.0)",
+    )
+    parser.add_argument(
+        "--max-candidates",
+        type=int,
+        default=None,
+        help="Maximum number of candidates to query (default: None, use all candidates)",
+    )
+    parser.add_argument(
+        "--temperature",
+        type=float,
+        default=0.0,
+        help="Temperature for model inference (default: 0.0)",
+    )
+    parser.add_argument(
+        "--api-key",
+        type=str,
+        default="any_key",
+        help="API key for model access (default: any_key)",
+    )
+    parser.add_argument(
+        "--total-prompts",
+        type=int,
+        default=1,
+        help="Number of prompts to generate (default: 1)",
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        default="deepseek-coder-7b",
+        help="Model name to use for completion",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        help="Directory to save output files",
+        default=".",
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    start_time = time.time()
+
+    print("\nStarting prompt search with parameters:")
+    print(f"Target input tokens: {args.input_tokens}")
+    print(f"Minimum output tokens: {args.min_output_tokens}")
+    print(f"Tolerance: {args.tolerance}")
+    print(f"QPS: {args.qps}")
+    print(f"Max candidates: {args.max_candidates}")
+    print(f"Model endpoint: http://{args.host}:{args.port}/v1/chat/completions")
+    print(f"Using API key: {args.api_key}")
+    print(f"Workload dataset file: {args.workload_dataset_file}")
+    print("-" * 80)
+
+    model_endpoint = f"http://{args.host}:{args.port}/v1/chat/completions"
+
+    selector = PromptSelector(
+        trace_file=args.workload_dataset_file,
+        model_endpoint=model_endpoint,
+        qps=args.qps,
+        model=args.model,
+        temperature=args.temperature,
+        api_key=args.api_key,
+        total_prompts=args.total_prompts,
+        output_dir=args.output_dir,
+    )
+
+    filename = selector.find_matching_prompts(
+        target_input_tokens=args.input_tokens,
+        min_output_tokens=args.min_output_tokens,
+        input_tolerance=args.tolerance,
+        max_candidates=args.max_candidates,
+    )
+
+    if filename:
+        print(f"Found matching prompts: {filename}")
+
+    end_time = time.time()
+    print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")
+
+    # Print just the path at the very end for the shell script to capture
+    if filename:
+        print(filename)
+        return filename
+    else:
+        # No match: print an empty line so the caller does not mistake "None" for a path
+        print("")
+        return ""
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py
index fe6117c2..e26a4bc2 100644
--- a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py
@@ -85,17 +85,16 @@ def sample_requests(
         # print('the least requests: ', requests[len(requests) - 1])
         return requests
     except Exception as e:
-        print_err(
-            f"Warning: Failed to load prompt dataset ({e}), falling back to synthetic prompts"
-        )
-
-        # Original synthetic prompt generation
-        requests = []
-        for _ in range(num_requests):
-            synthetic_prompt = "hi " * config_input_len
-            # assign timestamp to -1 for all requests
-            requests.append((synthetic_prompt, config_input_len, config_output_len, -1))
-        return requests
+        print_err(f"Warning: Failed to load prompt dataset ({e})")
+        return []
+    else:
+        # Original synthetic prompt generation
+        requests = []
+        for _ in range(num_requests):
+            synthetic_prompt = "hi " * config_input_len
+            # assign timestamp to -1 for all requests
+            requests.append((synthetic_prompt, config_input_len, config_output_len, -1))
+        return requests
 
 
 async def get_request(
@@ -169,6 +168,7 @@ async def send_request(
     }
     if api_key is not None or api_key != "":
         headers["Authorization"] = f"Bearer {api_key}"
+    streaming = stream
 
     if backend == "vllm":
         pload = {
@@ -193,10 +193,12 @@
     request_start_time = time.perf_counter()
     ts = datetime.now(timezone.utc)
     timeout = aiohttp.ClientTimeout(total=3 * 3600)
+    status_code = None
     async with aiohttp.ClientSession(timeout=timeout) as session:
         while True:
             # print(f"Sending request: {api_url}:{pload}")
             async with session.post(api_url, headers=headers, json=pload) as response:
+                status_code = response.status
                 chunks = []
                 token_latencies = []
                 previous_token_time = time.perf_counter()
@@ -245,12 +247,15 @@
 
         if trace:
             request_trace = {
+                "request_id": idx,
                 "input_tokens": prompt_len,
                 "output_tokens": output_len
                 if len(token_latencies) == 0
                 else len(token_latencies) + 1,
                 "timestamp": ts.strftime("%Y-%m-%d %H:%M:%S %Z%z"),
                 "E2E": request_latency,
+                "status_code": status_code,
+                "success": status_code == 200 if status_code else False,
             }
             if len(token_latencies) > 0:
                 request_trace["TTFT"] = time_to_first
@@ -310,6 +315,10 @@ async def benchmark(
 
 
 def main(args: argparse.Namespace):
+    # Set global temperature from args
+    global TEMPERATURE
+    TEMPERATURE = args.temperature
+
     result = {}
     if args.verbose:
         print(args)
@@ -319,6 +328,7 @@
     result["request_rate"] = args.request_rate
     result["seed"] = args.seed
     result["model"] = args.model
+    result["temperature"] = args.temperature
     result["samples"] = args.num_prompts
 
     random.seed(args.seed)
@@ -498,5 +508,11 @@
         help="Path to a JSON file containing prompts",
     )
     parser.add_argument("--use-workload-interval", action="store_true")
+    parser.add_argument(
+        "--temperature",
+        type=float,
+        default=0.0,
+        help="Temperature for text generation (default: 0.0)",
+    )
     args = parser.parse_args()
     main(args)
diff --git a/python/aibrix/poetry.lock b/python/aibrix/poetry.lock
index 34c4493a..09d839d1 100644
--- a/python/aibrix/poetry.lock
+++ b/python/aibrix/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -2409,6 +2409,109 @@
 async-timeout = {version = ">=4.0.3", markers = "python_full_version < \"3.11.3\""}
 
 hiredis = ["hiredis (>=3.0.0)"]
 ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==23.2.1)", "requests (>=2.31.0)"]
 
+[[package]]
+name = "regex"
+version = "2024.11.6"
+description = "Alternative regular expression module, to replace re."
+optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, + {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, + {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, + {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, + {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, + {file = 
"regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, + {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, + {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, + {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, + {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, + {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = 
"sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, + {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, + {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, + {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, +] + [[package]] name = "requests" version = "2.32.3" @@ -2727,6 +2830,58 @@ files = [ {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, ] +[[package]] +name = "tiktoken" +version = "0.7.0" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tiktoken-0.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:485f3cc6aba7c6b6ce388ba634fbba656d9ee27f766216f45146beb4ac18b25f"}, + {file = "tiktoken-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e54be9a2cd2f6d6ffa3517b064983fb695c9a9d8aa7d574d1ef3c3f931a99225"}, + {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79383a6e2c654c6040e5f8506f3750db9ddd71b550c724e673203b4f6b4b4590"}, + {file = "tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d4511c52caacf3c4981d1ae2df85908bd31853f33d30b345c8b6830763f769c"}, + {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:13c94efacdd3de9aff824a788353aa5749c0faee1fbe3816df365ea450b82311"}, + {file = "tiktoken-0.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8e58c7eb29d2ab35a7a8929cbeea60216a4ccdf42efa8974d8e176d50c9a3df5"}, + {file = "tiktoken-0.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:21a20c3bd1dd3e55b91c1331bf25f4af522c525e771691adbc9a69336fa7f702"}, + {file = "tiktoken-0.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:10c7674f81e6e350fcbed7c09a65bca9356eaab27fb2dac65a1e440f2bcfe30f"}, + {file = "tiktoken-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:084cec29713bc9d4189a937f8a35dbdfa785bd1235a34c1124fe2323821ee93f"}, + {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:811229fde1652fedcca7c6dfe76724d0908775b353556d8a71ed74d866f73f7b"}, + {file = "tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86b6e7dc2e7ad1b3757e8a24597415bafcfb454cebf9a33a01f2e6ba2e663992"}, + {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1063c5748be36344c7e18c7913c53e2cca116764c2080177e57d62c7ad4576d1"}, + {file = "tiktoken-0.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:20295d21419bfcca092644f7e2f2138ff947a6eb8cfc732c09cc7d76988d4a89"}, + {file = "tiktoken-0.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:959d993749b083acc57a317cbc643fb85c014d055b2119b739487288f4e5d1cb"}, + {file = "tiktoken-0.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:71c55d066388c55a9c00f61d2c456a6086673ab7dec22dd739c23f77195b1908"}, + {file = "tiktoken-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:09ed925bccaa8043e34c519fbb2f99110bd07c6fd67714793c21ac298e449410"}, + {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03c6c40ff1db0f48a7b4d2dafeae73a5607aacb472fa11f125e7baf9dce73704"}, + {file = "tiktoken-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d20b5c6af30e621b4aca094ee61777a44118f52d886dbe4f02b70dfe05c15350"}, + {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d427614c3e074004efa2f2411e16c826f9df427d3c70a54725cae860f09e4bf4"}, + {file = "tiktoken-0.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8c46d7af7b8c6987fac9b9f61041b452afe92eb087d29c9ce54951280f899a97"}, + {file = "tiktoken-0.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:0bc603c30b9e371e7c4c7935aba02af5994a909fc3c0fe66e7004070858d3f8f"}, + {file = "tiktoken-0.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2398fecd38c921bcd68418675a6d155fad5f5e14c2e92fcf5fe566fa5485a858"}, + {file = "tiktoken-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8f5f6afb52fb8a7ea1c811e435e4188f2bef81b5e0f7a8635cc79b0eef0193d6"}, + {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:861f9ee616766d736be4147abac500732b505bf7013cfaf019b85892637f235e"}, + {file = "tiktoken-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54031f95c6939f6b78122c0aa03a93273a96365103793a22e1793ee86da31685"}, + {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:fffdcb319b614cf14f04d02a52e26b1d1ae14a570f90e9b55461a72672f7b13d"}, + {file = "tiktoken-0.7.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c72baaeaefa03ff9ba9688624143c858d1f6b755bb85d456d59e529e17234769"}, + {file = "tiktoken-0.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:131b8aeb043a8f112aad9f46011dced25d62629091e51d9dc1adbf4a1cc6aa98"}, + {file = "tiktoken-0.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cabc6dc77460df44ec5b879e68692c63551ae4fae7460dd4ff17181df75f1db7"}, + {file = "tiktoken-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8d57f29171255f74c0aeacd0651e29aa47dff6f070cb9f35ebc14c82278f3b25"}, + {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ee92776fdbb3efa02a83f968c19d4997a55c8e9ce7be821ceee04a1d1ee149c"}, + {file = "tiktoken-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e215292e99cb41fbc96988ef62ea63bb0ce1e15f2c147a61acc319f8b4cbe5bf"}, + {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a81bac94769cab437dd3ab0b8a4bc4e0f9cf6835bcaa88de71f39af1791727a"}, + {file = "tiktoken-0.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d6d73ea93e91d5ca771256dfc9d1d29f5a554b83821a1dc0891987636e0ae226"}, + {file = "tiktoken-0.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:2bcb28ddf79ffa424f171dfeef9a4daff61a94c631ca6813f43967cb263b83b9"}, + {file = "tiktoken-0.7.0.tar.gz", hash = "sha256:1077266e949c24e0291f6c350433c6f0971365ece2b173a23bc3b9f9defef6b6"}, +] + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + [[package]] name = "tomli" version = "2.2.1" @@ -3245,4 +3400,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "b7cb70ce0069c23b8e48120bd16be9ea86e3a068b243d4f7785bd8b5571a06d9" +content-hash = "9c45901078835616ed5defd034259ff5941195249b092dc2914b5aa21dedd46d" diff --git 
a/python/aibrix/pyproject.toml b/python/aibrix/pyproject.toml index 526443e6..672f6728 100644 --- a/python/aibrix/pyproject.toml +++ b/python/aibrix/pyproject.toml @@ -58,7 +58,7 @@ aiohttp = "^3.11.7" dash = "^2.18.2" matplotlib = "^3.9.2" filelock = "^3.16.1" - +tiktoken = "^0.7.0" [tool.poetry.group.dev.dependencies] mypy = "1.11.1"
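
--
Example invocation (illustrative; the trace path and result file are
placeholders, flags as parsed in benchmark.sh above):

    ./benchmark.sh -m llama2-7b \
        --workload /path/to/trace.jsonl \
        --temperature 0.0 \
        -o result/llama2-7b.jsonl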