diff --git a/build/container/Dockerfile.runtime b/build/container/Dockerfile.runtime
index 80b18774..1f63e8b2 100644
--- a/build/container/Dockerfile.runtime
+++ b/build/container/Dockerfile.runtime
@@ -34,7 +34,7 @@ COPY --from=builder /app/dist/*.whl ./
 # Install build dependencies and clean up in one step (avoiding creating another new layer)
 RUN apt-get update \
-    && apt-get install -y --no-install-recommends gcc python3-dev \
+    && apt-get install -y --no-install-recommends gcc python3-dev mawk \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/* \
     && pip install --no-cache-dir ./*.whl \
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh
index 9f6f651f..07f8b1e6 100755
--- a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/benchmark.sh
@@ -16,7 +16,7 @@
 # Result files will be added to 'PATH_PREFIX' directory.
 PATH_PREFIX=`dirname "$0"`
-FILE_NAME="result"
+OUTPUT_FILE=
 
 MODEL="llama2-7b"
 TEMPERATURE=0.0
@@ -40,8 +40,9 @@ generate_workload() {
     local num_prompts=$4
     local model=$5
    local workload_path=$6
+    local output_dir=$7
 
-    echo "Generating workload for input=$input_len, output=$output_len, API_KEY=$api_key, num_prompts=$num_prompts, model=$model, temperature=$TEMPERATURE, workload_path=$workload_path"
+    echo "Generating workload for input=$input_len, output=$output_len, API_KEY=$api_key, num_prompts=$num_prompts, model=$model, temperature=$TEMPERATURE, workload_path=$workload_path, output_dir=$output_dir"
 
     python $PATH_PREFIX/gen_benchmark_prompt.py \
         $workload_path \
@@ -54,7 +55,8 @@ generate_workload() {
         --api-key "$api_key" \
         --total-prompts "$num_prompts" \
         --model "$model" \
-        --temperature "$TEMPERATURE"
+        --temperature "$TEMPERATURE" \
+        --output-dir "$output_dir"
 }
 
 while [[ $# -gt 0 ]]; do
@@ -64,7 +66,7 @@
         shift 2
         ;;
     -o|--output)
-        FILE_NAME="$2"
+        OUTPUT_FILE="$2"
         shift 2
         ;;
     --input-start)
@@ -115,10 +117,15 @@
 done
 
-# Make sure the directory exists and clear output file
-OUTPUT_FILE="${PATH_PREFIX}/result/${FILE_NAME}.jsonl"
-PROMPT_DIR="${PATH_PREFIX}/result/prompts"
-mkdir -p `dirname "$OUTPUT_FILE"`
+if [[ -z "$OUTPUT_FILE" ]]; then
+  echo "No output file specified; using default output path"
+  OUTPUT_FILE="${PATH_PREFIX}/result/${MODEL}.jsonl"
+fi
+
+dir_path=$(dirname "$OUTPUT_FILE")
+PROMPT_DIR="${dir_path}/prompts"
+
+mkdir -p "$dir_path"
 mkdir -p "$PROMPT_DIR"
 
 # Clear the workload directory
@@ -146,17 +153,16 @@ while [[ $input_len -le $input_limit ]]; do
     if [[ -n "$workload" ]]; then
         # Make sure all arguments are passed in the correct order
-        generate_workload "$input_len" "$output_len" "$LLM_API_KEY" "$TOTAL" "$MODEL" "$workload"
+        generate_workload "$input_len" "$output_len" "$LLM_API_KEY" "$TOTAL" "$MODEL" "$workload" "$dir_path"
     else
         echo "Skip workload pattern generation, benchmark with fixed prompts"
     fi
 
-    # Convert rate_start to integer (multiply by 1000 and remove decimals)
-    req_rate=$(echo "$rate_start * 1000" | bc | cut -d. -f1)
-    rate_limit_scaled=$(echo "$rate_limit * 1000" | bc | cut -d. -f1)
+    # Convert rates to integers (multiply by 1000 and round) since -le does not work with floats
+    req_rate=$(echo "$rate_start" | awk '{printf "%.0f", $1 * 1000}')
+    rate_limit_scaled=$(echo "$rate_limit" | awk '{printf "%.0f", $1 * 1000}')
 
     while [[ $req_rate -le $rate_limit_scaled ]]; do
-        actual_rate=$(echo "scale=3; $req_rate / 1000" | bc)
-
+        actual_rate=$(echo "$req_rate" | awk '{ printf "%.3f", $1 / 1000 }')
         if [[ -n "$workload" ]]; then
             WORKLOAD_FILE="$PROMPT_DIR/prompt_in${input_len}_out${output_len}.json"
             if [[ -f "$WORKLOAD_FILE" ]]; then
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_benchmark_prompt.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_benchmark_prompt.py
index 50824d96..4a4c8309 100644
--- a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_benchmark_prompt.py
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gen_benchmark_prompt.py
@@ -47,6 +47,7 @@ def __init__(
         temperature: float = 0.0,
         api_key: str = "any_key",
         total_prompts: int = 1,
+        output_dir: str = ".",
     ):
         self.trace_file = trace_file
         self.model_endpoint = model_endpoint
@@ -56,6 +57,7 @@ def __init__(
         self.temperature = temperature
         self.api_key = api_key
         self.total_prompts = total_prompts
+        self.output_dir = output_dir
 
     def count_tokens(self, text: str) -> int:
         """Estimate token count using VLLM's tokenizer."""
@@ -163,9 +165,7 @@ def save_results(
             return
 
-        # Get the directory where the script is located
-        script_dir = os.path.dirname(os.path.abspath(__file__))
-        # Create the prompts directory relative to the script location
-        prompts_dir = os.path.join(script_dir, "result", "prompts")
+        # Create the prompts directory under the configured output directory
+        prompts_dir = os.path.join(self.output_dir, "prompts")
         os.makedirs(prompts_dir, exist_ok=True)
 
         filename = os.path.join(
@@ -272,6 +272,12 @@ def parse_args():
         default="deepseek-coder-7b",
         help="Model name to use for completion",
     )
+    parser.add_argument(
+        "--output-dir",
+        type=str,
+        help="Directory to save output files",
+        default=".",
+    )
 
     return parser.parse_args()
@@ -300,6 +306,7 @@ def main():
         temperature=args.temperature,
         api_key=args.api_key,
         total_prompts=args.total_prompts,
+        output_dir=args.output_dir,
     )
 
     matching_prompts = selector.find_matching_prompts(
@@ -319,12 +326,12 @@
         print(f"Output tokens: {output_tokens}")
         print(f"Complete usage data: {response_data.get('usage', {})}")
         print("-" * 40)
-        print("Prompt:")
-        print(prompt)
-        print("-" * 40)
-        print("Model completion:")
-        if "choices" in response_data:
-            print(response_data["choices"][0].get("message", {}).get("content", ""))
+        # print("Prompt:")
+        # print(prompt)
+        # print("-" * 40)
+        # print("Model completion:")
+        # if "choices" in response_data:
+        #     print(response_data["choices"][0].get("message", {}).get("content", ""))
 
     end_time = time.time()
     print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")
diff --git a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py
index cabe3edb..e26a4bc2 100644
--- a/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py
+++ b/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/gpu_benchmark.py
@@ -254,8 +254,8 @@ async def send_request(
         else len(token_latencies) + 1,
         "timestamp": ts.strftime("%Y-%m-%d %H:%M:%S %Z%z"),
         "E2E": request_latency,
-        "status_code": status_code,  # Add status code to trace
-        "success": status_code == 200 if status_code else False,  # Add success flag
+        "status_code": status_code,
+        "success": status_code == 200 if status_code else False,
     }
     if len(token_latencies) > 0:
         request_trace["TTFT"] = time_to_first
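
Note: a minimal, self-contained sketch of the integer-scaled rate loop that benchmark.sh now relies on (awk is its only dependency, which is presumably why the runtime image adds mawk). The rate values and the doubling step here are illustrative assumptions, not taken from the script:

    #!/bin/bash
    # [[ ... -le ... ]] compares integers only, so scale the float rates
    # by 1000 and round them to integers before the comparison loop.
    rate_start=0.5   # requests/sec (illustrative value)
    rate_limit=2.0   # requests/sec (illustrative value)

    req_rate=$(echo "$rate_start" | awk '{printf "%.0f", $1 * 1000}')          # -> 500
    rate_limit_scaled=$(echo "$rate_limit" | awk '{printf "%.0f", $1 * 1000}') # -> 2000

    while [[ $req_rate -le $rate_limit_scaled ]]; do
        # Recover the float rate to pass to the benchmark client.
        actual_rate=$(echo "$req_rate" | awk '{ printf "%.3f", $1 / 1000 }')
        echo "running benchmark at request rate ${actual_rate} req/s"
        req_rate=$((req_rate * 2))  # step policy is an assumption; the real script may differ
    done

This prints 0.500, 1.000, and 2.000 before the loop exits. The same scaling is what lets the diff swap bc, which is typically absent from slim base images, for awk.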