llm-d · pancak3 · Oct 4, 2025 · Oct 4, 2025 · Oct 4, 2025 · Oct 4, 2025
diff --git a/.github/workflows/ci-nighly-benchmark-ocp.yaml b/.github/workflows/ci-nighly-benchmark-ocp.yaml
@@ -62,73 +62,75 @@ jobs:
       - name: Run install_deps.sh
         run: |
           sudo apt-get update
-          ./setup/install_deps.sh
+          curl -sSL https://pdm-project.org/install-pdm.py | python3 -
+          cd llm_d_benchmark && ./setup/install_deps.sh
         shell: bash
 
-      - name: Install config explorer dependencies
-        run: pip install -r config_explorer/requirements.txt
+      - name: Install python dependencies
+        run: |
+          pdm install
         shell: bash
 
       - name: Cleanup target cloud (modelservice)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/teardown.sh -c ocp_L40_fb -t modelservice -d
+        run: cd llm_d_benchmark && pdm run bash ./setup/teardown.sh -c ocp_L40_fb -t modelservice -d  # pdm run: same wrapper as the PR benchmark workflow; no "-x" because this step carries the HF token
 
       - name: Cleanup target cloud (standalone)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/teardown.sh -c ocp_L40_fb -t standalone -d
+        run: cd llm_d_benchmark && pdm run bash ./setup/teardown.sh -c ocp_L40_fb -t standalone -d
 
       - name: Standup target cloud (standalone)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/standup.sh -c ocp_L40_fb -t standalone
+        run: cd llm_d_benchmark && pdm run bash ./setup/standup.sh -c ocp_L40_fb -t standalone
 
       - name: Run benchmark (standalone, inference-perf)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/run.sh -c ocp_L40_fb -t standalone
+        run: cd llm_d_benchmark && pdm run bash ./setup/run.sh -c ocp_L40_fb -t standalone
 
       - name: Run benchmark (standalone, fmperf)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/run.sh -c ocp_L40_fb -t standalone -l fmperf -w sanity_short-input
+        run: cd llm_d_benchmark && pdm run bash ./setup/run.sh -c ocp_L40_fb -t standalone -l fmperf -w sanity_short-input
 
       - name: Run benchmark (standalone, guidellm)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/run.sh -c ocp_L40_fb -t standalone -l guidellm -w sanity_concurrent
+        run: cd llm_d_benchmark && pdm run bash ./setup/run.sh -c ocp_L40_fb -t standalone -l guidellm -w sanity_concurrent
 
       - name: Run benchmark (standalone, vllm-benchmark)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/run.sh -c ocp_L40_fb -t standalone -l vllm-benchmark
+        run: cd llm_d_benchmark && pdm run bash ./setup/run.sh -c ocp_L40_fb -t standalone -l vllm-benchmark
 
       - name: Cleanup target cloud (standalone)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/teardown.sh -c ocp_L40_fb -t standalone -d
+        run: cd llm_d_benchmark && pdm run bash ./setup/teardown.sh -c ocp_L40_fb -t standalone -d
 
       - name: E2E target cloud (modelservice, inference-perf)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep
+        run: cd llm_d_benchmark && pdm run bash ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep
 
       - name: E2E target cloud (modelservice, fmperf)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l fmperf -w sanity_short-input.yaml
+        run: cd llm_d_benchmark && pdm run bash ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l fmperf -w sanity_short-input.yaml
 
       - name: E2E target cloud (modelservice, guidellm)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l guidellm -w sanity_concurrent.yaml
+        run: cd llm_d_benchmark && pdm run bash ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l guidellm -w sanity_concurrent.yaml
 
 
       - name: E2E target cloud (modelservice, vllm-benchmark)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l vllm-benchmark
+        run: cd llm_d_benchmark && pdm run bash ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l vllm-benchmark
 
 
       - name: Install AWS CLI

diff --git a/.github/workflows/ci-pr-benchmark.yaml b/.github/workflows/ci-pr-benchmark.yaml
@@ -1,8 +1,8 @@
 name: CI - PR Benchmark Run
 
 on:
-  pull_request:
-
+  pull_request: {}
+  workflow_dispatch: {}
 jobs:
 
   run-benchmark-sh:
@@ -37,23 +37,28 @@ jobs:
       - name: Run install_deps
         run: |
           sudo apt-get update
-          ./setup/install_deps.sh
+          curl -sSL https://pdm-project.org/install-pdm.py | python3 -
+          cd llm_d_benchmark && ./setup/install_deps.sh
         shell: bash
 
-      - name: Install config explorer dependencies
-        run: pip install -r config_explorer/requirements.txt
+      - name: Install python dependencies
+        run: |
+          pdm install
         shell: bash
 
       - name: Standup a modelservice using llm-d-inference-sim
         run: |
-          ./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8,9
+          cd llm_d_benchmark
+          pdm run bash -x ./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8,9
 
       - name: Run harness (mock)
         env:
           LLMD_CONTROL_DRY_RUN: 1 # TODO: the harness does not work on kind yet because there is no harness endpoint
         run: |
-          ./setup/run.sh -c kind_sim_fb --dry-run
+          cd llm_d_benchmark
+          pdm run bash -x ./setup/run.sh -c kind_sim_fb --dry-run
 
       - name: Teardown
         run: |
-          ./setup/teardown.sh -c kind_sim_fb
+          cd llm_d_benchmark
+          pdm run bash -x ./setup/teardown.sh -c kind_sim_fb
diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml
@@ -4,6 +4,8 @@ on:
   pull_request:
     branches:
       - main
+  workflow_dispatch: {}
+
 
 jobs:
   lint-and-test:

diff --git a/.github/workflows/ci-release.yaml b/.github/workflows/ci-release.yaml
@@ -6,6 +6,7 @@ on:
       - 'v*'  # Runs when a tag like v0.1.0 is pushed
   release:
     types: [published]  # Also runs when a GitHub release is published
+  workflow_dispatch: {}
 
 jobs:
   docker-build-and-push:

diff --git a/.github/workflows/config-explorer-test.yaml → .github/workflows/python-test.yaml b/.github/workflows/config-explorer-test.yaml → .github/workflows/python-test.yaml
@@ -1,13 +1,13 @@
-name: Config Explorer Test
+name: Python Test
 
-on: [push, pull_request]
+on: [push, pull_request, workflow_dispatch]
 
 jobs:
   config-explorer-pytest:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.11", "3.12", "3.13"]
+        python-version: ["3.12", "3.13"]
 
     steps:
       - uses: actions/checkout@v5
@@ -23,11 +23,17 @@ jobs:
 
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          pip install -r config_explorer/requirements.txt
+          python -m pip install pdm
+          pdm install
 
-      - name: Test with pytest
+      - name: Format check
         run: |
-          pip install pytest pytest-cov
-          cd config_explorer
-          pytest -s tests/ --doctest-modules --junitxml=junit/test-results.xml --cov=config_explorer --cov-report=xml --cov-report=html
+          pdm run format-check
+
+      - name: Lint check
+        run: |
+          pdm run lint-check
+
+      - name: Test coverage
+        run: |
+          pdm run test-cov
diff --git a/.gitignore b/.gitignore
@@ -43,6 +43,16 @@ data/**/logs/
 
 # Python
 __pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+dist/
+build/
+
+# PDM
+__pypackages__/
+.pdm-python
+.pdm.toml
 
 # Jupyter Notebook
 .ipynb_checkpoints
@@ -57,7 +67,14 @@ env.bak/
 venv.bak/
 environment/
 
-scenarios/none.sh
+llm_d_benchmark/scenarios/none.sh
 
 # Python specifics
-**/*.egg-info
+**/*.egg-info
+
+# coverage
+**/.coverage
+
+# cache
+.pytest_cache/
+.ruff_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,12 +1,31 @@
 repos:
   - repo: local
     hooks:
-      - id: basic_unit_test
-        name: Basic Unit Test
-        entry: bash -c './setup/standup.sh -c kind_sim_fb -n'
+      - id: format-lint
+        name: Format and Lint
+        entry: pdm run format-lint
+        language: system
+        pass_filenames: false
+        types: [python]
+
+  - repo: local
+    hooks:
+      - id: py_unit_test
+        name: Python Unit Test
+        entry: pdm run pytest  # pytest is launched through pdm, without file arguments
+        require_serial: true
+        pass_filenames: false  # staged file names are not appended to the entry command
+        language: system
+
+  - repo: local
+    hooks:
+      - id: basic_shell_unit_test
+        name: Basic Shell Unit Test
+        entry: bash -c 'cd llm_d_benchmark && pdm run bash -x ./setup/standup.sh -c kind_sim_fb -n'
         require_serial: true
         pass_filenames: false
         language: system
+
   - repo: https://github.com/ibm/detect-secrets
     # If you desire to use a specific version of detect-secrets, you can replace `master` with other git revisions such as branch, tag or commit sha.
     # You are encouraged to use static refs such as tags, instead of branch name
@@ -21,3 +40,4 @@ repos:
         # when "--baseline" with "--use-all-plugins", pre-commit scan with all available plugins
         # add "--fail-on-unaudited" to fail pre-commit for unaudited potential secrets
         args: [--baseline, .secrets.baseline, --use-all-plugins]
+        additional_dependencies: ['boxsdk==3.9.2']
diff --git a/Makefile b/Makefile
@@ -12,35 +12,16 @@ CONTAINER_TOOL := $(shell if command -v docker >/dev/null 2>&1; then echo docker
 BUILDER := $(shell command -v buildah >/dev/null 2>&1 && echo buildah || echo $(CONTAINER_TOOL))
 PLATFORMS ?= linux/amd64,linux/arm64 # linux/s390x,linux/ppc64le
 
-# go source files
-SRC = $(shell find . -type f -name '*.go')
-
 .PHONY: help
 help: ## Print help
 	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
 
-##@ Development
-
-.PHONY: format
-format: ## Format Go source files
-	@printf "\033[33;1m==== Running gofmt ====\033[0m\n"
-	@gofmt -l -w $(SRC)
-
-.PHONY: test
-test: check-ginkgo ## Run tests
-	@printf "\033[33;1m==== Running tests ====\033[0m\n"
-	ginkgo -r -v
 
 .PHONY: post-deploy-test
 post-deploy-test: ## Run post deployment tests
 	echo Success!
 	@echo "Post-deployment tests passed."
 
-.PHONY: lint
-lint: check-golangci-lint ## Run lint
-	@printf "\033[33;1m==== Running linting ====\033[0m\n"
-	golangci-lint run
-
 ##@ Container Build/Push
 
 .PHONY: buildah-build
@@ -243,9 +224,6 @@ env: load-version-json ## Print environment variables
 
 .PHONY: check-tools
 check-tools: \
-  check-go \
-  check-ginkgo \
-  check-golangci-lint \
   check-jq \
   check-kustomize \
   check-envsubst \
@@ -255,21 +233,6 @@ check-tools: \
   check-podman
 	@echo "✅ All required tools are installed."
 
-.PHONY: check-go
-check-go:
-	@command -v go >/dev/null 2>&1 || { \
-	  echo "❌ Go is not installed. Install it from https://golang.org/dl/"; exit 1; }
-
-.PHONY: check-ginkgo
-check-ginkgo:
-	@command -v ginkgo >/dev/null 2>&1 || { \
-	  echo "❌ ginkgo is not installed. Install with: go install github.com/onsi/ginkgo/v2/ginkgo@latest"; exit 1; }
-
-.PHONY: check-golangci-lint
-check-golangci-lint:
-	@command -v golangci-lint >/dev/null 2>&1 || { \
-	  echo "❌ golangci-lint is not installed. Install from https://golangci-lint.run/usage/install/"; exit 1; }
-
 .PHONY: check-jq
 check-jq:
 	@command -v jq >/dev/null 2>&1 || { \

diff --git a/README.md b/README.md
@@ -7,20 +7,26 @@ This repository provides an automated workflow for benchmarking LLM inference us
 Provide a single source of automation for repeatable and reproducible experiments and performance evaluation on `llm-d`.
 
 ### 📦 Repository Setup
+Install pdm following the official instructions: https://pdm-project.org/en/latest/#installation
 
+Then, clone this repository and install the dependencies:
 ```
-git clone https://github.com/llm-d/llm-d-benchmark.git
-cd llm-d-benchmark
-./setup/install_deps.sh
-pip install -r config_explorer/requirements.txt
+git clone https://github.com/llm-d/llm-d-benchmark.git && cd llm-d-benchmark/
+pdm install && pdm run $SHELL
+pre-commit install
+```
+
+Finally, install additional dependencies:
+```
+cd llm_d_benchmark/ && ./setup/install_deps.sh
 ```
 
 ## Quickstart
 
 **Out of the box:** **`standup`** a `llm-d` stack (default method is `llm-d-modelservice`, serving `meta-llama/Llama-3.2-1B-Instruct` model), **`run`** a harness (default `inference-perf`) with a load profile (default `sanity_random`) and then **`teardown`** the deployed stack.
 
 ```
-./e2e.sh
+cd llm_d_benchmark/ && ./e2e.sh
 ```
 
 > [!TIP]
@@ -29,6 +35,7 @@ pip install -r config_explorer/requirements.txt
 The same above example could be explicitly split in three separate parts.
 
 ```
+cd llm_d_benchmark/
 ./setup/standup.sh
 ./run.sh
 ./setup/teardown.sh
@@ -37,6 +44,7 @@ The same above example could be explicitly split in three separate parts.
 A user can elect to **`standup`** an `llm-d` stack once, and then **`run`** the `inference-perf` harness with a different load profile (e.g., `chatbot_synthetic`).
 
 ```
+cd llm_d_benchmark/
 ./run.sh --harness inference-perf --workload chatbot_synthetic --methods <a string that matches an inference service or pod>
 ```