Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 17 additions & 15 deletions .github/workflows/ci-nighly-benchmark-ocp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,73 +62,75 @@ jobs:
- name: Run install_deps.sh
run: |
sudo apt-get update
./setup/install_deps.sh
curl -sSL https://pdm-project.org/install-pdm.py | python3 -
cd llm_d_benchmark && ./setup/install_deps.sh
shell: bash

- name: Install config explorer dependencies
run: pip install -r config_explorer/requirements.txt
- name: Install python dependencies
run: |
pdm install
shell: bash

- name: Cleanup target cloud (modelservice)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/teardown.sh -c ocp_L40_fb -t modelservice -d
run: cd llm_d_benchmark && ./setup/teardown.sh -c ocp_L40_fb -t modelservice -d

- name: Cleanup target cloud (standalone)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/teardown.sh -c ocp_L40_fb -t standalone -d
run: cd llm_d_benchmark && ./setup/teardown.sh -c ocp_L40_fb -t standalone -d

- name: Standup target cloud (standalone)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/standup.sh -c ocp_L40_fb -t standalone
run: cd llm_d_benchmark && ./setup/standup.sh -c ocp_L40_fb -t standalone

- name: Run benchmark (standalone, inference-perf)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/run.sh -c ocp_L40_fb -t standalone
run: cd llm_d_benchmark && ./setup/run.sh -c ocp_L40_fb -t standalone

- name: Run benchmark (standalone, fmperf)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/run.sh -c ocp_L40_fb -t standalone -l fmperf -w sanity_short-input
run: cd llm_d_benchmark && ./setup/run.sh -c ocp_L40_fb -t standalone -l fmperf -w sanity_short-input

- name: Run benchmark (standalone, guidellm)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/run.sh -c ocp_L40_fb -t standalone -l guidellm -w sanity_concurrent
run: cd llm_d_benchmark && ./setup/run.sh -c ocp_L40_fb -t standalone -l guidellm -w sanity_concurrent

- name: Run benchmark (standalone, vllm-benchmark)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/run.sh -c ocp_L40_fb -t standalone -l vllm-benchmark
run: cd llm_d_benchmark && ./setup/run.sh -c ocp_L40_fb -t standalone -l vllm-benchmark

- name: Cleanup target cloud (standalone)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/teardown.sh -c ocp_L40_fb -t standalone -d
run: cd llm_d_benchmark && ./setup/teardown.sh -c ocp_L40_fb -t standalone -d

- name: E2E target cloud (modelservice, inference-perf)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep
run: cd llm_d_benchmark && ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep

- name: E2E target cloud (modelservice, fmperf)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l fmperf -w sanity_short-input.yaml
run: cd llm_d_benchmark && ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l fmperf -w sanity_short-input.yaml

- name: E2E target cloud (modelservice, guidellm)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l guidellm -w sanity_concurrent.yaml
run: cd llm_d_benchmark && ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l guidellm -w sanity_concurrent.yaml


- name: E2E target cloud (modelservice, vllm-benchmark)
env:
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l vllm-benchmark
run: cd llm_d_benchmark && ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l vllm-benchmark


- name: Install AWS CLI
Expand Down
21 changes: 13 additions & 8 deletions .github/workflows/ci-pr-benchmark.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
name: CI - PR Benchmark Run

on:
pull_request:

pull_request: {}
workflow_dispatch: {}
jobs:

run-benchmark-sh:
Expand Down Expand Up @@ -37,23 +37,28 @@ jobs:
- name: Run install_deps
run: |
sudo apt-get update
./setup/install_deps.sh
curl -sSL https://pdm-project.org/install-pdm.py | python3 -
cd llm_d_benchmark && ./setup/install_deps.sh
shell: bash

- name: Install config explorer dependencies
run: pip install -r config_explorer/requirements.txt
- name: Install python dependencies
run: |
pdm install
shell: bash

- name: Standup a modelservice using llm-d-inference-sim
run: |
./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8,9
cd llm_d_benchmark
pdm run bash -x ./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8,9

- name: Run harness (mock)
env:
LLMD_CONTROL_DRY_RUN: 1 # TODO: harness doesn't work now for kind bc no harness endpoint
run: |
./setup/run.sh -c kind_sim_fb --dry-run
cd llm_d_benchmark
pdm run bash -x ./setup/run.sh -c kind_sim_fb --dry-run

- name: Teardown
run: |
./setup/teardown.sh -c kind_sim_fb
cd llm_d_benchmark
pdm run bash -x ./setup/teardown.sh -c kind_sim_fb
2 changes: 2 additions & 0 deletions .github/workflows/ci-pr-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ on:
pull_request:
branches:
- main
workflow_dispatch: {}


jobs:
lint-and-test:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/ci-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
- 'v*' # Runs when a tag like v0.1.0 is pushed
release:
types: [published] # Also runs when a GitHub release is published
workflow_dispatch: {}

jobs:
docker-build-and-push:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
name: Config Explorer Test
name: Python Test

on: [push, pull_request]
on: [push, pull_request, workflow_dispatch]

jobs:
config-explorer-pytest:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11", "3.12", "3.13"]
python-version: ["3.12", "3.13"]

steps:
- uses: actions/checkout@v5
Expand All @@ -23,11 +23,17 @@ jobs:

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r config_explorer/requirements.txt
python -m pip install pdm
pdm install

- name: Test with pytest
- name: Format check
run: |
pip install pytest pytest-cov
cd config_explorer
pytest -s tests/ --doctest-modules --junitxml=junit/test-results.xml --cov=config_explorer --cov-report=xml --cov-report=html
pdm run format-check

- name: Lint check
run: |
pdm run lint-check

- name: Test coverage
run: |
pdm run test-cov
21 changes: 19 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ data/**/logs/

# Python
__pycache__/
*.py[cod]
*$py.class
*.egg-info/
dist/
build/

# PDM
__pypackages__/
.pdm-python
.pdm.toml

# Jupyter Notebook
.ipynb_checkpoints
Expand All @@ -57,7 +67,14 @@ env.bak/
venv.bak/
environment/

scenarios/none.sh
llm_d_benchmark/scenarios/none.sh

# Python specifics
**/*.egg-info
**/*.egg-info

# coverage
**/.coverage

# cache
.pytest_cache/
.ruff_cache/
26 changes: 23 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,31 @@
repos:
- repo: local
hooks:
- id: basic_unit_test
name: Basic Unit Test
entry: bash -c './setup/standup.sh -c kind_sim_fb -n'
- id: format-lint
name: Format and Lint
entry: pdm run format-lint
language: system
pass_filenames: false
types: [python]

- repo: local
hooks:
- id: py_unit_test
name: Python Unit Test
entry: pdm run pytest
require_serial: true
pass_filenames: false
language: system

- repo: local
hooks:
- id: basic_shell_unit_test
name: Basic Shell Unit Test
entry: bash -c 'cd llm_d_benchmark && pdm run bash -x ./setup/standup.sh -c kind_sim_fb -n'
require_serial: true
pass_filenames: false
language: system

- repo: https://github.com/ibm/detect-secrets
# If you desire to use a specific version of detect-secrets, you can replace `master` with other git revisions such as branch, tag or commit sha.
# You are encouraged to use static refs such as tags, instead of branch name
Expand All @@ -21,3 +40,4 @@ repos:
# when "--baseline" with "--use-all-plugins", pre-commit scan with all available plugins
# add "--fail-on-unaudited" to fail pre-commit for unaudited potential secrets
args: [--baseline, .secrets.baseline, --use-all-plugins]
additional_dependencies: ['boxsdk==3.9.2']
37 changes: 0 additions & 37 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,16 @@ CONTAINER_TOOL := $(shell if command -v docker >/dev/null 2>&1; then echo docker
BUILDER := $(shell command -v buildah >/dev/null 2>&1 && echo buildah || echo $(CONTAINER_TOOL))
PLATFORMS ?= linux/amd64,linux/arm64 # linux/s390x,linux/ppc64le

# go source files
SRC = $(shell find . -type f -name '*.go')

.PHONY: help
help: ## Print help
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

##@ Development

.PHONY: format
format: ## Format Go source files
@printf "\033[33;1m==== Running gofmt ====\033[0m\n"
@gofmt -l -w $(SRC)

.PHONY: test
test: check-ginkgo ## Run tests
@printf "\033[33;1m==== Running tests ====\033[0m\n"
ginkgo -r -v

.PHONY: post-deploy-test
post-deploy-test: ## Run post deployment tests
echo Success!
@echo "Post-deployment tests passed."

.PHONY: lint
lint: check-golangci-lint ## Run lint
@printf "\033[33;1m==== Running linting ====\033[0m\n"
golangci-lint run

##@ Container Build/Push

.PHONY: buildah-build
Expand Down Expand Up @@ -243,9 +224,6 @@ env: load-version-json ## Print environment variables

.PHONY: check-tools
check-tools: \
check-go \
check-ginkgo \
check-golangci-lint \
check-jq \
check-kustomize \
check-envsubst \
Expand All @@ -255,21 +233,6 @@ check-tools: \
check-podman
@echo "✅ All required tools are installed."

.PHONY: check-go
check-go:
@command -v go >/dev/null 2>&1 || { \
echo "❌ Go is not installed. Install it from https://golang.org/dl/"; exit 1; }

.PHONY: check-ginkgo
check-ginkgo:
@command -v ginkgo >/dev/null 2>&1 || { \
echo "❌ ginkgo is not installed. Install with: go install github.com/onsi/ginkgo/v2/ginkgo@latest"; exit 1; }

.PHONY: check-golangci-lint
check-golangci-lint:
@command -v golangci-lint >/dev/null 2>&1 || { \
echo "❌ golangci-lint is not installed. Install from https://golangci-lint.run/usage/install/"; exit 1; }

.PHONY: check-jq
check-jq:
@command -v jq >/dev/null 2>&1 || { \
Expand Down
18 changes: 13 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,26 @@ This repository provides an automated workflow for benchmarking LLM inference us
Provide a single source of automation for repeatable and reproducible experiments and performance evaluation on `llm-d`.

### 📦 Repository Setup
Install pdm following the official instructions: https://pdm-project.org/en/latest/#installation

Then, clone this repository and install the dependencies:
```
git clone https://github.com/llm-d/llm-d-benchmark.git
cd llm-d-benchmark
./setup/install_deps.sh
pip install -r config_explorer/requirements.txt
git clone https://github.com/llm-d/llm-d-benchmark.git && cd llm-d-benchmark/
pdm install && pdm run $SHELL
pre-commit install
```

Finally, install additional dependencies:
```
cd llm_d_benchmark/ && ./setup/install_deps.sh
```

## Quickstart

**Out of the box:** **`standup`** an `llm-d` stack (default method is `llm-d-modelservice`, serving the `meta-llama/Llama-3.2-1B-Instruct` model), **`run`** a harness (default `inference-perf`) with a load profile (default `sanity_random`) and then **`teardown`** the deployed stack.

```
./e2e.sh
cd llm_d_benchmark/ && ./e2e.sh
```

> [!TIP]
Expand All @@ -29,6 +35,7 @@ pip install -r config_explorer/requirements.txt
The example above can also be split explicitly into three separate parts.

```
cd llm_d_benchmark/
./setup/standup.sh
./run.sh
./setup/teardown.sh
Expand All @@ -37,6 +44,7 @@ The same above example could be explicitly split in three separate parts.
A user can elect to **`standup`** an `llm-d` stack once, and then **`run`** the `inference-perf` harness with a different load profile (e.g., `chatbot_synthetic`):

```
cd llm_d_benchmark/
./run.sh --harness inference-perf --workload chatbot_synthetic --methods <a string that matches an inference service or pod>
```

Expand Down
Loading