Skip to content

Commit ef9eb94

Browse files
authored
GuideLLM v0.4.0 Enablement (#479)
* Bump guidellm in container to devel build
* Update guidellm harness to use scenarios and fix convert script
* Add a few basic workload examples
* Bump GuideLLM
* GuideLLM now converts single dataset to a list automatically
* Install CPU version of torch to cut down on install size
1 parent 0f65966 commit ef9eb94

File tree

8 files changed

+78
-12
lines changed

8 files changed

+78
-12
lines changed

build/Dockerfile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,12 @@ RUN cd vllm; \
6363

6464
ARG GUIDELLM_REPO=https://github.com/vllm-project/guidellm.git
6565
ARG GUIDELLM_BRANCH=main
66-
ARG GUIDELLM_COMMIT=72374efdf7d4432173fafec3924dc94ac3b11449
66+
ARG GUIDELLM_COMMIT=ba51acf5b0ba377c5edc35109a78cd3ebb402922
6767
RUN git clone --branch ${GUIDELLM_BRANCH} ${GUIDELLM_REPO}
6868
RUN cd guidellm; \
69+
pip install torch --index-url https://download.pytorch.org/whl/cpu; \
6970
git checkout ${GUIDELLM_COMMIT}; \
70-
pip install .
71+
pip install .[recommended]
7172

7273
RUN echo "fmperf: ${FM_PERF_REPO} ${FM_PERF_BRANCH}" > /workspace/repos.txt; \
7374
echo "inference-perf: ${INFERENCE_PERF_REPO} ${INFERENCE_PERF_BRANCH}" >> /workspace/repos.txt; \

workload/harnesses/guidellm-llm-d-benchmark.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#!/usr/bin/env bash
2+
3+
echo Using experiment result dir: "$LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR"
24
mkdir -p "$LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR"
3-
cd ${LLMDBENCH_RUN_WORKSPACE_DIR}/guidellm/
4-
cp -f ${LLMDBENCH_RUN_WORKSPACE_DIR}/profiles/guidellm/${LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME} $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/${LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME}
5-
guidellm benchmark --$(cat ${LLMDBENCH_RUN_WORKSPACE_DIR}/profiles/guidellm/${LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME} | yq -r 'to_entries | map("\(.key)=\(.value)") | join(" --")' | sed -e 's^=none ^^g' -e 's^=none$^^g') --output-path=$LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/results.json > >(tee -a $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/stdout.log) 2> >(tee -a $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/stderr.log >&2)
5+
pushd "$LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR"
6+
guidellm benchmark --scenario "${LLMDBENCH_RUN_WORKSPACE_DIR}/profiles/guidellm/${LLMDBENCH_RUN_EXPERIMENT_HARNESS_WORKLOAD_NAME}" --output-path "${LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR}/results.json" --disable-progress > >(tee -a $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/stdout.log) 2> >(tee -a $LLMDBENCH_RUN_EXPERIMENT_RESULTS_DIR/stderr.log >&2)
67
export LLMDBENCH_RUN_EXPERIMENT_HARNESS_RC=$?
78

89
# If benchmark harness returned with an error, exit here
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
target: REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL
2+
model: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL
3+
request_type: text_completions
4+
profile: constant
5+
rate: [1,2,4,8]
6+
max_seconds: 120
7+
data:
8+
prompt_tokens_min: 10
9+
prompt_tokens_max: 8192
10+
prompt_tokens: 4096
11+
prompt_tokens_stdev: 2048
12+
output_tokens_min: 10
13+
output_tokens_max: 2048
14+
output_tokens: 1024
15+
output_tokens_stdev: 512
16+
samples: 1000
Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
target: REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL
2-
rate-type: concurrent
2+
model: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL
3+
profile: concurrent
4+
request_type: text_completions
35
rate: 2
4-
max-seconds: 30
5-
data: prompt_tokens=256,output_tokens=128
6+
max_seconds: 30
7+
data:
8+
prompt_tokens: 256
9+
output_tokens: 128
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
target: REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL
2+
model: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL
3+
request_type: text_completions
4+
profile: constant
5+
rate: 1
6+
max_seconds: 30
7+
data:
8+
prompt_tokens: 50
9+
prompt_tokens_stdev: 10
10+
prompt_tokens_min: 10
11+
prompt_tokens_max: 100
12+
output_tokens: 50
13+
output_tokens_stdev: 10
14+
output_tokens_min: 10
15+
output_tokens_max: 100
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
target: REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL
2+
model: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL
3+
request_type: text_completions
4+
profile: constant
5+
rate: [2,5,8,10,12,15,20]
6+
max_seconds: 50
7+
data:
8+
prefix_tokens: 2048
9+
prefix_count: 32
10+
prompt_tokens: 256
11+
output_tokens: 256
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
target: REPLACE_ENV_LLMDBENCH_HARNESS_STACK_ENDPOINT_URL
2+
model: REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL
3+
request_type: text_completions
4+
profile: constant
5+
rate: [1,2,4,8]
6+
max_seconds: 120
7+
data:
8+
prompt_tokens_min: 10
9+
prompt_tokens_max: 4096
10+
prompt_tokens: 2048
11+
prompt_tokens_stdev: 1024
12+
output_tokens_min: 10
13+
output_tokens_max: 256
14+
output_tokens: 128
15+
output_tokens_stdev: 64
16+
samples: 1000

workload/report/convert.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -419,19 +419,21 @@ def import_guidellm(results_file: str) -> BenchmarkReport:
419419
"""
420420
check_file(results_file)
421421

422-
# Everything falls under ['benchmarks'][0], so just grab that part
423-
results = import_yaml(results_file)['benchmarks'][0]
422+
data = import_yaml(results_file)
423+
424+
# TODO: Read each benchmark in file
425+
results = data["benchmarks"][0]
424426

425427
# Get environment variables from llm-d-benchmark run as a dict following the
426428
# schema of BenchmarkReport
427429
br_dict = _get_llmd_benchmark_envars()
428430
# Append to that dict the data from GuideLLM
429431
update_dict(br_dict, {
430432
"scenario": {
431-
"model": {"name": results['worker']['backend_model']},
433+
"model": {"name": data["args"].get("model", "unknown")},
432434
"load": {
433435
"name": WorkloadGenerator.GUIDELLM,
434-
"args": results['args'],
436+
"args": data['args'],
435437
},
436438
},
437439
"metrics": {

0 commit comments

Comments (0)