Skip to content

Commit ba6f019

Browse files
authored
Merge pull request #449 from ROCm/upstream_merge_25_02_24
Upstream merge 25 02 24
2 parents 18689d8 + 46c1c97 commit ba6f019

File tree

447 files changed

+12691
-7769
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

447 files changed

+12691
-7769
lines changed

.buildkite/nightly-benchmarks/benchmark-pipeline.yaml

+91-5
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,18 @@ steps:
1010
- image: badouralix/curl-jq
1111
command:
1212
- sh .buildkite/nightly-benchmarks/scripts/wait-for-image.sh
13-
13+
- label: "Cleanup H100"
14+
agents:
15+
queue: H100
16+
depends_on: ~
17+
command: docker system prune -a --volumes --force
18+
1419
- label: "A100"
1520
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
1621
agents:
1722
queue: A100
1823
depends_on: wait-for-container-image
24+
if: build.branch == "main"
1925
plugins:
2026
- kubernetes:
2127
podSpec:
@@ -50,6 +56,7 @@ steps:
5056
agents:
5157
queue: H200
5258
depends_on: wait-for-container-image
59+
if: build.branch == "main"
5360
plugins:
5461
- docker#v5.12.0:
5562
image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
@@ -70,20 +77,99 @@ steps:
7077
#key: block-h100
7178
#depends_on: ~
7279

73-
- label: "Cleanup H100"
80+
- label: "H100"
81+
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
7482
agents:
7583
queue: H100
76-
depends_on: ~
77-
command: docker system prune -a --volumes --force
84+
depends_on: wait-for-container-image
85+
if: build.branch == "main"
86+
plugins:
87+
- docker#v5.12.0:
88+
image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
89+
command:
90+
- bash
91+
- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
92+
mount-buildkite-agent: true
93+
propagate-environment: true
94+
ipc: host
95+
gpus: all # see CUDA_VISIBLE_DEVICES for actual GPUs used
96+
volumes:
97+
- /data/benchmark-hf-cache:/root/.cache/huggingface
98+
environment:
99+
- VLLM_USAGE_SOURCE
100+
- HF_TOKEN
101+
102+
# Premerge benchmark
103+
- label: "A100"
104+
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
105+
agents:
106+
queue: A100
107+
depends_on: wait-for-container-image
108+
if: build.branch != "main"
109+
plugins:
110+
- kubernetes:
111+
podSpec:
112+
priorityClassName: perf-benchmark
113+
containers:
114+
- image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
115+
command:
116+
- bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
117+
resources:
118+
limits:
119+
nvidia.com/gpu: 8
120+
volumeMounts:
121+
- name: devshm
122+
mountPath: /dev/shm
123+
env:
124+
- name: VLLM_USAGE_SOURCE
125+
value: ci-test
126+
- name: HF_TOKEN
127+
valueFrom:
128+
secretKeyRef:
129+
name: hf-token-secret
130+
key: token
131+
nodeSelector:
132+
nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
133+
volumes:
134+
- name: devshm
135+
emptyDir:
136+
medium: Memory
137+
138+
- label: "H200"
139+
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
140+
agents:
141+
queue: H200
142+
depends_on: wait-for-container-image
143+
if: build.branch != "main"
144+
plugins:
145+
- docker#v5.12.0:
146+
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
147+
command:
148+
- bash
149+
- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
150+
mount-buildkite-agent: true
151+
propagate-environment: true
152+
ipc: host
153+
gpus: 4,5,6,7
154+
volumes:
155+
- /data/benchmark-hf-cache:/root/.cache/huggingface
156+
environment:
157+
- VLLM_USAGE_SOURCE
158+
- HF_TOKEN
159+
160+
#- block: "Run H100 Benchmark"
161+
#key: block-h100
162+
#depends_on: ~
78163

79164
- label: "H100"
80165
# skip: "use this flag to conditionally skip the benchmark step, useful for PR testing"
81166
agents:
82167
queue: H100
83168
depends_on: wait-for-container-image
169+
if: build.branch != "main"
84170
plugins:
85171
- docker#v5.12.0:
86-
image: public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT
172+
image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT
87173
command:
88174
- bash
89175
- .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh

.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py

+21-6
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,13 @@ def results_to_json(latency, throughput, serving):
8484
# this result is generated via `benchmark_serving.py`
8585

8686
# attach the benchmarking command to raw_result
87-
with open(test_file.with_suffix(".commands")) as f:
88-
command = json.loads(f.read())
87+
try:
88+
with open(test_file.with_suffix(".commands")) as f:
89+
command = json.loads(f.read())
90+
except OSError as e:
91+
print(e)
92+
continue
93+
8994
raw_result.update(command)
9095

9196
# update the test name of this result
@@ -99,8 +104,13 @@ def results_to_json(latency, throughput, serving):
99104
# this result is generated via `benchmark_latency.py`
100105

101106
# attach the benchmarking command to raw_result
102-
with open(test_file.with_suffix(".commands")) as f:
103-
command = json.loads(f.read())
107+
try:
108+
with open(test_file.with_suffix(".commands")) as f:
109+
command = json.loads(f.read())
110+
except OSError as e:
111+
print(e)
112+
continue
113+
104114
raw_result.update(command)
105115

106116
# update the test name of this result
@@ -121,8 +131,13 @@ def results_to_json(latency, throughput, serving):
121131
# this result is generated via `benchmark_throughput.py`
122132

123133
# attach the benchmarking command to raw_result
124-
with open(test_file.with_suffix(".commands")) as f:
125-
command = json.loads(f.read())
134+
try:
135+
with open(test_file.with_suffix(".commands")) as f:
136+
command = json.loads(f.read())
137+
except OSError as e:
138+
print(e)
139+
continue
140+
126141
raw_result.update(command)
127142

128143
# update the test name of this result

.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh

+3
Original file line numberDiff line numberDiff line change
@@ -309,11 +309,14 @@ run_serving_tests() {
309309

310310
new_test_name=$test_name"_qps_"$qps
311311

312+
# pass the tensor parallel size to the client so that it can be displayed
313+
# on the benchmark dashboard
312314
client_command="python3 benchmark_serving.py \
313315
--save-result \
314316
--result-dir $RESULTS_FOLDER \
315317
--result-filename ${new_test_name}.json \
316318
--request-rate $qps \
319+
--metadata "tensor_parallel_size=$tp" \
317320
$client_args"
318321

319322
echo "Running test case $test_name with qps $qps"

.buildkite/nightly-benchmarks/scripts/wait-for-image.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#!/bin/sh
22
TOKEN=$(curl -s -L "https://public.ecr.aws/token?service=public.ecr.aws&scope=repository:q9t5s3a7/vllm-ci-postmerge-repo:pull" | jq -r .token)
3-
URL="https://public.ecr.aws/v2/q9t5s3a7/vllm-ci-postmerge-repo/manifests/$BUILDKITE_COMMIT"
3+
if [[ "$BUILDKITE_BRANCH" == "main" ]]; then
4+
URL="https://public.ecr.aws/v2/q9t5s3a7/vllm-ci-postmerge-repo/manifests/$BUILDKITE_COMMIT"
5+
else
6+
URL="https://public.ecr.aws/v2/q9t5s3a7/vllm-ci-test-repo/manifests/$BUILDKITE_COMMIT"
7+
fi
48

59
TIMEOUT_SECONDS=10
610

.buildkite/nightly-benchmarks/tests/serving-tests.json

+1-2
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@
6666
"swap_space": 16,
6767
"speculative_model": "turboderp/Qwama-0.5B-Instruct",
6868
"num_speculative_tokens": 4,
69-
"speculative_draft_tensor_parallel_size": 1,
70-
"use_v2_block_manager": ""
69+
"speculative_draft_tensor_parallel_size": 1
7170
},
7271
"client_parameters": {
7372
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",

.buildkite/nightly-benchmarks/tests/throughput-tests.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,4 @@
3232
"backend": "vllm"
3333
}
3434
}
35-
]
35+
]

.buildkite/run-amd-test.sh

+4
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ if [[ $commands == *"--shard-id="* ]]; then
121121
--rm \
122122
-e HIP_VISIBLE_DEVICES="${GPU}" \
123123
-e HF_TOKEN \
124+
-e AWS_ACCESS_KEY_ID \
125+
-e AWS_SECRET_ACCESS_KEY \
124126
-v "${HF_CACHE}:${HF_MOUNT}" \
125127
-e "HF_HOME=${HF_MOUNT}" \
126128
--name "${container_name}_${GPU}" \
@@ -149,6 +151,8 @@ else
149151
--rm \
150152
-e HIP_VISIBLE_DEVICES=0 \
151153
-e HF_TOKEN \
154+
-e AWS_ACCESS_KEY_ID \
155+
-e AWS_SECRET_ACCESS_KEY \
152156
-v "${HF_CACHE}:${HF_MOUNT}" \
153157
-e "HF_HOME=${HF_MOUNT}" \
154158
--name "${container_name}" \

.buildkite/run-cpu-test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ function cpu_tests() {
3030
# offline inference
3131
docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" bash -c "
3232
set -e
33-
python3 examples/offline_inference/basic.py"
33+
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
3434

3535
# Run basic model test
3636
docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "

.buildkite/run-gh200-test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,5 @@ remove_docker_container
2424

2525
# Run the image and test offline inference
2626
docker run -e HF_TOKEN -v /root/.cache/huggingface:/root/.cache/huggingface --name gh200-test --gpus=all --entrypoint="" gh200-test bash -c '
27-
python3 examples/offline_inference/cli.py --model meta-llama/Llama-3.2-1B
27+
python3 examples/offline_inference/basic/generate.py --model meta-llama/Llama-3.2-1B
2828
'

.buildkite/run-hpu-test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ trap remove_docker_container_and_exit EXIT
2020
remove_docker_container
2121

2222
# Run the image and launch offline inference
23-
docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic.py
23+
docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
2424
EXITCODE=$?

.buildkite/run-openvino-test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ trap remove_docker_container EXIT
1313
remove_docker_container
1414

1515
# Run the image and launch offline inference
16-
docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference/basic.py
16+
docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference/basic/generate.py --model facebook/opt-125m

.buildkite/run-xpu-test.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@ remove_docker_container
1414

1515
# Run the image and test offline inference/tensor parallel
1616
docker run --name xpu-test --device /dev/dri -v /dev/dri/by-path:/dev/dri/by-path --entrypoint="" xpu-test sh -c '
17-
python3 examples/offline_inference/basic.py
18-
python3 examples/offline_inference/cli.py -tp 2
17+
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
18+
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m -tp 2
1919
'

0 commit comments

Comments
 (0)