Skip to content

Commit 172d679

Browse files
committed
Skip image build when eligible
1 parent a7e18cd commit 172d679

File tree

3 files changed

+181
-13
lines changed

3 files changed

+181
-13
lines changed

buildkite/bootstrap.sh

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
#!/bin/bash
32

43
set -euo pipefail
@@ -86,6 +85,8 @@ upload_pipeline() {
8685
-D vllm_use_precompiled="$VLLM_USE_PRECOMPILED" \
8786
-D cov_enabled="$COV_ENABLED" \
8887
-D vllm_ci_branch="$VLLM_CI_BRANCH" \
88+
-D skip_image_build="$SKIP_IMAGE_BUILD" \
89+
-D docker_image_override="$DOCKER_IMAGE_OVERRIDE" \
8990
| sed '/^[[:space:]]*$/d' \
9091
> pipeline.yaml
9192
)
@@ -196,14 +197,56 @@ done
196197
# Relies on existing patterns array as a basis.
197198
if [[ -n "${VLLM_USE_PRECOMPILED:-}" ]]; then
198199
echo "VLLM_USE_PRECOMPILED is already set to: $VLLM_USE_PRECOMPILED"
199-
elif [[ $RUN_ALL -eq 1 ]]; then
200+
elif [[ $RUN_ALL -eq 1 || "${BUILDKITE_BRANCH}" == "main" ]]; then
200201
export VLLM_USE_PRECOMPILED=0
201202
echo "Detected critical changes, building wheels from source"
202203
else
203204
export VLLM_USE_PRECOMPILED=1
204205
echo "No critical changes, using precompiled wheels"
205206
fi
206207

208+
# Decide whether to skip building docker images (pull a prebuilt image and
# overlay the checked-out code instead).
#
# Reads:  SKIP_IMAGE_BUILD (optional manual override), VLLM_USE_PRECOMPILED,
#         RUN_ALL, DOCKER_IMAGE_OVERRIDE (optional).
# Writes: SKIP_IMAGE_BUILD, DOCKER_IMAGE_OVERRIDE, and VLLM_USE_PRECOMPILED
#         (forced to 0 when the image for the merge-base commit is missing).

# Make sure DOCKER_IMAGE_OVERRIDE is always defined: it is expanded
# unconditionally when the pipeline is rendered, and this script runs with
# `set -u`, so leaving it unset on the "build the image" paths would abort.
DOCKER_IMAGE_OVERRIDE="${DOCKER_IMAGE_OVERRIDE:-}"

# Honor manual override if provided.
if [[ -n "${SKIP_IMAGE_BUILD:-}" ]]; then
  echo "SKIP_IMAGE_BUILD is preset to: ${SKIP_IMAGE_BUILD}"
else
  # Auto decision: skip the build only when
  #   - there are no critical changes (RUN_ALL==0), and
  #   - precompiled wheels are in use (VLLM_USE_PRECOMPILED==1).
  if [[ "${VLLM_USE_PRECOMPILED:-}" == "1" && "$RUN_ALL" -eq 0 ]]; then
    SKIP_IMAGE_BUILD=1
  else
    SKIP_IMAGE_BUILD=0
  fi
fi

# Determine the lowest common ancestor (LCA) commit with the main branch when
# skipping the image build, and pick the prebuilt image tagged with it.
if [[ "${SKIP_IMAGE_BUILD}" == "1" ]]; then
  LCA_COMMIT=""
  if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    # merge-base can fail (e.g. origin/main not fetched). Under `set -e` a bare
    # failing command substitution would kill the script, so swallow the
    # failure here and let the "latest" fallback below handle it.
    LCA_COMMIT=$(git merge-base origin/main HEAD 2>/dev/null) || LCA_COMMIT=""
  fi
  if [[ -n "$LCA_COMMIT" ]]; then
    IMAGE_TAG="public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$LCA_COMMIT"
    echo "Checking for Docker image for LCA: $IMAGE_TAG"
    # Check if the image exists on the registry
    if docker manifest inspect "$IMAGE_TAG" >/dev/null 2>&1; then
      DOCKER_IMAGE_OVERRIDE="$IMAGE_TAG"
      echo "Using Docker image for LCA commit: $DOCKER_IMAGE_OVERRIDE"
    else
      # No prebuilt image for this commit: fall back to a full image build.
      echo "LCA image not found, falling back to build image"
      SKIP_IMAGE_BUILD=0
      VLLM_USE_PRECOMPILED=0
    fi
  else
    DOCKER_IMAGE_OVERRIDE="public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:latest"
    echo "Could not determine LCA commit, using latest Docker image: $DOCKER_IMAGE_OVERRIDE"
  fi
fi

echo "Final SKIP_IMAGE_BUILD=${SKIP_IMAGE_BUILD} (RUN_ALL=${RUN_ALL}, VLLM_USE_PRECOMPILED=${VLLM_USE_PRECOMPILED:-unset})"
248+
249+
################## end WIP #####################
207250

208251
LIST_FILE_DIFF=$(get_diff | tr ' ' '|')
209252
if [[ $BUILDKITE_BRANCH == "main" ]]; then

buildkite/test-template-ci.j2

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
{% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cu118" %}
1111
{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %}
1212
{% endif %}
13+
{% set skip_image_build = (skip_image_build | default("0")) %}
14+
{% if skip_image_build == "1" and docker_image_override is defined and docker_image_override %}
15+
{% set docker_image = docker_image_override %}
16+
{% set docker_image_torch_nightly = docker_image_override %}
17+
{% set docker_image_cu118 = docker_image_override %}
18+
{% set docker_image_cpu = docker_image_override %}
19+
{% endif %}
1320
{% set docker_image_amd = "rocm/vllm-ci:$BUILDKITE_COMMIT" %}
1421
{% set default_working_dir = "/vllm-workspace/tests" %}
1522
{% set hf_home = "/root/.cache/huggingface" %}
@@ -30,6 +37,32 @@
3037
{%- set tests_only = (tests_acc.only_tests and tests_acc.any) %}
3138
{%- set changed_tests = tests_acc.changed %}
3239

40+
{# vllm_checkoutoverlay_script: renders the bash script that a test step runs
   inside its container.
   Arguments:
     step                - step definition; step.working_dir is read here and
                           the whole step is passed to add_docker_pytest_coverage
     default_working_dir - directory to cd into when step.working_dir is unset
     skip_image_build    - "1" to overlay the checkout mounted at /workdir onto
                           the image before running the tests
     fail_fast           - "true" to run the script with -e and pipefail
     cov_enabled         - forwarded to add_docker_pytest_coverage
#}
{% macro vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) -%}
set {% if fail_fast == "true" %}-xeuo pipefail{% else %}-xu{% endif %}
echo "SKIP_IMAGE_BUILD={{ skip_image_build }}"
{% if skip_image_build == "1" %}

# Copy in the code from the checkout to the workspace
rm -rf /vllm-workspace/vllm || true
cp -a /workdir/. /vllm-workspace/

# Overlay the pure-Python vllm into the install package dir
# NOTE(review): "$$" is presumably the Buildkite escape for a literal "$" - confirm.
export SITEPKG="$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"
cp -a /vllm-workspace/vllm/* "$$SITEPKG/vllm/"

# Restore src/ layout, as Dockerfile does. Hides code from tests, but allows setup.
rm -rf /vllm-workspace/src || true
mkdir -p /vllm-workspace/src
mv /vllm-workspace/vllm /vllm-workspace/src/vllm
{% endif %}

(command -v nvidia-smi >/dev/null && nvidia-smi || true)
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1
cd {{ (step.working_dir or default_working_dir) | safe }}

# Run tests with intelligent targeting and coverage
{{ add_docker_pytest_coverage(step, cov_enabled) }}
{%- endmacro %}
3366
{% macro add_pytest_coverage(cmd, coverage_file) %}
3467
{% if "pytest " in cmd %}
3568
COVERAGE_FILE={{ coverage_file }} {{ cmd | replace("pytest ", "pytest --cov=vllm --cov-report=xml --cov-append --durations=0 ") }} || true
@@ -141,7 +174,11 @@ plugins:
141174
{% if step.label == "Benchmarks" or step.mount_buildkite_agent or cov_enabled %}
142175
mount-buildkite-agent: true
143176
{% endif %}
144-
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
177+
command:
178+
- "/bin/bash"
179+
- "-c"
180+
- |
181+
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
145182
environment:
146183
- VLLM_USAGE_SOURCE=ci-test
147184
- NCCL_CUMEM_HOST_ENABLE=0
@@ -160,13 +197,18 @@ plugins:
160197
volumes:
161198
- /dev/shm:/dev/shm
162199
- {{ hf_home_fsx }}:{{ hf_home_fsx }}
200+
- $PWD:/workdir
163201
{% elif step.gpu == "h200" %}
164202
- docker#v5.2.0:
165203
image: {{ image }}
166204
always-pull: true
167205
propagate-environment: true
168206
gpus: all
169-
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
207+
command:
208+
- "/bin/bash"
209+
- "-c"
210+
- |
211+
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
170212
environment:
171213
- VLLM_USAGE_SOURCE=ci-test
172214
- NCCL_CUMEM_HOST_ENABLE=0
@@ -183,14 +225,19 @@ plugins:
183225
- /dev/shm:/dev/shm
184226
- /data/benchmark-hf-cache:/benchmark-hf-cache
185227
- /data/benchmark-vllm-cache:/root/.cache/vllm
228+
- $PWD:/workdir
186229
{% elif step.gpu == "b200" %}
187230
- docker#v5.2.0:
188231
image: {{ image }}
189232
always-pull: true
190233
propagate-environment: true
191234
# gpus will be configured by BUILDKITE_PLUGIN_DOCKER_GPUS in per host environment variable.
192235
# gpus: all
193-
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
236+
command:
237+
- "/bin/bash"
238+
- "-c"
239+
- |
240+
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
194241
environment:
195242
- VLLM_USAGE_SOURCE=ci-test
196243
- NCCL_CUMEM_HOST_ENABLE=0
@@ -207,6 +254,7 @@ plugins:
207254
- /dev/shm:/dev/shm
208255
- /data/benchmark-hf-cache:/benchmark-hf-cache
209256
- /data/benchmark-vllm-cache:/root/.cache/vllm
257+
- $PWD:/workdir
210258
{% else %}
211259
- kubernetes:
212260
podSpec:
@@ -251,6 +299,7 @@ plugins:
251299

252300

253301
steps:
302+
{% if skip_image_build != "1" %}
254303
- label: ":docker: build image"
255304
key: image-build
256305
depends_on: ~
@@ -376,6 +425,7 @@ steps:
376425
limit: 2
377426
- exit_status: -10 # Agent was lost
378427
limit: 2
428+
{% endif %}
379429

380430
{% for step in steps %}
381431
{% if step.fast_check_only != true %}
@@ -404,15 +454,23 @@ steps:
404454

405455
{% if ns.blocked == 1 or (step.optional and nightly != "1") %}
406456
- block: "Run {{ step.label }}"
457+
{% if skip_image_build != "1" %}
407458
depends_on: image-build
459+
{% else %}
460+
depends_on: ~
461+
{% endif %}
408462
key: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
409463
{% endif %}
410464

411465
- label: "{{ step.label }}"
412466
{% if ns.blocked == 1 or (step.optional and nightly != "1") %}
413467
depends_on: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
414468
{% else %}
469+
{% if skip_image_build != "1" %}
415470
depends_on: {{ "image-build-cpu" if step.no_gpu else "image-build" }}
471+
{% else %}
472+
depends_on: ~
473+
{% endif %}
416474
{% endif %}
417475
soft_fail: {{ step.soft_fail or false }}
418476
{{ render_cuda_config(step, docker_image_cpu if step.no_gpu else docker_image, default_working_dir, hf_home_fsx, hf_home, branch) | indent(4, true) }}

buildkite/test-template-fastcheck.j2

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,59 @@
11
{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
22
{% set docker_image_amd = "rocm/vllm-ci:$BUILDKITE_COMMIT" %}
3+
{% set skip_image_build = (skip_image_build | default("0")) %}
4+
{% set requirements_changed = (requirements_changed | default("0")) %}
5+
{% if skip_image_build == "1" and docker_image_override is defined and docker_image_override %}
6+
{% set docker_image = docker_image_override %}
7+
{% endif %}
38
{% set default_working_dir = "/vllm-workspace/tests" %}
49
{% set hf_home = "/root/.cache/huggingface" %}
510
{% set hf_home_efs = "/mnt/efs/hf_cache" %}
611
{% set hf_home_fsx = "/fsx/hf_cache" %}
712

13+
{# vllm_checkoutoverlay_script: renders the bash script that a fastcheck step
   runs inside its container.
   Arguments:
     step                 - step definition; reads step.working_dir and
                            step.command / step.commands
     default_working_dir  - directory to cd into when step.working_dir is unset
     skip_image_build     - "1" to overlay the checkout mounted at /workdir onto
                            the image before running the step's commands
     requirements_changed - '1' to re-install the requirements files after the
                            overlay (only consulted when skip_image_build is "1")
     fail_fast            - "true" to run the script with -e and pipefail
#}
{% macro vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,requirements_changed,fail_fast) -%}
set {% if fail_fast == "true" %}-xeuo pipefail{% else %}-xu{% endif %}
echo "SKIP_IMAGE_BUILD={{ skip_image_build }} REQUIREMENTS_CHANGED={{ requirements_changed }}"
{% if skip_image_build == "1" %}

# Copy in the code from the checkout to the workspace
rm -rf /vllm-workspace/vllm || true
cp -a /workdir/. /vllm-workspace/

# Overlay the pure-Python vllm into the install package dir
# NOTE(review): "$$" is presumably the Buildkite escape for a literal "$" - confirm.
export SITEPKG="$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"
cp -a /vllm-workspace/vllm/* "$$SITEPKG/vllm/"

# Restore src/ layout, as Dockerfile does. Hides code from tests, but allows setup.
rm -rf /vllm-workspace/src || true
mkdir -p /vllm-workspace/src
mv /vllm-workspace/vllm /vllm-workspace/src/vllm

# If deps changed, re-install (system-wide in the container)
{% if requirements_changed == '1' %}
cd /vllm-workspace
uv pip install --system -r requirements/common.txt -r requirements/build.txt -r requirements/test.txt
{% endif %}
{% endif %}

(command -v nvidia-smi >/dev/null && nvidia-smi || true)
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1
cd {{ (step.working_dir or default_working_dir) | safe }}

{# Emit step.command, or each entry of step.commands on its own line, after the replace(' \\', '') filter and trimming surrounding whitespace. #}
{% if step.command -%}
{{ step.command | replace(' \\', '') | trim | safe }}
{%- elif step.commands -%}
{%- for cmd in step.commands %}
{{ cmd | replace(' \\', '') | trim | safe }}
{%- endfor %}
{%- else -%}
echo "No command(s) defined for this step." >&2; exit 2
{%- endif %}
{%- endmacro %}
54+
855
steps:
56+
{% if skip_image_build != "1" %}
957
- label: ":docker: build image"
1058
key: image-build
1159
agents:
@@ -39,6 +87,7 @@ steps:
3987
limit: 5
4088
- exit_status: -10 # Agent was lost
4189
limit: 5
90+
{% endif %}
4291

4392
- block: Run Neuron Test
4493
depends_on: ~
@@ -54,7 +103,11 @@ steps:
54103
{% for step in steps %}
55104
{% if step.gpu != "a100" and step.fast_check == true and step.num_nodes < 2 %}
56105
- label: "{{ step.label }}"
106+
{% if skip_image_build != "1" %}
57107
depends_on: image-build
108+
{% else %}
109+
depends_on: ~
110+
{% endif %}
58111
agents:
59112
{% if step.label == "Documentation Build" %}
60113
queue: small_cpu_queue_premerge
@@ -86,10 +139,11 @@ steps:
86139
{% if step.label == "Benchmarks" %}
87140
mount-buildkite-agent: true
88141
{% endif %}
89-
command:
90-
- "bash"
91-
- "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}"
92-
- "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}"
142+
command:
143+
- "/bin/bash"
144+
- "-xce"
145+
- |
146+
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,requirements_changed,fail_fast) | indent(14,true) }}
93147
environment:
94148
- VLLM_USAGE_SOURCE=ci-test
95149
- NCCL_CUMEM_HOST_ENABLE=0
@@ -111,7 +165,11 @@ steps:
111165
{% if step.gpu != "a100" and step.fast_check != true and step.num_nodes < 2 %}
112166
- block: "Run {{ step.label }}"
113167
key: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
168+
{% if skip_image_build != "1" %}
114169
depends_on: image-build
170+
{% else %}
171+
depends_on: ~
172+
{% endif %}
115173

116174
- label: "{{ step.label }}"
117175
depends_on: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
@@ -146,10 +204,11 @@ steps:
146204
{% if step.label == "Benchmarks" %}
147205
mount-buildkite-agent: true
148206
{% endif %}
149-
command:
150-
- "bash"
151-
- "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}"
152-
- "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}"
207+
command:
208+
- "/bin/bash"
209+
- "-xce"
210+
- |
211+
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,requirements_changed,fail_fast) | indent(14,true) }}
153212
environment:
154213
- VLLM_USAGE_SOURCE=ci-test
155214
- NCCL_CUMEM_HOST_ENABLE=0
@@ -171,7 +230,11 @@ steps:
171230
{% if step.num_nodes >= 2 %}
172231
- block: "Run {{ step.label }}"
173232
key: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
233+
{% if skip_image_build != "1" %}
174234
depends_on: image-build
235+
{% else %}
236+
depends_on: ~
237+
{% endif %}
175238

176239
- label: "{{ step.label }}"
177240
depends_on: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
@@ -183,7 +246,11 @@ steps:
183246
{% endfor %}
184247

185248
- block: "Run A100 tests"
249+
{% if skip_image_build != "1" %}
186250
depends_on: image-build
251+
{% else %}
252+
depends_on: ~
253+
{% endif %}
187254

188255
{% for step in steps %}
189256
{% if step.gpu == "a100" %}

0 commit comments

Comments
 (0)