Skip to content

Commit 4b03e9b

Browse files
committed
Skip image build when eligible for CI speed up.
Latest iteration accounts for main branch changes. Signed-off-by: dougbtv <[email protected]>
1 parent a7e18cd commit 4b03e9b

File tree

3 files changed

+268
-62
lines changed

3 files changed

+268
-62
lines changed

buildkite/bootstrap.sh

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
#!/bin/bash
32

43
set -euo pipefail
@@ -86,6 +85,8 @@ upload_pipeline() {
8685
-D vllm_use_precompiled="$VLLM_USE_PRECOMPILED" \
8786
-D cov_enabled="$COV_ENABLED" \
8887
-D vllm_ci_branch="$VLLM_CI_BRANCH" \
88+
-D skip_image_build="$SKIP_IMAGE_BUILD" \
89+
-D docker_image_override="$DOCKER_IMAGE_OVERRIDE" \
8990
| sed '/^[[:space:]]*$/d' \
9091
> pipeline.yaml
9192
)
@@ -163,9 +164,9 @@ ignore_patterns=(
163164
"cmake/hipify.py"
164165
"cmake/cpu_extension.cmake"
165166
)
166-
167+
# Detect if there are critical changes matching patterns
168+
CRITICAL_CHANGE_DETECTED=0
167169
for file in $file_diff; do
168-
# First check if file matches any pattern
169170
matches_pattern=0
170171
for pattern in "${patterns[@]}"; do
171172
if [[ $file == $pattern* ]] || [[ $file == $pattern ]]; then
@@ -174,7 +175,6 @@ for file in $file_diff; do
174175
fi
175176
done
176177

177-
# If file matches pattern, check it's not in ignore patterns
178178
if [[ $matches_pattern -eq 1 ]]; then
179179
matches_ignore=0
180180
for ignore in "${ignore_patterns[@]}"; do
@@ -185,25 +185,72 @@ for file in $file_diff; do
185185
done
186186

187187
if [[ $matches_ignore -eq 0 ]]; then
188-
RUN_ALL=1
189-
echo "Found changes: $file. Run all tests"
188+
CRITICAL_CHANGE_DETECTED=1
189+
echo "Found critical changes: $file"
190190
break
191191
fi
192192
fi
193193
done
194194

195+
# RUN_ALL can be set manually, but also set it when critical changes are detected
196+
if [[ -z "${RUN_ALL:-}" ]]; then
197+
RUN_ALL=0
198+
fi
199+
if [[ $CRITICAL_CHANGE_DETECTED -eq 1 ]]; then
200+
RUN_ALL=1
201+
echo "RUN_ALL set due to critical changes"
202+
fi
203+
195204
# Decide whether to use precompiled wheels
196-
# Relies on existing patterns array as a basis.
197205
if [[ -n "${VLLM_USE_PRECOMPILED:-}" ]]; then
198206
echo "VLLM_USE_PRECOMPILED is already set to: $VLLM_USE_PRECOMPILED"
199-
elif [[ $RUN_ALL -eq 1 ]]; then
207+
elif [[ $CRITICAL_CHANGE_DETECTED -eq 1 || "${BUILDKITE_BRANCH}" == "main" ]]; then
200208
export VLLM_USE_PRECOMPILED=0
201-
echo "Detected critical changes, building wheels from source"
209+
echo "Detected critical changes or main branch, building wheels from source"
202210
else
203211
export VLLM_USE_PRECOMPILED=1
204212
echo "No critical changes, using precompiled wheels"
205213
fi
206214

215+
# Decide whether to skip building docker images (pull & mount code instead)
216+
if [[ -n "${SKIP_IMAGE_BUILD:-}" ]]; then
217+
echo "SKIP_IMAGE_BUILD is preset to: ${SKIP_IMAGE_BUILD}"
218+
else
219+
if [[ "${VLLM_USE_PRECOMPILED:-}" == "1" && "$CRITICAL_CHANGE_DETECTED" -eq 0 ]]; then
220+
SKIP_IMAGE_BUILD=1
221+
else
222+
SKIP_IMAGE_BUILD=0
223+
fi
224+
fi
225+
226+
# Determine the lowest common ancestor (LCA) commit with main branch if skipping image build
227+
DOCKER_IMAGE_OVERRIDE=""
228+
if [[ "${SKIP_IMAGE_BUILD}" == "1" ]]; then
229+
LCA_COMMIT=""
230+
if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
231+
# The script runs under `set -e`; a failing merge-base (e.g. origin/main not
# fetched) would abort it here. Catch the failure and leave LCA_COMMIT empty so
# the "Could not determine LCA commit" fallback below is taken instead.
LCA_COMMIT=$(git merge-base origin/main HEAD) || LCA_COMMIT=""
232+
fi
233+
if [[ -n "$LCA_COMMIT" ]]; then
234+
IMAGE_TAG="public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$LCA_COMMIT"
235+
echo "Checking for Docker image for LCA: $IMAGE_TAG"
236+
# Check if the image exists on the registry
237+
if docker manifest inspect "$IMAGE_TAG" >/dev/null 2>&1; then
238+
DOCKER_IMAGE_OVERRIDE="$IMAGE_TAG"
239+
echo "Using Docker image for LCA commit: $DOCKER_IMAGE_OVERRIDE"
240+
else
241+
echo "LCA image not found, falling back to build image"
242+
SKIP_IMAGE_BUILD=0
243+
VLLM_USE_PRECOMPILED=0
244+
fi
245+
else
246+
DOCKER_IMAGE_OVERRIDE="public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:latest"
247+
echo "Could not determine LCA commit, using latest Docker image: $DOCKER_IMAGE_OVERRIDE"
248+
fi
249+
fi
250+
251+
echo "Final SKIP_IMAGE_BUILD=${SKIP_IMAGE_BUILD} (RUN_ALL=${RUN_ALL}, VLLM_USE_PRECOMPILED=${VLLM_USE_PRECOMPILED:-unset})"
252+
253+
################## end WIP #####################
207254

208255
LIST_FILE_DIFF=$(get_diff | tr ' ' '|')
209256
if [[ $BUILDKITE_BRANCH == "main" ]]; then

buildkite/test-template-ci.j2

Lines changed: 135 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
{% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cu118" %}
1111
{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %}
1212
{% endif %}
13+
{% set skip_image_build = (skip_image_build | default("0")) %}
14+
{% if skip_image_build == "1" and docker_image_override is defined and docker_image_override %}
15+
{% set docker_image = docker_image_override %}
16+
{% set docker_image_torch_nightly = docker_image_override %}
17+
{% set docker_image_cu118 = docker_image_override %}
18+
{% set docker_image_cpu = docker_image_override %}
19+
{% endif %}
1320
{% set docker_image_amd = "rocm/vllm-ci:$BUILDKITE_COMMIT" %}
1421
{% set default_working_dir = "/vllm-workspace/tests" %}
1522
{% set hf_home = "/root/.cache/huggingface" %}
@@ -30,12 +37,71 @@
3037
{%- set tests_only = (tests_acc.only_tests and tests_acc.any) %}
3138
{%- set changed_tests = tests_acc.changed %}
3239

33-
{% macro add_pytest_coverage(cmd, coverage_file) %}
34-
{% if "pytest " in cmd %}
35-
COVERAGE_FILE={{ coverage_file }} {{ cmd | replace("pytest ", "pytest --cov=vllm --cov-report=xml --cov-append --durations=0 ") }} || true
36-
{% else %}
37-
{{ cmd }}
40+
{# --- helpers ------------------------------------------------------------ #}
41+
42+
{# remove every " \" (space + backslash) pair from the command and trim both ends; newlines are left as-is #}
43+
{% macro clean_cmd(cmd) -%}
44+
{{- cmd
45+
| replace(' \\', '') | trim | safe
46+
-}}
47+
{%- endmacro %}
48+
49+
{# emit step.command / step.commands, cleaned and joined safely #}
50+
{% macro emit_step_commands(step) -%}
51+
{%- if step.command -%}
52+
{{ clean_cmd(step.command) }}
53+
{%- elif step.commands -%}
54+
{%- for c in step.commands -%}
55+
{{ clean_cmd(c) }}{{ " && " if not loop.last else "" }}
56+
{%- endfor -%}
57+
{%- else -%}
58+
echo "No command(s) defined for this step." >&2; exit 2
59+
{%- endif -%}
60+
{%- endmacro %}
61+
62+
{# wrap pytest with coverage flags if present #}
63+
{% macro add_pytest_coverage(cmd, coverage_file) -%}
64+
{%- set c = clean_cmd(cmd) -%}
65+
{%- if "pytest " in c -%}
66+
COVERAGE_FILE={{ coverage_file }} {{ c | replace("pytest ", "pytest --cov=vllm --cov-report= --cov-append --durations=0 ") }} || true
67+
{%- else -%}
68+
{{ c }}
69+
{%- endif -%}
70+
{%- endmacro %}
71+
72+
{# --- main macros -------------------------------------------------------- #}
73+
74+
{% macro vllm_checkoutoverlay_script(step, default_working_dir, skip_image_build, fail_fast, cov_enabled) %}
75+
{% if fail_fast == "true" -%}
76+
set -xeuo pipefail
77+
{%- else -%}
78+
set -xuo pipefail
79+
{%- endif %}
80+
81+
echo "SKIP_IMAGE_BUILD={{ skip_image_build }}"
82+
83+
{% if skip_image_build == "1" %}
84+
# Copy in the code from the checkout to the workspace
85+
rm -rf /vllm-workspace/vllm || true
86+
cp -a /workdir/. /vllm-workspace/
87+
88+
# Overlay the pure-Python vllm into the install package dir
89+
export SITEPKG="$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"
90+
cp -a /vllm-workspace/vllm/* "$$SITEPKG/vllm/"
91+
92+
# Restore src/ layout, as Dockerfile does. Hides code from tests, but allows setup.
93+
rm -rf /vllm-workspace/src || true
94+
mkdir -p /vllm-workspace/src
95+
mv /vllm-workspace/vllm /vllm-workspace/src/vllm
3896
{% endif %}
97+
98+
(command -v nvidia-smi >/dev/null && nvidia-smi || true)
99+
export VLLM_LOGGING_LEVEL=DEBUG
100+
export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1
101+
cd {{ (step.working_dir or default_working_dir) | safe }}
102+
103+
# Run tests with intelligent targeting and coverage
104+
{{ add_docker_pytest_coverage(step, cov_enabled) }}
39105
{% endmacro %}
40106

41107
{% macro add_docker_pytest_coverage(step, cov_enabled) %}
@@ -47,53 +113,55 @@ COVERAGE_FILE={{ coverage_file }} {{ cmd | replace("pytest ", "pytest --cov=vllm
47113
{# Intelligent test targeting: Build matched test targets for this step when only tests changed #}
48114
{%- set match_ns = namespace(targets=[]) %}
49115
{%- if tests_only and step.source_file_dependencies %}
50-
{%- for dep in step.source_file_dependencies %}
51-
{%- if dep[:6] == 'tests/' %}
52-
{%- set dep_rel = dep[6:] %}
53-
{# Handle deps that already end with '/' (e.g., tests/benchmarks/) #}
54-
{%- if dep_rel[-1:] == '/' %}
55-
{%- set dep_dir_prefix = dep_rel %}
56-
{%- set dep_file_name = dep_rel[:-1] ~ '.py' %}
57-
{%- else %}
58-
{%- set dep_dir_prefix = dep_rel ~ '/' %}
59-
{%- set dep_file_name = dep_rel ~ '.py' %}
60-
{%- endif %}
61-
{%- for t in changed_tests %}
62-
{# Check if t starts with dep_dir_prefix (for directories) or equals dep_file_name (for files) #}
63-
{%- set prefix_len = dep_dir_prefix | length %}
64-
{%- set t_prefix = t[:prefix_len] %}
65-
{%- set cond1 = (t | length >= prefix_len and t_prefix == dep_dir_prefix) %}
66-
{%- set cond2 = (t == dep_file_name) %}
67-
{%- if cond1 or cond2 %}
68-
{%- set match_ns.targets = match_ns.targets + [t] %}
69-
{%- endif %}
70-
{%- endfor %}
71-
{%- endif %}
72-
{%- endfor %}
116+
{%- for dep in step.source_file_dependencies %}
117+
{%- if dep[:6] == 'tests/' %}
118+
{%- set dep_rel = dep[6:] %}
119+
{%- if dep_rel[-1:] == '/' %}
120+
{%- set dep_dir_prefix = dep_rel %}
121+
{%- set dep_file_name = dep_rel[:-1] ~ '.py' %}
122+
{%- else %}
123+
{%- set dep_dir_prefix = dep_rel ~ '/' %}
124+
{%- set dep_file_name = dep_rel ~ '.py' %}
125+
{%- endif %}
126+
{%- for t in changed_tests %}
127+
{%- set prefix_len = dep_dir_prefix | length %}
128+
{%- set t_prefix = t[:prefix_len] %}
129+
{%- set cond1 = (t | length >= prefix_len and t_prefix == dep_dir_prefix) %}
130+
{%- set cond2 = (t == dep_file_name) %}
131+
{%- if cond1 or cond2 %}
132+
{%- set match_ns.targets = match_ns.targets + [t] %}
133+
{%- endif %}
134+
{%- endfor %}
135+
{%- endif %}
136+
{%- endfor %}
73137
{%- endif %}
74138
{%- set matched_targets = match_ns.targets %}
75139

76140
{# If we have matched targets, run only those specific tests #}
77141
{% if matched_targets | length > 0 %}
78142
pytest -v -s {{ matched_targets | join(' ') }}
79143
{% else %}
80-
{# Default behavior: preserve original commands with optional coverage injection #}
81-
{% if cov_enabled %}
82-
{% set ns = namespace(has_pytest=false) %}
83-
{% if step.command %}
84-
{% if "pytest " in step.command %}{% set ns.has_pytest = true %}{% endif %}
85-
{{ add_pytest_coverage(step.command, coverage_file) }}
86-
{% else %}
87-
{% for cmd in step.commands %}
88-
{% if "pytest " in cmd %}{% set ns.has_pytest = true %}{% endif %}
89-
{{ add_pytest_coverage(cmd, coverage_file) }}{{ " && " if not loop.last else "" }}{% endfor %}
90-
{% endif %}{% if ns.has_pytest %} && curl -sSL https://raw.githubusercontent.com/vllm-project/ci-infra/{{ vllm_ci_branch | default('main') }}/buildkite/scripts/upload_codecov.sh | bash -s -- \"{{ step.label }}\"{% endif %}
91-
{% else %}
92-
{{ step.command or (step.commands | join(' && ')) | safe }}
93-
{% endif %}
144+
{# Default behavior: preserve original commands with optional coverage injection, all cleaned #}
145+
{% if cov_enabled %}
146+
{% set ns = namespace(has_pytest=false) %}
147+
{% if step.command %}
148+
{% if "pytest " in clean_cmd(step.command) %}{% set ns.has_pytest = true %}{% endif %}
149+
{{ add_pytest_coverage(step.command, coverage_file) }}
150+
{% else %}
151+
{%- for cmd in step.commands -%}
152+
{{ add_pytest_coverage(cmd, coverage_file) }}{{ " && " if not loop.last else "" }}
153+
{%- endfor -%}
154+
{% endif %}
155+
{%- if ns.has_pytest -%}
156+
&& curl -sSL https://raw.githubusercontent.com/vllm-project/ci-infra/{{ vllm_ci_branch | default('main') }}/buildkite/scripts/upload_codecov.sh | bash -s -- "{{ step.label }}"
157+
{%- endif -%}
158+
{% else %}
159+
{{ emit_step_commands(step) }}
160+
{% endif %}
94161
{% endif %}
95162
{% endmacro %}
96163

164+
97165
{% macro render_cuda_config(step, image, default_working_dir, hf_home_fsx, hf_home, branch) %}
98166
agents:
99167
{% if step.label == "Documentation Build" %}
@@ -141,7 +209,11 @@ plugins:
141209
{% if step.label == "Benchmarks" or step.mount_buildkite_agent or cov_enabled %}
142210
mount-buildkite-agent: true
143211
{% endif %}
144-
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
212+
command:
213+
- "/bin/bash"
214+
- "-c"
215+
- |
216+
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
145217
environment:
146218
- VLLM_USAGE_SOURCE=ci-test
147219
- NCCL_CUMEM_HOST_ENABLE=0
@@ -166,7 +238,11 @@ plugins:
166238
always-pull: true
167239
propagate-environment: true
168240
gpus: all
169-
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
241+
command:
242+
- "/bin/bash"
243+
- "-c"
244+
- |
245+
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
170246
environment:
171247
- VLLM_USAGE_SOURCE=ci-test
172248
- NCCL_CUMEM_HOST_ENABLE=0
@@ -190,7 +266,11 @@ plugins:
190266
propagate-environment: true
191267
# gpus will be configured by BUILDKITE_PLUGIN_DOCKER_GPUS in per host environment variable.
192268
# gpus: all
193-
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
269+
command:
270+
- "/bin/bash"
271+
- "-c"
272+
- |
273+
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
194274
environment:
195275
- VLLM_USAGE_SOURCE=ci-test
196276
- NCCL_CUMEM_HOST_ENABLE=0
@@ -251,6 +331,7 @@ plugins:
251331

252332

253333
steps:
334+
{% if skip_image_build != "1" %}
254335
- label: ":docker: build image"
255336
key: image-build
256337
depends_on: ~
@@ -376,6 +457,7 @@ steps:
376457
limit: 2
377458
- exit_status: -10 # Agent was lost
378459
limit: 2
460+
{% endif %}
379461

380462
{% for step in steps %}
381463
{% if step.fast_check_only != true %}
@@ -404,15 +486,23 @@ steps:
404486

405487
{% if ns.blocked == 1 or (step.optional and nightly != "1") %}
406488
- block: "Run {{ step.label }}"
489+
{% if skip_image_build != "1" %}
407490
depends_on: image-build
491+
{% else %}
492+
depends_on: ~
493+
{% endif %}
408494
key: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
409495
{% endif %}
410496

411497
- label: "{{ step.label }}"
412498
{% if ns.blocked == 1 or (step.optional and nightly != "1") %}
413499
depends_on: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
414500
{% else %}
501+
{% if skip_image_build != "1" %}
415502
depends_on: {{ "image-build-cpu" if step.no_gpu else "image-build" }}
503+
{% else %}
504+
depends_on: ~
505+
{% endif %}
416506
{% endif %}
417507
soft_fail: {{ step.soft_fail or false }}
418508
{{ render_cuda_config(step, docker_image_cpu if step.no_gpu else docker_image, default_working_dir, hf_home_fsx, hf_home, branch) | indent(4, true) }}

0 commit comments

Comments
 (0)