From b86dee0d8b866c470a40e1a13b47471ea75ba91e Mon Sep 17 00:00:00 2001 From: sammy javed Date: Tue, 23 Jun 2026 12:15:00 -0700 Subject: [PATCH 1/2] ci(deps): add dependency denylist + installed-footprint +5% ratchet --- .github/workflows/installer-ci.yml | 35 +++++++++++++++++++ tests/installer/footprint_baseline.env | 11 ++++++ tests/test_dependency_budget.py | 48 ++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 tests/installer/footprint_baseline.env create mode 100644 tests/test_dependency_budget.py diff --git a/.github/workflows/installer-ci.yml b/.github/workflows/installer-ci.yml index 0553dbba..175787c8 100644 --- a/.github/workflows/installer-ci.yml +++ b/.github/workflows/installer-ci.yml @@ -9,6 +9,7 @@ on: - "scripts/publish_release.py" - "tests/installer/**" - "pyproject.toml" + - "poetry.lock" - ".github/workflows/installer-ci.yml" concurrency: @@ -71,6 +72,40 @@ jobs: name: pr-wheel path: dist/vastai-*.whl + # Footprint ratchet: install THIS PR's closure and fail if it grew > 5% over + # the committed baseline. A CLI shouldn't quietly balloon (transformers added + # ~138 MiB via #424). On a real increase the dev either slims the PR or bumps + # tests/installer/footprint_baseline.env deliberately — the diff records intent. + # Hermetic uv measure on one canonical platform (linux x86_64); the per-package + # denylist in tests/test_dependency_budget.py covers the cross-platform angle. + footprint-budget: + needs: build-wheel + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: pr-wheel + path: dist + - name: install uv + run: curl -LsSf https://astral.sh/uv/install.sh | sh + - name: measure dependency footprint vs baseline (+5% ratchet) + run: | + . tests/installer/footprint_baseline.env + export PATH="$HOME/.local/bin:$PATH" + uv venv /tmp/fp --python 3.12 --quiet + uv pip install --python /tmp/fp/bin/python --quiet dist/vastai-*.whl + SITE="$(find /tmp/fp/lib -type d -name site-packages)" + BYTES="$(du -sb "$SITE" | cut -f1)" + CEIL=$(( FOOTPRINT_BASELINE_BYTES * 105 / 100 )) + PCT=$(( (BYTES - FOOTPRINT_BASELINE_BYTES) * 100 / FOOTPRINT_BASELINE_BYTES )) + printf 'footprint: %d MiB | baseline: %d MiB | delta: %d%% | ceiling(+5%%): %d MiB\n' \ + $((BYTES/1048576)) $((FOOTPRINT_BASELINE_BYTES/1048576)) "$PCT" $((CEIL/1048576)) + if [ "$BYTES" -gt "$CEIL" ]; then + echo "::error::dependency footprint grew ${PCT}% (>5%). Slim the PR, or bump FOOTPRINT_BASELINE_BYTES in tests/installer/footprint_baseline.env deliberately." + exit 1 + fi + # Real end-to-end install across the OS matrix — the prebuilt-runtime promise. # The release manifest (uv pin, CPython) comes from the published Release; # VASTAI_PIP_SPEC overrides the wheel to this PR's build. Linux variants run in diff --git a/tests/installer/footprint_baseline.env b/tests/installer/footprint_baseline.env new file mode 100644 index 00000000..c0dd388a --- /dev/null +++ b/tests/installer/footprint_baseline.env @@ -0,0 +1,11 @@ +# Baseline installed dependency footprint: site-packages bytes for the vastai +# closure on linux x86_64 (managed CPython 3.12). Read by installer-ci.yml's +# footprint-budget job, which fails CI if a PR grows the closure more than +5%. +# +# When a PR legitimately needs the extra weight, bump this number DELIBERATELY in +# its own reviewed line — that diff is the record that the growth was intentional +# (vs. an accidental heavy dep, the way transformers slipped in via #424). The +# job prints the measured size + delta% to make re-baselining trivial. +# +# ~127 MiB. (Was ~276 MiB before the transformers cluster was dropped.) +FOOTPRINT_BASELINE_BYTES=133476211 diff --git a/tests/test_dependency_budget.py b/tests/test_dependency_budget.py new file mode 100644 index 00000000..4302bb30 --- /dev/null +++ b/tests/test_dependency_budget.py @@ -0,0 +1,48 @@ +"""Guard the CLI's dependency closure against bloat regressions. + +A CLI has no business pulling an ML / scientific-computing stack. `transformers` +once slipped in via a version-bump PR (#424) and silently dragged ~138 MiB of +transitive deps (numpy, tokenizers, hf-xet, ...) into every install. This test +fails the instant any of that re-enters `poetry.lock` — the full *transitive* +closure, since that's where the weight hides. + +This is the cheap, deterministic, platform-agnostic guard (Layer 1). The +installed-size ratchet in installer-ci.yml (Layer 2) is the complementary +backstop for an existing dep ballooning without a new name appearing. +""" + +import pathlib +import tomllib + +LOCK = pathlib.Path(__file__).resolve().parent.parent / "poetry.lock" + +# Packages that must never appear in the CLI's resolved dependency closure. +# Add here (with intent) if one ever becomes genuinely required. +FORBIDDEN = { + "transformers", + "tokenizers", + "huggingface-hub", + "hf-xet", + "hf-transfer", + "safetensors", + "numpy", + "scipy", + "pandas", + "torch", + "tensorflow", + "nltk", +} + + +def _closure_names(): + data = tomllib.loads(LOCK.read_text()) + return {pkg["name"].lower() for pkg in data["package"]} + + +def test_no_heavy_deps_in_closure(): + intruders = sorted(_closure_names() & FORBIDDEN) + assert not intruders, ( + f"heavy dependency re-entered the CLI closure: {intruders}. " + "A CLI should not pull an ML/scientific stack — slim the change, or if " + "this is genuinely required, remove it from FORBIDDEN with justification." + ) From 1b5e220e86fe618cf51eef4af09a45999ba800d5 Mon Sep 17 00:00:00 2001 From: sammy javed Date: Tue, 23 Jun 2026 14:09:00 -0700 Subject: [PATCH 2/2] =?UTF-8?q?ci(deps):=20address=20review=20=E2=80=94=20?= =?UTF-8?q?collect=20dep=20test,=20pin=20uv,=20harden=20footprint=20measur?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/installer-ci.yml | 8 ++++++-- tests/{ => cli}/test_dependency_budget.py | 22 +++++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) rename tests/{ => cli}/test_dependency_budget.py (62%) diff --git a/.github/workflows/installer-ci.yml b/.github/workflows/installer-ci.yml index 175787c8..f24b9622 100644 --- a/.github/workflows/installer-ci.yml +++ b/.github/workflows/installer-ci.yml @@ -88,14 +88,18 @@ jobs: name: pr-wheel path: dist - name: install uv - run: curl -LsSf https://astral.sh/uv/install.sh | sh + # Pinned to the same uv the release manifest ships (make_manifest.py). + run: curl -LsSf https://astral.sh/uv/0.11.21/install.sh | sh - name: measure dependency footprint vs baseline (+5% ratchet) run: | + set -euo pipefail . tests/installer/footprint_baseline.env export PATH="$HOME/.local/bin:$PATH" uv venv /tmp/fp --python 3.12 --quiet uv pip install --python /tmp/fp/bin/python --quiet dist/vastai-*.whl - SITE="$(find /tmp/fp/lib -type d -name site-packages)" + # Ask the venv's own interpreter for site-packages — one deterministic + # path, vs. a find that could match zero or several dirs. + SITE="$(/tmp/fp/bin/python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" BYTES="$(du -sb "$SITE" | cut -f1)" CEIL=$(( FOOTPRINT_BASELINE_BYTES * 105 / 100 )) PCT=$(( (BYTES - FOOTPRINT_BASELINE_BYTES) * 100 / FOOTPRINT_BASELINE_BYTES )) diff --git a/tests/test_dependency_budget.py b/tests/cli/test_dependency_budget.py similarity index 62% rename from tests/test_dependency_budget.py rename to tests/cli/test_dependency_budget.py index 4302bb30..830c0229 100644 --- a/tests/test_dependency_budget.py +++ b/tests/cli/test_dependency_budget.py @@ -6,15 +6,27 @@ fails the instant any of that re-enters `poetry.lock` — the full *transitive* closure, since that's where the weight hides. -This is the cheap, deterministic, platform-agnostic guard (Layer 1). The -installed-size ratchet in installer-ci.yml (Layer 2) is the complementary -backstop for an existing dep ballooning without a new name appearing. +Lives under tests/cli/ so the `pytest cli api sdk` invocation in +vast-sdk-testing.yml actually collects it. This is the cheap, deterministic, +platform-agnostic guard (Layer 1); the installed-size ratchet in +installer-ci.yml (Layer 2) is the complementary backstop for an existing dep +ballooning without a new package name appearing. """ import pathlib import tomllib -LOCK = pathlib.Path(__file__).resolve().parent.parent / "poetry.lock" + +def _find_lock() -> pathlib.Path: + """Walk up from this file to the repo's poetry.lock (robust to test location).""" + for parent in pathlib.Path(__file__).resolve().parents: + candidate = parent / "poetry.lock" + if candidate.exists(): + return candidate + raise FileNotFoundError("poetry.lock not found in any parent directory") + + +LOCK = _find_lock() # Packages that must never appear in the CLI's resolved dependency closure. # Add here (with intent) if one ever becomes genuinely required. @@ -35,7 +47,7 @@ def _closure_names(): - data = tomllib.loads(LOCK.read_text()) + data = tomllib.loads(LOCK.read_text(encoding="utf-8")) return {pkg["name"].lower() for pkg in data["package"]}