diff --git a/.github/workflows/installer-ci.yml b/.github/workflows/installer-ci.yml index 0553dbba..f24b9622 100644 --- a/.github/workflows/installer-ci.yml +++ b/.github/workflows/installer-ci.yml @@ -9,6 +9,7 @@ on: - "scripts/publish_release.py" - "tests/installer/**" - "pyproject.toml" + - "poetry.lock" - ".github/workflows/installer-ci.yml" concurrency: @@ -71,6 +72,44 @@ jobs: name: pr-wheel path: dist/vastai-*.whl + # Footprint ratchet: install THIS PR's closure and fail if it grew > 5% over + # the committed baseline. A CLI shouldn't quietly balloon (transformers added + # ~138 MiB via #424). On a real increase the dev either slims the PR or bumps + # tests/installer/footprint_baseline.env deliberately — the diff records intent. + # Hermetic uv measure on one canonical platform (linux x86_64); the per-package + # denylist in tests/cli/test_dependency_budget.py covers the cross-platform angle. + footprint-budget: + needs: build-wheel + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: pr-wheel + path: dist + - name: install uv + # Pinned to the same uv the release manifest ships (make_manifest.py). + run: curl -LsSf https://astral.sh/uv/0.11.21/install.sh | sh + - name: measure dependency footprint vs baseline (+5% ratchet) + run: | + set -euo pipefail + . tests/installer/footprint_baseline.env + export PATH="$HOME/.local/bin:$PATH" + uv venv /tmp/fp --python 3.12 --quiet + uv pip install --python /tmp/fp/bin/python --quiet dist/vastai-*.whl + # Ask the venv's own interpreter for site-packages — one deterministic + # path, vs. a find that could match zero or several dirs.
+ SITE="$(/tmp/fp/bin/python -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')" + BYTES="$(du -sb "$SITE" | cut -f1)" + CEIL=$(( FOOTPRINT_BASELINE_BYTES * 105 / 100 )) + PCT=$(( (BYTES - FOOTPRINT_BASELINE_BYTES) * 100 / FOOTPRINT_BASELINE_BYTES )) + printf 'footprint: %d MiB | baseline: %d MiB | delta: %d%% | ceiling(+5%%): %d MiB\n' \ + $((BYTES/1048576)) $((FOOTPRINT_BASELINE_BYTES/1048576)) "$PCT" $((CEIL/1048576)) + if [ "$BYTES" -gt "$CEIL" ]; then + echo "::error::dependency footprint grew ${PCT}% (>5%). Slim the PR, or bump FOOTPRINT_BASELINE_BYTES in tests/installer/footprint_baseline.env deliberately." + exit 1 + fi + # Real end-to-end install across the OS matrix — the prebuilt-runtime promise. # The release manifest (uv pin, CPython) comes from the published Release; # VASTAI_PIP_SPEC overrides the wheel to this PR's build. Linux variants run in diff --git a/tests/cli/test_dependency_budget.py b/tests/cli/test_dependency_budget.py new file mode 100644 index 00000000..830c0229 --- /dev/null +++ b/tests/cli/test_dependency_budget.py @@ -0,0 +1,60 @@ +"""Guard the CLI's dependency closure against bloat regressions. + +A CLI has no business pulling an ML / scientific-computing stack. `transformers` +once slipped in via a version-bump PR (#424) and silently dragged ~138 MiB of +transitive deps (numpy, tokenizers, hf-xet, ...) into every install. This test +fails the instant any of that re-enters `poetry.lock` — the full *transitive* +closure, since that's where the weight hides. + +Lives under tests/cli/ so the `pytest cli api sdk` invocation in +vast-sdk-testing.yml actually collects it. This is the cheap, deterministic, +platform-agnostic guard (Layer 1); the installed-size ratchet in +installer-ci.yml (Layer 2) is the complementary backstop for an existing dep +ballooning without a new package name appearing.
+""" + +import pathlib +import tomllib + + +def _find_lock() -> pathlib.Path: + """Walk up from this file to the repo's poetry.lock (robust to test location).""" + for parent in pathlib.Path(__file__).resolve().parents: + candidate = parent / "poetry.lock" + if candidate.exists(): + return candidate + raise FileNotFoundError("poetry.lock not found in any parent directory") + + +LOCK = _find_lock() + +# Packages that must never appear in the CLI's resolved dependency closure. +# Spell entries lowercase with hyphens; drop one only if it becomes genuinely required. +FORBIDDEN = { + "transformers", + "tokenizers", + "huggingface-hub", + "hf-xet", + "hf-transfer", + "safetensors", + "numpy", + "scipy", + "pandas", + "torch", + "tensorflow", + "nltk", +} + + +def _closure_names(): # lock names folded to lowercase-with-hyphens so "hf_xet" / "hf.xet" still match + data = tomllib.loads(LOCK.read_text(encoding="utf-8")) + return {pkg["name"].lower().replace("_", "-").replace(".", "-") for pkg in data["package"]} + + +def test_no_heavy_deps_in_closure(): + intruders = sorted(_closure_names() & FORBIDDEN) + assert not intruders, ( + f"heavy dependency re-entered the CLI closure: {intruders}. " + "A CLI should not pull an ML/scientific stack — slim the change, or if " + "this is genuinely required, remove it from FORBIDDEN with justification." + ) diff --git a/tests/installer/footprint_baseline.env b/tests/installer/footprint_baseline.env new file mode 100644 index 00000000..c0dd388a --- /dev/null +++ b/tests/installer/footprint_baseline.env @@ -0,0 +1,11 @@ +# Baseline installed dependency footprint: site-packages bytes for the vastai +# closure on linux x86_64 (managed CPython 3.12). Read by installer-ci.yml's +# footprint-budget job, which fails CI if a PR grows the closure more than +5%. +# +# When a PR legitimately needs the extra weight, bump this number DELIBERATELY in +# its own reviewed line — that diff is the record that the growth was intentional +# (vs. an accidental heavy dep, the way transformers slipped in via #424). The +# job prints the measured size + delta% to make re-baselining trivial.
+# +# ~127 MiB. (Was ~276 MiB before the transformers cluster was dropped.) +FOOTPRINT_BASELINE_BYTES=133476211