Merged · Changes from all commits
50 changes: 0 additions & 50 deletions .github/workflows/codespell.yml

This file was deleted.

21 changes: 14 additions & 7 deletions .github/workflows/ruff.yml → .github/workflows/lint_code.yml
@@ -1,4 +1,4 @@
-name: ruff
+name: lint code
 
 on:
   # Trigger the workflow on push or pull request,
@@ -36,17 +36,24 @@ jobs:
     steps:
     - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+      uses: astral-sh/setup-uv@v5
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install -r requirements-lint.txt
+      run: uv sync --frozen --only-group lint
     - name: Analysing the code with ruff
       run: |
         echo "::add-matcher::.github/workflows/matchers/ruff.json"
         ruff check --output-format github .
     - name: Run isort
-      run: |
-        isort . --check-only
+      # using `always()` here ensures all the checks run even if previous
+      # checks fail
+      if: always()
+      run: isort . --check-only
+    - name: run yapf
+      if: always()
+      run: yapf --diff --recursive .
+    - name: Spelling check with codespell
+      if: always()
+      run: codespell --toml pyproject.toml
Comment on lines +57 to +58

Member: Should we also add mypy here?

Collaborator (Author): We could, though right now that's running on multiple Python versions. I assume that's to make sure that mypy will be happy with any version of Python that somebody wants to develop locally with. But I guess it's also not too hard to just run all these checks with every Python version as well.

Member: I don't think we really need to run it with every Python version; this is just inherited from upstream checks.

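For reference, the combined lint job can be reproduced locally. A minimal sketch, assuming `uv` is installed and the commands run from the repository root:

```
# Install only the pinned lint tools from the lockfile
uv sync --frozen --only-group lint

# Run the same checks the workflow runs; in CI, `if: always()` keeps the
# later steps running even when an earlier check fails
uv run ruff check .
uv run isort . --check-only
uv run yapf --diff --recursive .
uv run codespell --toml pyproject.toml
```

In practice `format.sh` wraps these same tools for contributors.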
2 changes: 1 addition & 1 deletion .github/workflows/reminder_comment.yml
@@ -15,7 +15,7 @@ jobs:
             owner: context.repo.owner,
             repo: context.repo.repo,
             issue_number: context.issue.number,
-            body: "👋 Hi! Thank you for contributing to vLLM support on Spyre.\n Just a reminder: Make sure that your code passes all the linting checks, otherwise your PR won't be able to be merged. To do so, first install the linting requirements, then run `format.sh` and commit the changes:\n```\npip install -r requirements-lint.txt\nbash format.sh\n```\nNow you are good to go 🚀"
+            body: "👋 Hi! Thank you for contributing to vLLM support on Spyre.\n Just a reminder: Make sure that your code passes all the linting checks, otherwise your PR won't be able to be merged. To do so, first install the linting requirements, then run `format.sh` and commit the changes. This can be done with `uv` directly:\n```\nuv sync --frozen --group lint\nbash format.sh\n```\nOr with `pip`:\n```\nuv pip compile --group lint > requirements-lint.txt\npip install -r requirements-lint.txt\nbash format.sh\n```\nNow you are good to go 🚀"
           })
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2 changes: 1 addition & 1 deletion .github/workflows/test-spyre.yml
@@ -12,7 +12,7 @@ jobs:
     - name: Run Spyre tests within docker container
       run: |
         docker run -i --rm --entrypoint /bin/bash vllm-spyre -c '''
-        pip install pytest sentence-transformers pytest-timeout pytest-forked && \
+        source vllm-spyre/.venv/bin/activate && \
         python -c "from transformers import pipeline; pipeline(\"text-generation\", model=\"JackFram/llama-160m\")" && \
         export VARIANT=$(ls /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/) && \
         mkdir -p /models && \
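The test job now activates the virtualenv baked into the image instead of installing test dependencies at run time. A rough local equivalent, assuming the image is built from `Dockerfile.spyre` and tagged `vllm-spyre`:

```
# assumption: the `vllm-spyre` tag and the /workspace layout from Dockerfile.spyre
docker build -t vllm-spyre -f Dockerfile.spyre .

# run the test suite inside the container using the baked-in venv
docker run -i --rm --entrypoint /bin/bash vllm-spyre -c '
  source vllm-spyre/.venv/bin/activate && python -m pytest vllm-spyre/tests -v
'
```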
42 changes: 0 additions & 42 deletions .github/workflows/yapf.yml

This file was deleted.

4 changes: 4 additions & 0 deletions .yapfignore
@@ -1 +1,5 @@
 vllm_spyre/model_executor/model_loader/spyre_setup.py
+.venv
+.conda
+.mypy_cache
+.pytest_cache
22 changes: 10 additions & 12 deletions Dockerfile.spyre
@@ -15,21 +15,19 @@ RUN microdnf update -y && microdnf install -y \
 RUN ln -sf $(which python${PYTHON_VERSION}) /usr/bin/python && \
     ln -sf $(which pip${PYTHON_VERSION}) /usr/bin/pip
 
-# Download and install vllm ###########################################################
-RUN git clone --depth 1 https://github.com/vllm-project/vllm.git \
-    && cd vllm \
-    && git fetch --tags \
-    && git checkout v0.8.0 \
-    && python -m pip install --upgrade pip \
-    && pip3 install torch=="2.5.1+cpu" --index-url https://download.pytorch.org/whl/cpu \
-    && python use_existing_torch.py \
-    && pip install -r requirements/build.txt \
-    && SETUPTOOLS_SCM_PRETEND_VERSION=0.8.0 VLLM_TARGET_DEVICE=empty pip install --verbose . --no-build-isolation
-
 # Install vllm Spyre plugin ##################################################################
 RUN mkdir /workspace/vllm-spyre
 COPY . /workspace/vllm-spyre
-RUN cd /workspace/vllm-spyre && pip install -v -e .
+# Torch must be installed first
+RUN pip install torch==2.5.1+cpu --index-url https://download.pytorch.org/whl/cpu
+# Install uv
+RUN pip install uv
+# Install the plugin in a new venv, along with dev deps to test with
+RUN cd /workspace/vllm-spyre \
+    && uv venv .venv --system-site-packages \
+    && source .venv/bin/activate \
+    && VLLM_TARGET_DEVICE=empty uv pip install -v -e . --system \
+    && uv sync --frozen --group dev
 ENV VLLM_PLUGINS=spyre
 
 CMD ["/bin/bash"]

Collaborator (Author), on the removed vllm build block: no more installing vllm manually 😎😎😎
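A note on the layering here: torch is installed system-wide first, the venv is created with `--system-site-packages` so it can see that torch, and the `[tool.uv]` overrides in `pyproject.toml` keep dependency resolution from ever reinstalling it. A quick sanity check inside the built container (illustrative only):

```
# the venv inherits the system torch rather than carrying its own copy
source vllm-spyre/.venv/bin/activate
python -c "import torch; print(torch.__version__)"  # expect 2.5.1+cpu
python -c "import vllm_spyre"                       # the editable plugin install
```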
12 changes: 8 additions & 4 deletions README.md
@@ -22,13 +22,17 @@ docker run -it --rm vllm-spyre bash

 ### In a local environment
 
+We use the [uv](https://docs.astral.sh/uv/) package manager to manage the
+installation of the plugin and its dependencies. `uv` provides advanced
+dependency resolution, which is required to properly install dependencies like
+`vllm` without overwriting critical dependencies like `torch`.
+
 ```
-# Install vllm
-pip install vllm==0.8.0
+# Install uv
+pip install uv
 
 # Install vllm-spyre
 cd ..
 git clone https://github.com/vllm-project/vllm-spyre.git
 cd vllm-spyre
-pip install -v -e .
+VLLM_TARGET_DEVICE=empty uv pip install -e .
 ```
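Since the `[tool.uv]` overrides exclude `torch` from resolution, it must be installed separately. A possible smoke test for a CPU-only setup, assuming an activated virtual environment (the version pin is borrowed from `Dockerfile.spyre`):

```
# torch is deliberately never resolved by uv, so install it explicitly
uv pip install torch==2.5.1+cpu --index-url https://download.pytorch.org/whl/cpu

# then verify the plugin imports
python -c "import vllm_spyre; print('plugin import OK')"
```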
2 changes: 1 addition & 1 deletion format.sh
@@ -43,7 +43,7 @@ PYMARKDOWNLNT_VERSION=$(pymarkdownlnt version | awk '{print $1}')

 # params: tool name, tool version, required version
 tool_version_check() {
-    expected=$(grep "$1" requirements-lint.txt | cut -d'=' -f3)
+    expected=$(grep "\"$1" pyproject.toml | cut -d'=' -f3 | cut -d'"' -f1)
     if [[ "$2" != "$expected" ]]; then
         echo "❓❓Wrong $1 version installed: $expected is required, not $2."
         exit 1
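The check now greps the pinned version out of `pyproject.toml` instead of `requirements-lint.txt`. A worked example of the new pipeline against the lint group's `"ruff==0.6.5",` entry:

```
# field 3 after splitting on '=' is `0.6.5",`; cutting on '"' drops the tail
grep '"ruff' pyproject.toml | cut -d'=' -f3 | cut -d'"' -f1   # prints 0.6.5
```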
63 changes: 63 additions & 0 deletions pyproject.toml
@@ -1,6 +1,46 @@
+[build-system]
+requires = [
+    "setuptools>=48",
+]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "vllm-spyre"
+description = "vLLM plugin for Spyre hardware support"
+readme = "README.md"
+license = {text = "Apache 2"}
+version = "0.0.0"  # TODO dynamic versioning
+dependencies = [
+    "fms-model-optimizer>=0.2.0",
+    "ibm-fms==0.0.8",
+    "vllm",
+]

Collaborator (Author): We can specify vllm and fms-model-optimizer, and don't have to worry about either of them overriding torch 👍

+[project.entry-points."vllm.platform_plugins"]
+spyre = "vllm_spyre:register"
+
+[tool.setuptools.packages.find]
+where = ["."]  # list of folders that contain the packages (["."] by default)
+include = ["vllm_spyre"]  # package names should match these glob patterns (["*"] by default)
+exclude = []  # exclude packages matching these glob patterns (empty by default)
+namespaces = false  # to disable scanning PEP 420 namespaces (true by default)


+[tool.setuptools_scm]
+# version_file = "vllm_spyre/_version.py"  # currently handled by `setup.py:get_version()`
+
+[tool.uv]
+# Never install torch, so that no dependencies can override it.
+# This requires that torch is installed separately in the target environment.
+override-dependencies = [
+    "torch; sys_platform == 'never'",
+    "torchaudio; sys_platform == 'never'",
+    "torchvision; sys_platform == 'never'",
+]

+[tool.uv.sources]
+vllm = { git = "https://github.com/vllm-project/vllm", rev = "v0.8.0" }

Collaborator (Author): Specifying the source for vllm allows uv to run the install from source, specifying the empty target so that the gpu deps don't get installed.
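Taken together: the `sys_platform == 'never'` marker can never be satisfied, so uv drops `torch` (and torchaudio/torchvision) from every dependency's requirements rather than resolving them, while the git source pins vllm to the `v0.8.0` tag and builds it from source. A sketch of the resulting install step, matching the README:

```
# vllm is built from the pinned tag; the empty target skips its GPU deps,
# and the overrides guarantee the separately-installed torch is untouched
VLLM_TARGET_DEVICE=empty uv pip install -e .
```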

 [tool.ruff]
 # Allow lines to be as long as 80.
 line-length = 80
@@ -78,3 +118,26 @@ plugins.md013.enabled = false # line-length
 plugins.md041.enabled = false # first-line-h1
 plugins.md033.enabled = false # inline-html
 plugins.md024.allow_different_nesting = true # no-duplicate-headers

+[dependency-groups]
+dev = [
+    "pytest==8.3.4",
+    "pytest-forked==1.6.0",
+    "pytest-timeout==2.3.1",
+    "requests==2.32.3",
+    "sentence-transformers==3.4.1",
+]
+lint = [
+    "clang-format==18.1.5",
+    "codespell==2.3.0",
+    "isort==5.13.2",
+    "mypy==1.11.1",
+    "pymarkdownlnt==0.9.26",
+    "ruff==0.6.5",
+    "toml==0.10.2",
+    "tomli==2.0.2",
+    "types-pyyaml>=6.0.12.20250326",
+    "types-requests>=2.32.0.20250328",
+    "types-setuptools>=77.0.2.20250328",
+    "yapf==0.43.0",
+]
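These PEP 735 dependency groups replace the deleted `requirements-lint.txt` and `requirements-test.txt`. Typical invocations, mirroring the workflows above:

```
# the project plus everything needed to run the tests
uv sync --frozen --group dev

# just the pinned lint tools, without the project's own dependencies
uv sync --frozen --only-group lint
```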
15 changes: 0 additions & 15 deletions requirements-lint.txt

This file was deleted.

5 changes: 0 additions & 5 deletions requirements-test.txt

This file was deleted.

4 changes: 0 additions & 4 deletions requirements.txt

This file was deleted.

40 changes: 0 additions & 40 deletions setup.py

This file was deleted.

6 changes: 3 additions & 3 deletions tests/test_spyre_basic.py
@@ -18,9 +18,9 @@
     " chicken soup for a family of four.", "Hello",
     "What is the weather today like?", "Who are you?"
 ]])
-@pytest.mark.parametrize("warmup_shape", [(64, 20, 4), (64, 20, 8),
-                                          (128, 20, 4), (128, 20, 8)]
-                         )  # (prompt_length/new_tokens/batch_size)
+@pytest.mark.parametrize(
+    "warmup_shape", [(64, 20, 4), (64, 20, 8), (128, 20, 4),
+                     (128, 20, 8)])  # (prompt_length/new_tokens/batch_size)
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
 @pytest.mark.parametrize("vllm_version", ["V0", "V1"])
 def test_output(
4 changes: 2 additions & 2 deletions tests/test_spyre_max_new_tokens.py
@@ -26,8 +26,8 @@
     [prompt2, prompt2, prompt2, prompt1],
     [prompt2, prompt2, prompt2, prompt2]])
 @pytest.mark.parametrize("stop_last", [True, False])
-@pytest.mark.parametrize("warmup_shape", [(64, 10, 4)]
-                         )  # (prompt_length/new_tokens/batch_size)
+@pytest.mark.parametrize(
+    "warmup_shape", [(64, 10, 4)])  # (prompt_length/new_tokens/batch_size)
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
 @pytest.mark.parametrize("vllm_version", ["V0", "V1"])
 def test_output(
7 changes: 4 additions & 3 deletions tests/test_spyre_max_prompt_length.py
@@ -25,9 +25,10 @@
     " words what you are."
     ]
 ])
-@pytest.mark.parametrize("warmup_shapes",
-                         [[(64, 20, 4)], [(64, 20, 4), (128, 20, 4)]]
-                         )  # (prompt_length/new_tokens/batch_size)
+@pytest.mark.parametrize(
+    "warmup_shapes",
+    [[(64, 20, 4)], [(64, 20, 4),
+                     (128, 20, 4)]])  # (prompt_length/new_tokens/batch_size)
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
 @pytest.mark.parametrize("vllm_version", ["V0", "V1"])
 def test_output(