From eb962702b24b0cd8c226b69f05963ba13370890a Mon Sep 17 00:00:00 2001
From: Christian Kadner
Date: Mon, 24 Mar 2025 16:39:29 -0700
Subject: [PATCH 1/2] [CI/BUILD] Use build cache for Spyre tests

- Update GH workflow for Spyre tests to ...
  - build and cache a test image
  - use the cached test image for both v0 and v1 tests
  - run v0 and v1 tests in parallel
- Add build stages to Dockerfile, with separate stage for tests
- Add .dockerignore

Signed-off-by: Christian Kadner
---
 .dockerignore                    |   8 ++
 .github/workflows/test-spyre.yml | 144 ++++++++++++++++++++++++++-----
 Dockerfile.spyre                 |  63 ++++++++++++--
 3 files changed, 187 insertions(+), 28 deletions(-)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 000000000..a72060d37
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,8 @@
+# exclude any files inside the .git folder to not invalidate docker layer caches
+.git
+
+# exclude any files that are not required in the build image
+.github
+examples
+tools
+*.md
diff --git a/.github/workflows/test-spyre.yml b/.github/workflows/test-spyre.yml
index 1dfec29af..23d771e75 100644
--- a/.github/workflows/test-spyre.yml
+++ b/.github/workflows/test-spyre.yml
@@ -1,29 +1,133 @@
 name: test-sypre
 
-on: pull_request
+on:
+  workflow_dispatch:
+  push:
+    paths-ignore:
+      - "**.md"
+  pull_request:
+    paths-ignore:
+      - "**.md"
+
+defaults:
+  run:
+    shell: bash
+
+env:
+  TEST_IMAGE_NAME: "vllm-spyre"
 
 jobs:
-  test-spyre:
+  build:
     runs-on: ubuntu-latest
+    permissions:
+      packages: write
+      contents: read
+    env:
+      CACHE_IMAGE: "ghcr.io/vllm-project/vllm-spyre:test-cache"
+      CACHE_REGISTRY: "ghcr.io"
+
     steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Build docker image
-        run: docker build . -t vllm-spyre -f Dockerfile.spyre
-      - name: Run Spyre tests within docker container
+      - name: "Checkout"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: "Setup Docker Buildx"
+        uses: docker/setup-buildx-action@v3
+
+      - name: "Log in to cache image container registry"
+        uses: docker/login-action@v3
+        if: github.event_name != 'pull_request'
+        with:
+          registry: ${{ env.CACHE_REGISTRY }}
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: "Set build cache target"
+        run: |
+          # For push to `main` (PR merged), push new cache image with all layers (cache-mode=max).
+          # For PR builds, use GitHub action cache which isolates cached layers by PR/branch
+          # to optimize builds for subsequent pushes to the same PR/branch.
+          # Do not set a cache-to image for PR builds to not overwrite the `main` cache image
+          # to not ping-pong cache images for two or more concurrent PRs.
+          # Do not push cache images for multiple branches to not exceed GitHub package
+          # usage and traffic limitations.
+          # NOTE 2025/03/24: GHA cache appears to have issues, cannot use `cache-to: gha,mode=min`
+          # if `cache-from: registry,...,mode=max` but `cache-to: gha,mode=max` it takes longer than
+          # uncached build and exhausts GHA cache size, so use cache `type=inline` (no external cache).
+          if [ "${{ github.event_name }}" == "pull_request" ]
+          then
+            #CACHE_TO="type=gha,mode=min"
+            CACHE_TO="type=inline"
+          else
+            CACHE_TO="type=registry,ref=${{ env.CACHE_IMAGE }},mode=max"
+          fi
+          echo "CACHE_TO=$CACHE_TO" >> "$GITHUB_ENV"
+
+      - name: "Build test image"
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.spyre
+          target: "spyre-tests"
+          tags: ${{ env.TEST_IMAGE_NAME }}
+          cache-from: |
+            type=gha
+            type=registry,ref=${{ env.CACHE_IMAGE }}
+          cache-to: ${{ env.CACHE_TO }}
+          outputs: type=docker,dest=${{ runner.temp }}/test_image.tar
+
+      - name: "Upload test image"
+        uses: actions/upload-artifact@v4
+        with:
+          name: "test-image"
+          path: ${{ runner.temp }}/test_image.tar
+          retention-days: 1
+
+  test-spyre-v0:
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: "Download test image"
+        uses: actions/download-artifact@v4
+        with:
+          name: "test-image"
+          path: ${{ runner.temp }}
+
+      - name: "Load test image"
+        run: |
+          docker load --input ${{ runner.temp }}/test_image.tar
+          docker image ls -a
+
+      - name: Run Spyre tests in docker container (V0 and eager)
+        run: |
+          docker run -i --rm --entrypoint /bin/bash ${{ env.TEST_IMAGE_NAME }} -c '''
+          cd vllm-spyre && \
+          python -m pytest --timeout=300 tests -v -k "V0 and eager"
+          '''
+
+  test-spyre-v1:
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: "Checkout"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: "Download test image"
+        uses: actions/download-artifact@v4
+        with:
+          name: "test-image"
+          path: ${{ runner.temp }}
+
+      - name: "Load test image"
+        run: |
+          docker load --input ${{ runner.temp }}/test_image.tar
+          docker image ls -a
+
+      - name: Run Spyre tests in docker container (V1 and eager)
         run: |
-          docker run -i --rm --entrypoint /bin/bash vllm-spyre -c '''
-          pip install pytest sentence-transformers pytest-timeout pytest-forked && \
-          python -c "from transformers import pipeline; pipeline(\"text-generation\", model=\"JackFram/llama-160m\")" && \
-          export VARIANT=$(ls /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/) && \
-          mkdir -p /models && \
-          ln -s /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/${VARIANT} /models/llama-194m && \
-          python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer(\"sentence-transformers/all-roberta-large-v1\")" && \
-          export VARIANT=$(ls /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/) && \
-          ln -s /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT} /models/all-roberta-large-v1 && \
-          export MASTER_PORT=12355 && \
-          export MASTER_ADDR=localhost && \
-          export DISTRIBUTED_STRATEGY_IGNORE_MODULES=WordEmbedding && \
+          docker run -i --rm --entrypoint /bin/bash ${{ env.TEST_IMAGE_NAME }} -c '''
           cd vllm-spyre && \
-          python -m pytest --timeout=300 tests -v -k "V0 and eager" && \
-          python -m pytest --forked --timeout=300 tests -v -k "V1- and eager"
+          python -m pytest --forked --timeout=300 tests -v -k "V1 and eager"
           '''
diff --git a/Dockerfile.spyre b/Dockerfile.spyre
index cfdbfcc60..b48b9c102 100644
--- a/Dockerfile.spyre
+++ b/Dockerfile.spyre
@@ -1,21 +1,35 @@
-# Global Args #################################################################
+## Global Args #################################################################
 ARG BASE_UBI_IMAGE_TAG=9.4
 ARG PYTHON_VERSION=3.12
-# Base Layer ##################################################################
+
+## Base Layer ##################################################################
 FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
+
 ARG PYTHON_VERSION
 ENV PYTHON_VERSION=${PYTHON_VERSION}
 WORKDIR /workspace
 
-# Install some basic utilities ##################################################################
-RUN microdnf update -y && microdnf install -y \
-    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel git vim gcc g++ kmod\
+# Install basic utilities
+RUN microdnf update -y \
+    && microdnf install -y \
+        python${PYTHON_VERSION}-devel \
+        python${PYTHON_VERSION}-pip \
+        python${PYTHON_VERSION}-wheel \
+        git \
+        vim \
+        gcc \
+        g++ \
+        kmod \
     && microdnf clean all
 
 RUN ln -sf $(which python${PYTHON_VERSION}) /usr/bin/python && \
     ln -sf $(which pip${PYTHON_VERSION}) /usr/bin/pip
 
-# Download and install vllm ###########################################################
+
+## vLLM Base ###################################################################
+FROM base AS vllm-base
+
+# Download and install vllm
 RUN git clone --depth 1 https://github.com/vllm-project/vllm.git \
     && cd vllm \
     && git fetch --tags \
@@ -26,10 +40,43 @@ RUN git clone --depth 1 https://github.com/vllm-project/vllm.git \
     && pip install -r requirements/build.txt \
     && SETUPTOOLS_SCM_PRETEND_VERSION=0.8.0 VLLM_TARGET_DEVICE=empty pip install --verbose . --no-build-isolation
 
-# Install vllm Spyre plugin ##################################################################
+
+## Spyre Base ##################################################################
+FROM vllm-base AS spyre-base
+
+# Install vllm Spyre plugin
 RUN mkdir /workspace/vllm-spyre
 COPY . /workspace/vllm-spyre
 RUN cd /workspace/vllm-spyre && pip install -v -e .
 ENV VLLM_PLUGINS=spyre
 
-CMD ["/bin/bash"]
\ No newline at end of file
+
+## Spyre Tests #################################################################
+FROM spyre-base AS spyre-tests
+
+# set environment variables to run tests
+ENV MASTER_ADDR=localhost \
+    MASTER_PORT=12355 \
+    DISTRIBUTED_STRATEGY_IGNORE_MODULES=WordEmbedding
+
+# Install test dependencies
+RUN pip install \
+        sentence-transformers \
+        pytest \
+        pytest-timeout \
+        pytest-forked
+
+# Download models
+RUN mkdir -p /models \
+    && python -c "from transformers import pipeline; pipeline('text-generation', model='JackFram/llama-160m')" \
+    && export VARIANT=$(ls /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/) \
+    && ln -s /root/.cache/huggingface/hub/models--JackFram--llama-160m/snapshots/${VARIANT} /models/llama-194m \
+    && python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/all-roberta-large-v1')" \
+    && export VARIANT=$(ls /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/) \
+    && ln -s /root/.cache/huggingface/hub/models--sentence-transformers--all-roberta-large-v1/snapshots/${VARIANT} /models/all-roberta-large-v1
+
+
+## Spyre Release ###############################################################
+FROM spyre-base AS spyre-release
+
+CMD ["/bin/bash"]

From 10794611cf81648ee3fb29b0beb8056b7eacc407 Mon Sep 17 00:00:00 2001
From: Christian Kadner
Date: Thu, 27 Mar 2025 19:19:59 -0700
Subject: [PATCH 2/2] Clean up untagged cache images

Signed-off-by: Christian Kadner
---
 .github/workflows/test-spyre.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/test-spyre.yml b/.github/workflows/test-spyre.yml
index cf5a097d7..8f192c1b3 100644
--- a/.github/workflows/test-spyre.yml
+++ b/.github/workflows/test-spyre.yml
@@ -24,6 +24,7 @@ jobs:
       contents: read
     env:
       CACHE_IMAGE: "ghcr.io/vllm-project/vllm-spyre:test-cache"
+      CACHE_PACKAGE_NAME: "vllm-spyre"
       CACHE_REGISTRY: "ghcr.io"
 
     steps:
@@ -82,6 +83,14 @@ jobs:
           path: ${{ runner.temp }}/test_image.tar
           retention-days: 1
 
+      - name: "Cleanup old cache images"
+        uses: actions/delete-package-versions@v5
+        if: ${{ github.event_name == 'push' }}
+        with:
+          package-name: ${{ env.CACHE_PACKAGE_NAME }}
+          package-type: container
+          delete-only-untagged-versions: true
+
   test-spyre-v0:
     runs-on: ubuntu-latest
     needs: build