diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 828d1a1b8..2f2231276 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -21,6 +21,8 @@ env:
   VLLM_PLUGINS: "spyre"
   HF_HUB_CACHE: "${{ github.workspace }}/.cache/huggingface/hub"
   DEFAULT_HF_MODEL: "ibm-ai-platform/micro-g3.3-8b-instruct-1b"
+  DEFAULT_HF_MODEL_REV: "6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f"
+# DEFAULT_HF_MODEL_REV: "2714578f54cfb744ece40df9326ee0b47e879e03"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -44,19 +46,20 @@ jobs:
       - name: "static batching"
         markers: "cpu and decoder and not cb and not other_e2e"
         flags: "--timeout=300"
-        hf_models: "JackFram/llama-160m"
+        hf_model: "JackFram/llama-160m"
       - name: "fp8"
         markers: "cpu and quantized and multi"
         flags: "--timeout=600 -k 'basic and test_output' --durations=0"
-        hf_models: "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8"
+        hf_model: "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8"
+        hf_model_rev: "main"
       - name: "embedding"
         markers: "cpu and embedding"
         flags: "--timeout=300"
-        hf_models: "sentence-transformers/all-roberta-large-v1"
+        hf_model: "sentence-transformers/all-roberta-large-v1"
       - name: "scoring"
         markers: "cpu and scoring"
         flags: "--timeout=300"
-        hf_models: "cross-encoder/stsb-roberta-large"
+        hf_model: "cross-encoder/stsb-roberta-large"
       - name: "continuous batching"
         markers: "cpu and cb"
         flags: "--timeout=300 --durations=0 -s"
@@ -80,7 +83,8 @@
         name: "backward compat"
         markers: "compat or (cpu and basic)"
         flags: "--timeout=300"
-        hf_models: "micro-g3.3_roberta-large"
+        hf_model_2: "sentence-transformers/all-roberta-large-v1"
+        hf_model_2_rev: "main"
         os: "ubuntu-latest"
         python_version: "3.12"
       # Exclude vLLM:main if PR does NOT have "ready" label AND auto-merge is not enabled
@@ -150,86 +154,88 @@ jobs:
           # overwritten.
           uv pip install -v .
-      # Standardize model name for cache keys
-      - name: Standardize HF model name
+      - name: "Standardize HF model names for caching"
         id: standardize-names
         run: |
-          model="${{ matrix.test_suite.hf_models || env.DEFAULT_HF_MODEL }}"
-          if [[ "$model" == "micro-g3.3_roberta-large" ]]; then
-            echo "model_key=micro-g3.3_roberta-large" >> "$GITHUB_ENV"
-            echo "model_path=${HF_HUB_CACHE}" >> "$GITHUB_ENV"
+          if [[ -n "${{ matrix.test_suite.hf_model }}" ]]; then
+            model="${{ matrix.test_suite.hf_model }}"
+            revision="${{ matrix.test_suite.hf_model_rev || 'main' }}"
           else
-            # replace / with --
-            safe_name="${model//\//--}"
-            echo "model_key=$safe_name" >> "$GITHUB_ENV"
-            echo "model_path=${HF_HUB_CACHE}/models--$safe_name" >> "$GITHUB_ENV"
+            model="${{ env.DEFAULT_HF_MODEL }}"
+            revision="${{ env.DEFAULT_HF_MODEL_REV }}"
+          fi
+          # replace '/' with '--'
+          safe_name="${model//\//--}"
+          echo "model_key=${safe_name}_${revision}" >> "$GITHUB_ENV"
+          echo "model_path=${HF_HUB_CACHE}/models--$safe_name" >> "$GITHUB_ENV"
+
+          if [[ -n "${{ matrix.test_suite.hf_model_2 }}" ]]; then
+            model_2="${{ matrix.test_suite.hf_model_2 }}"
+            revision_2="${{ matrix.test_suite.hf_model_2_rev || 'main' }}"
+            # replace '/' with '--'
+            safe_name_2="${model_2//\//--}"
+            echo "model_2_key=${safe_name_2}_${revision_2}" >> "$GITHUB_ENV"
+            echo "model_2_path=${HF_HUB_CACHE}/models--${safe_name_2}" >> "$GITHUB_ENV"
           fi
-
-      - name: "Restore HF models cache"
-        id: cache_restore
-        if: steps.changed-src-files.outputs.any_changed == 'true'
-        uses: actions/cache/restore@v4
-        with:
-          path: ${{ env.model_path }}
-          key: ${{ runner.os }}-hf-model-${{ env.model_key }}
+#      - name: "Restore HF models cache"
+#        id: cache_restore
+#        if: steps.changed-src-files.outputs.any_changed == 'true'
+#        uses: actions/cache/restore@v4
+#        with:
+#          path: ${{ env.model_path }}
+#          key: ${{ runner.os }}-hf-model-${{ env.model_key }}
+#
+#      - name: "Restore HF models cache for additional model"
+#        id: cache_restore_2
+#        if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 )
+#        uses: actions/cache/restore@v4
+#        with:
+#          path: ${{ env.model_2_path }}
+#          key: ${{ runner.os }}-hf-model-${{ env.model_2_key }}
 
       - name: "Download HF models"
-        if: ( steps.changed-src-files.outputs.any_changed == 'true' && steps.cache_restore.outputs.cache-hit != 'true' )
+#        if: ( steps.changed-src-files.outputs.any_changed == 'true' && (steps.cache_restore.outputs.cache-hit != 'true' || steps.cache_restore_2.outputs.cache-hit != 'true'))
         run: |
-          # We are caching HF models (HF_HUB_CACHE) for reliability rather than speed, since HF downloads are flaky for concurrent jobs.
-          # Be careful when adding models to the cache here, as the GHA cache is limited to 10 GB.
-          # If a new model is added here, a new hash key is generated. The previous cache blob can then
-          # be removed by an admin or can be left to expire after 7 days.
-
-          download_tinygranite() {
-            python -c "from transformers import pipeline, AutoTokenizer; pipeline('text-generation', model='$1'); tokenizer=AutoTokenizer.from_pretrained('$1')"
-          }
-          download_roberta_large() {
-            python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1')"
-          }
-          # tinyllama used for static batching tests because static batching is _too slow_
-          download_tinyllama() {
-            python -c "from transformers import pipeline; pipeline('text-generation', model='$1')"
-          }
-
-          hf_models="${{ matrix.test_suite.hf_models || env.DEFAULT_HF_MODEL }}"
-
-          if [[ "$hf_models" == "micro-g3.3_roberta-large" ]]; then
-            models=("ibm-ai-platform/micro-g3.3-8b-instruct-1b" "sentence-transformers/all-roberta-large-v1")
+          # We are caching HF models (HF_HUB_CACHE) for reliability rather than
+          # speed, since HF downloads are flaky for concurrent jobs.
+          # Be careful when adding models to the cache here, as the GHA cache is
+          # limited to 10 GB.
+          # If a new model is added here, a new hash key is generated. The
+          # previous cache blob can then be removed by an admin or can be left
+          # to expire after 7 days.
+
+          if [[ -n "${{ matrix.test_suite.hf_model }}" ]]; then
+            hf_model="${{ matrix.test_suite.hf_model }}"
+            hf_revision="${{ matrix.test_suite.hf_model_rev }}"
           else
-            models=("$hf_models")
+            hf_model="${{ env.DEFAULT_HF_MODEL }}"
+            hf_revision="${{ env.DEFAULT_HF_MODEL_REV }}"
           fi
+          hf_model_2="${{ matrix.test_suite.hf_model_2 }}"
+          hf_revision_2="${{ matrix.test_suite.hf_model_2_rev }}"
-
-          for model in "${models[@]}"; do
-            echo "Downloading $model ..."
-            case "$model" in
-              "ibm-ai-platform/micro-g3.3-8b-instruct-1b"*)
-                download_tinygranite "$model" &
-                ;;
-              "JackFram/llama-160m")
-                download_tinyllama "$model" &
-                ;;
-              "sentence-transformers/all-roberta-large-v1")
-                download_roberta_large "$model" &
-                ;;
-              "cross-encoder/stsb-roberta-large")
-                download_roberta_large "$model" &
-                ;;
-              *)
-                echo "No download method found for: $model";
-                exit 1
-                ;;
-            esac
-          done
+          python3 tools/download_model.py -m "$hf_model" -r "${hf_revision:-main}" &
+          if [[ -n "$hf_model_2" ]]; then
+            python3 tools/download_model.py -m "$hf_model_2" -r "${hf_revision_2:-main}" &
+          fi
+          wait
-
-      - name: "Save HF models cache"
-        if: ( steps.changed-src-files.outputs.any_changed == 'true' && github.event_name != 'pull_request' && steps.cache_restore.outputs.cache-hit != 'true' )
-        uses: actions/cache/save@v4
-        with:
-          path: ${{ env.model_path }}
-          key: ${{ runner.os }}-hf-model-${{ env.model_key }}
+#      - name: "Save HF models cache"
+#        if: ( steps.changed-src-files.outputs.any_changed == 'true' && github.event_name != 'pull_request' && steps.cache_restore.outputs.cache-hit != 'true' )
+#        uses: actions/cache/save@v4
+#        with:
+#          path: ${{ env.model_path }}
+#          key: ${{ runner.os }}-hf-model-${{ env.model_key }}
+#
+#      - name: "Save HF models cache for additional model"
+#        if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 && github.event_name != 'pull_request' && steps.cache_restore_2.outputs.cache-hit != 'true' )
+#        uses: actions/cache/save@v4
+#        with:
+#          path: ${{ env.model_2_path }}
+#          key: ${{ runner.os }}-hf-model-${{ env.model_2_key }}
 
       - name: "Run tests"
         if: steps.changed-src-files.outputs.any_changed == 'true'
diff --git a/tools/download_model.py b/tools/download_model.py
new file mode 100755
index 000000000..c68e4d6ed
--- /dev/null
+++ b/tools/download_model.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+"""Download a model from HuggingFace with revision.
+
+> python3 tools/download_model.py -m <hf_model_id> [-r <revision>]
+
+"""
+
+import argparse
+import logging
+
+
+def download_granite_or_llama(hf_model_id: str, revision: str = "main"):
+    from transformers import pipeline
+    pipeline('text-generation', model=hf_model_id, revision=revision)
+
+
+def download_roberta(hf_model_id: str, revision: str = "main"):
+    from sentence_transformers import SentenceTransformer
+    SentenceTransformer(hf_model_id, revision=revision)
+
+
+download_methods = {
+    "ibm-ai-platform/micro-g3.3-8b-instruct-1b": download_granite_or_llama,
+    "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8": download_granite_or_llama,
+    "JackFram/llama-160m": download_granite_or_llama,
+    "cross-encoder/stsb-roberta-large": download_roberta,
+    "sentence-transformers/all-roberta-large-v1": download_roberta,
+}
+
+
+def download_model_with_revision(hf_model_id: str, revision: str = "main"):
+    if hf_model_id in download_methods:
+        download_method = download_methods.get(hf_model_id)
+        logging.info("Downloading model '%s' with revision '%s' ...",
+                     hf_model_id, revision)
+        download_method(hf_model_id, revision)
+        logging.info("Model '%s' with revision '%s' downloaded.", hf_model_id,
+                     revision)
+    else:
+        logging.error(
+            "No `download_method` found for model '%s'."
+            " Supported models: %s", hf_model_id,
+            str(list(download_methods.keys())))
+        raise SystemExit(1)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-m',
+                        dest='hf_model_id',
+                        help='HuggingFace model ID.')
+    parser.add_argument('-r',
+                        dest='revision',
+                        default="main",
+                        help='Git tag, hash, or branch.')
+
+    args, _extra_args = parser.parse_known_args()
+
+    # The root logger defaults to WARNING, which would hide the info logs.
+    logging.basicConfig(level=logging.INFO)
+    if args.hf_model_id:
+        download_model_with_revision(args.hf_model_id, args.revision)
+    else:
+        logging.error("Need to specify a model ID with -m.")
+        raise SystemExit(1)
+
+
+if __name__ == '__main__':
+    main()
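
For local verification, the helper can be exercised the same way the workflow invokes it. A minimal sketch, assuming the commands run from the repository root with `transformers` and `sentence_transformers` installed; the model IDs and revision hash are the ones pinned in the workflow env above, while `some-org/unknown-model` is a hypothetical ID used only to show the failure path:

    # Download the default model at the revision pinned in DEFAULT_HF_MODEL_REV
    python3 tools/download_model.py -m ibm-ai-platform/micro-g3.3-8b-instruct-1b -r 6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f

    # -r falls back to 'main' when omitted
    python3 tools/download_model.py -m JackFram/llama-160m

    # A model without a registered download method logs an error and exits non-zero
    python3 tools/download_model.py -m some-org/unknown-model; echo "exit code: $?"

Running the supported downloads in the background with a trailing `wait`, as the workflow step does, keeps the step's wall-clock time close to that of the slowest single download.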