From 87b339dc248e22f2e11922e643f40cad56fb4f04 Mon Sep 17 00:00:00 2001 From: Prashant Gupta Date: Wed, 1 Oct 2025 11:41:06 -0700 Subject: [PATCH 01/12] =?UTF-8?q?=E2=8F=AA=20revert=20back=20to=203-layer?= =?UTF-8?q?=20micro=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Prashant Gupta --- tests/spyre_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/spyre_util.py b/tests/spyre_util.py index 2c5ab652..1b6cb582 100644 --- a/tests/spyre_util.py +++ b/tests/spyre_util.py @@ -265,7 +265,7 @@ def _default_test_models(isEmbeddings=False, isScoring=False): # the test command includes `-m quantized`. tinygranite = ModelInfo( name="ibm-ai-platform/micro-g3.3-8b-instruct-1b", - revision="6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f") + revision="2714578f54cfb744ece40df9326ee0b47e879e03") tinygranite_fp8 = ModelInfo( name="ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8", revision="0dff8bacb968836dbbc7c2895c6d9ead0a05dc9e", From 71748185ac7f267484c6348bf9818e285f55ca4e Mon Sep 17 00:00:00 2001 From: Prashant Gupta Date: Wed, 1 Oct 2025 11:53:21 -0700 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=90=9B=20download=20the=20right=20r?= =?UTF-8?q?evision?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Prashant Gupta --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 828d1a1b..d7c886ad 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -182,7 +182,7 @@ jobs: # be removed by an admin or can be left to expire after 7 days. 
download_tinygranite() { - python -c "from transformers import pipeline, AutoTokenizer; pipeline('text-generation', model='$1'); tokenizer=AutoTokenizer.from_pretrained('$1')" + python -c "from transformers import pipeline, AutoTokenizer; pipeline('text-generation', model='$1', revision="2714578f54cfb744ece40df9326ee0b47e879e03"); tokenizer=AutoTokenizer.from_pretrained('$1')" } download_roberta_large() { python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1')" From 5aeeb885f74314c52be312a97df58920661e66ca Mon Sep 17 00:00:00 2001 From: Prashant Gupta Date: Wed, 1 Oct 2025 11:56:49 -0700 Subject: [PATCH 03/12] =?UTF-8?q?=F0=9F=8E=A8=20fmt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Prashant Gupta --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d7c886ad..5afd9cfc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -182,7 +182,7 @@ jobs: # be removed by an admin or can be left to expire after 7 days. 
download_tinygranite() { - python -c "from transformers import pipeline, AutoTokenizer; pipeline('text-generation', model='$1', revision="2714578f54cfb744ece40df9326ee0b47e879e03"); tokenizer=AutoTokenizer.from_pretrained('$1')" + python -c "from transformers import pipeline, AutoTokenizer; pipeline('text-generation', model='$1', revision='2714578f54cfb744ece40df9326ee0b47e879e03'); tokenizer=AutoTokenizer.from_pretrained('$1')" } download_roberta_large() { python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1')" From 38acec80e85f7cd5122bde643bbd60e4d47ed2d1 Mon Sep 17 00:00:00 2001 From: Prashant Gupta Date: Wed, 1 Oct 2025 12:06:37 -0700 Subject: [PATCH 04/12] =?UTF-8?q?=F0=9F=90=9B=20download=20the=20right=20m?= =?UTF-8?q?odel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Prashant Gupta --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5afd9cfc..c0e1c53d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -182,7 +182,7 @@ jobs: # be removed by an admin or can be left to expire after 7 days. 
download_tinygranite() { - python -c "from transformers import pipeline, AutoTokenizer; pipeline('text-generation', model='$1', revision='2714578f54cfb744ece40df9326ee0b47e879e03'); tokenizer=AutoTokenizer.from_pretrained('$1')" + python -c "from transformers import pipeline; pipeline('text-generation', model='$1', revision='2714578f54cfb744ece40df9326ee0b47e879e03');" } download_roberta_large() { python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1')" From 49c507747114fca3e70ef4254e18b9ed7ee48d40 Mon Sep 17 00:00:00 2001 From: Prashant Gupta Date: Wed, 1 Oct 2025 12:32:45 -0700 Subject: [PATCH 05/12] =?UTF-8?q?=F0=9F=9A=A7=20flip=20condition=20so=20th?= =?UTF-8?q?at=20we=20download=20the=20right=20model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Prashant Gupta --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c0e1c53d..f1cf9dd3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -174,7 +174,7 @@ jobs: key: ${{ runner.os }}-hf-model-${{ env.model_key }} - name: "Download HF models" - if: ( steps.changed-src-files.outputs.any_changed == 'true' && steps.cache_restore.outputs.cache-hit != 'true' ) + if: ( steps.changed-src-files.outputs.any_changed == 'true' && steps.cache_restore.outputs.cache-hit == 'true' ) run: | # We are caching HF models (HF_HUB_CACHE) for reliability rather than speed, since HF downloads are flaky for concurrent jobs. # Be careful when adding models to the cache here, as the GHA cache is limited to 10 GB. 
From 61391b5775c5ffa96143cdf256644804f159c200 Mon Sep 17 00:00:00 2001 From: Christian Kadner Date: Wed, 1 Oct 2025 21:18:48 -0700 Subject: [PATCH 06/12] run test on all branches Signed-off-by: Christian Kadner --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f1cf9dd3..9fe1262a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,10 +7,10 @@ on: pull_request: # add labeled and unlabeled to the default types (runs when label is added) types: [opened, synchronize, reopened, labeled, unlabeled, auto_merge_enabled] - branches: [main] +# branches: [main] push: - branches: [main] +# branches: [main] workflow_dispatch: From 7b1eae0298b776e607ea3a7748e5cf93bdcd2632 Mon Sep 17 00:00:00 2001 From: Christian Kadner Date: Thu, 2 Oct 2025 02:27:57 -0700 Subject: [PATCH 07/12] disentangle model caches, add model revisions Signed-off-by: Christian Kadner --- .github/workflows/test.yml | 121 +++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 45 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9fe1262a..6b416291 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,10 +7,10 @@ on: pull_request: # add labeled and unlabeled to the default types (runs when label is added) types: [opened, synchronize, reopened, labeled, unlabeled, auto_merge_enabled] -# branches: [main] + branches: [main] push: -# branches: [main] + branches: [main] workflow_dispatch: @@ -21,6 +21,8 @@ env: VLLM_PLUGINS: "spyre" HF_HUB_CACHE: "${{ github.workspace }}/.cache/huggingface/hub" DEFAULT_HF_MODEL: "ibm-ai-platform/micro-g3.3-8b-instruct-1b" +# DEFAULT_HF_MODEL_REV: "6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f" +# DEFAULT_HF_MODEL_REV: "2714578f54cfb744ece40df9326ee0b47e879e03" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -44,19 +46,19 @@ jobs: - 
name: "static batching" markers: "cpu and decoder and not cb and not other_e2e" flags: "--timeout=300" - hf_models: "JackFram/llama-160m" + hf_model: "JackFram/llama-160m" - name: "fp8" markers: "cpu and quantized and multi" flags: "--timeout=600 -k 'basic and test_output' --durations=0" - hf_models: "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8" + hf_model: "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8" - name: "embedding" markers: "cpu and embedding" flags: "--timeout=300" - hf_models: "sentence-transformers/all-roberta-large-v1" + hf_model: "sentence-transformers/all-roberta-large-v1" - name: "scoring" markers: "cpu and scoring" flags: "--timeout=300" - hf_models: "cross-encoder/stsb-roberta-large" + hf_model: "cross-encoder/stsb-roberta-large" - name: "continuous batching" markers: "cpu and cb" flags: "--timeout=300 --durations=0 -s" @@ -80,7 +82,7 @@ jobs: name: "backward compat" markers: "compat or (cpu and basic)" flags: "--timeout=300" - hf_models: "micro-g3.3_roberta-large" + hf_model_2: "sentence-transformers/all-roberta-large-v1" os: "ubuntu-latest" python_version: "3.12" # Exclude vLLM:main if PR does NOT have "ready" label AND auto-merge is not enabled @@ -150,19 +152,28 @@ jobs: # overwritten. uv pip install -v . 
- # Standardize model name for cache keys - - name: Standardize HF model name + - name: "Standardize HF model names for caching" id: standardize-names run: | - model="${{ matrix.test_suite.hf_models || env.DEFAULT_HF_MODEL }}" - if [[ "$model" == "micro-g3.3_roberta-large" ]]; then - echo "model_key=micro-g3.3_roberta-large" >> "$GITHUB_ENV" - echo "model_path=${HF_HUB_CACHE}" >> "$GITHUB_ENV" + if [[ -n "${{ matrix.test_suite.hf_model }}" ]]; then + model="${{ matrix.test_suite.hf_model }}" + revision="${{ matrix.test_suite.hf_model_rev }}" else - # replace / with -- - safe_name="${model//\//--}" - echo "model_key=$safe_name" >> "$GITHUB_ENV" - echo "model_path=${HF_HUB_CACHE}/models--$safe_name" >> "$GITHUB_ENV" + model="${{ env.DEFAULT_HF_MODEL }}" + revision="${{ env.DEFAULT_HF_MODEL_REV }}" + fi + # replace '/' with '--' + safe_name="${model//\//--}" + echo "model_key=${safe_name}_${revision}" >> "$GITHUB_ENV" + echo "model_path=${HF_HUB_CACHE}/models--$safe_name" >> "$GITHUB_ENV" + + if [[ -n "${{ matrix.test_suite.hf_model_2 }}" ]]; then + model_2="${{ matrix.test_suite.hf_model_2 }}" + revision_2="${{ matrix.test_suite.hf_model_2_rev}}" + # replace '/' with '--' + safe_name_2="${model_2//\//--}" + echo "model_key_2=${safe_name_2}_${revision_2}" >> "$GITHUB_ENV" + echo "model_path_2=${HF_HUB_CACHE}/models--${safe_name_2}" >> "$GITHUB_ENV" fi - name: "Restore HF models cache" @@ -173,55 +184,68 @@ jobs: path: ${{ env.model_path }} key: ${{ runner.os }}-hf-model-${{ env.model_key }} + - name: "Restore HF models cache for additional model" + id: cache_restore_2 + if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 ) + uses: actions/cache/restore@v4 + with: + path: ${{ env.model_path_2 }} + key: ${{ runner.os }}-hf-model-${{ env.model_key_2 }} + - name: "Download HF models" - if: ( steps.changed-src-files.outputs.any_changed == 'true' && steps.cache_restore.outputs.cache-hit == 'true' ) + if: ( 
steps.changed-src-files.outputs.any_changed == 'true' && (steps.cache_restore.outputs.cache-hit != 'true' || steps.cache_restore_2.outputs.cache-hit != 'true')) run: | - # We are caching HF models (HF_HUB_CACHE) for reliability rather than speed, since HF downloads are flaky for concurrent jobs. - # Be careful when adding models to the cache here, as the GHA cache is limited to 10 GB. - # If a new model is added here, a new hash key is generated. The previous cache blob can then - # be removed by an admin or can be left to expire after 7 days. + # We are caching HF models (HF_HUB_CACHE) for reliability rather than + # speed, since HF downloads are flaky for concurrent jobs. + # Be careful when adding models to the cache here, as the GHA cache is + # limited to 10 GB. + # If a new model is added here, a new hash key is generated. The + # previous cache blob can then be removed by an admin or can be left + # to expire after 7 days. - download_tinygranite() { - python -c "from transformers import pipeline; pipeline('text-generation', model='$1', revision='2714578f54cfb744ece40df9326ee0b47e879e03');" + download_granite_or_llama() { + python -c "from transformers import pipeline; pipeline('text-generation', model='$1', revision='${2:-main}');" } download_roberta_large() { - python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1')" - } - # tinyllama used for static batching tests because static batching is _too slow_ - download_tinyllama() { - python -c "from transformers import pipeline; pipeline('text-generation', model='$1')" + python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1', revision='${2:-main}')" } - - hf_models="${{ matrix.test_suite.hf_models || env.DEFAULT_HF_MODEL }}" - - if [[ "$hf_models" == "micro-g3.3_roberta-large" ]]; then - models=("ibm-ai-platform/micro-g3.3-8b-instruct-1b" "sentence-transformers/all-roberta-large-v1") - else - models=("$hf_models") - fi - - for model in 
"${models[@]}"; do - echo "Downloading $model ..." + download_model_with_revision() { + model="$1" + revision="$2" + echo "Downloading '$model' with revision '$2' ..." case "$model" in "ibm-ai-platform/micro-g3.3-8b-instruct-1b"*) - download_tinygranite "$model" & + download_granite_or_llama "$model" $revision ;; "JackFram/llama-160m") - download_tinyllama "$model" & + download_granite_or_llama "$model" $revision ;; "sentence-transformers/all-roberta-large-v1") - download_roberta_large "$model" & + download_roberta_large "$model" $revision ;; "cross-encoder/stsb-roberta-large") - download_roberta_large "$model" & + download_roberta_large "$model" $revision ;; *) echo "No download method found for: $model"; exit 1 ;; esac - done + } + if [[ -n "${{ matrix.test_suite.hf_model }}" ]]; then + hf_model="${{ matrix.test_suite.hf_model }}" + hf_revision="${{ matrix.test_suite.hf_model_rev }}" + else + hf_model="${{ env.DEFAULT_HF_MODEL }}" + hf_revision="${{ env.DEFAULT_HF_MODEL_REV }}" + fi + hf_model_2="${{ matrix.test_suite.hf_model_2 }}" + hf_revision_2="${{ matrix.test_suite.hf_model_2_rev }}" + download_model_with_revision "$hf_model" $hf_revision & + if [[ -n "$hf_model_2" ]]; then + download_model_with_revision "$hf_model_2" $hf_revision_2 & + fi wait - name: "Save HF models cache" @@ -231,6 +255,13 @@ jobs: path: ${{ env.model_path }} key: ${{ runner.os }}-hf-model-${{ env.model_key }} + - name: "Save HF models cache for additional model" + if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 && github.event_name != 'pull_request' && steps.cache_restore_2.outputs.cache-hit != 'true' ) + uses: actions/cache/save@v4 + with: + path: ${{ env.model_path_2 }} + key: ${{ runner.os }}-hf-model-${{ env.model_key_2 }} + - name: "Run tests" if: steps.changed-src-files.outputs.any_changed == 'true' env: From bd880264351fdb5046a36b2eca17fbd9323af6ef Mon Sep 17 00:00:00 2001 From: Christian Kadner Date: Thu, 2 Oct 2025 02:45:23 -0700 
Subject: [PATCH 08/12] test with revisions Signed-off-by: Christian Kadner --- .github/workflows/test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6b416291..6b221e53 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -22,7 +22,7 @@ env: HF_HUB_CACHE: "${{ github.workspace }}/.cache/huggingface/hub" DEFAULT_HF_MODEL: "ibm-ai-platform/micro-g3.3-8b-instruct-1b" # DEFAULT_HF_MODEL_REV: "6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f" -# DEFAULT_HF_MODEL_REV: "2714578f54cfb744ece40df9326ee0b47e879e03" + DEFAULT_HF_MODEL_REV: "2714578f54cfb744ece40df9326ee0b47e879e03" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -51,6 +51,7 @@ jobs: markers: "cpu and quantized and multi" flags: "--timeout=600 -k 'basic and test_output' --durations=0" hf_model: "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8" + hf_model_rev: "main" - name: "embedding" markers: "cpu and embedding" flags: "--timeout=300" @@ -83,6 +84,7 @@ jobs: markers: "compat or (cpu and basic)" flags: "--timeout=300" hf_model_2: "sentence-transformers/all-roberta-large-v1" + hf_model_2_rev: "main" os: "ubuntu-latest" python_version: "3.12" # Exclude vLLM:main if PR does NOT have "ready" label AND auto-merge is not enabled From db65341c8078a406beedc65f684da99e06f8b9f1 Mon Sep 17 00:00:00 2001 From: Christian Kadner Date: Thu, 2 Oct 2025 03:02:20 -0700 Subject: [PATCH 09/12] shellcheck Signed-off-by: Christian Kadner --- .github/workflows/test.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6b221e53..567f8180 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -174,8 +174,8 @@ jobs: revision_2="${{ matrix.test_suite.hf_model_2_rev}}" # replace '/' with '--' safe_name_2="${model_2//\//--}" - echo 
"model_key_2=${safe_name_2}_${revision_2}" >> "$GITHUB_ENV" - echo "model_path_2=${HF_HUB_CACHE}/models--${safe_name_2}" >> "$GITHUB_ENV" + echo "model_2_key=${safe_name_2}_${revision_2}" >> "$GITHUB_ENV" + echo "model_2_path=${HF_HUB_CACHE}/models--${safe_name_2}" >> "$GITHUB_ENV" fi - name: "Restore HF models cache" @@ -191,8 +191,8 @@ jobs: if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 ) uses: actions/cache/restore@v4 with: - path: ${{ env.model_path_2 }} - key: ${{ runner.os }}-hf-model-${{ env.model_key_2 }} + path: ${{ env.model_2_path }} + key: ${{ runner.os }}-hf-model-${{ env.model_2_key }} - name: "Download HF models" if: ( steps.changed-src-files.outputs.any_changed == 'true' && (steps.cache_restore.outputs.cache-hit != 'true' || steps.cache_restore_2.outputs.cache-hit != 'true')) @@ -206,27 +206,27 @@ jobs: # to expire after 7 days. download_granite_or_llama() { - python -c "from transformers import pipeline; pipeline('text-generation', model='$1', revision='${2:-main}');" + python -c "from transformers import pipeline; pipeline('text-generation', model='$1', revision='$2');" } download_roberta_large() { - python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1', revision='${2:-main}')" + python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1', revision='$2')" } download_model_with_revision() { model="$1" - revision="$2" - echo "Downloading '$model' with revision '$2' ..." + revision="${2:-main}" + echo "Downloading '$model' with revision '$revision' ..." 
case "$model" in "ibm-ai-platform/micro-g3.3-8b-instruct-1b"*) - download_granite_or_llama "$model" $revision + download_granite_or_llama "$model" "$revision" ;; "JackFram/llama-160m") - download_granite_or_llama "$model" $revision + download_granite_or_llama "$model" "$revision" ;; "sentence-transformers/all-roberta-large-v1") - download_roberta_large "$model" $revision + download_roberta_large "$model" "$revision" ;; "cross-encoder/stsb-roberta-large") - download_roberta_large "$model" $revision + download_roberta_large "$model" "$revision" ;; *) echo "No download method found for: $model"; @@ -244,9 +244,9 @@ jobs: hf_model_2="${{ matrix.test_suite.hf_model_2 }}" hf_revision_2="${{ matrix.test_suite.hf_model_2_rev }}" - download_model_with_revision "$hf_model" $hf_revision & + download_model_with_revision "$hf_model" "${hf_revision:-main}" & if [[ -n "$hf_model_2" ]]; then - download_model_with_revision "$hf_model_2" $hf_revision_2 & + download_model_with_revision "$hf_model_2" "${hf_revision_2:-main}" & fi wait @@ -261,8 +261,8 @@ jobs: if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 && github.event_name != 'pull_request' && steps.cache_restore_2.outputs.cache-hit != 'true' ) uses: actions/cache/save@v4 with: - path: ${{ env.model_path_2 }} - key: ${{ runner.os }}-hf-model-${{ env.model_key_2 }} + path: ${{ env.model_2_path }} + key: ${{ runner.os }}-hf-model-${{ env.model_2_key }} - name: "Run tests" if: steps.changed-src-files.outputs.any_changed == 'true' From 8a52aa7f1336a5547d5de19acec17c25bfe33bd1 Mon Sep 17 00:00:00 2001 From: Christian Kadner Date: Wed, 8 Oct 2025 23:32:12 -0800 Subject: [PATCH 10/12] move model download logic into a Python script Signed-off-by: Christian Kadner --- .github/workflows/test.yml | 37 +++----------------- tools/download_model.py | 69 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 32 deletions(-) create mode 100755 tools/download_model.py diff --git 
a/.github/workflows/test.yml b/.github/workflows/test.yml index 567f8180..61227216 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -204,36 +204,7 @@ jobs: # If a new model is added here, a new hash key is generated. The # previous cache blob can then be removed by an admin or can be left # to expire after 7 days. - - download_granite_or_llama() { - python -c "from transformers import pipeline; pipeline('text-generation', model='$1', revision='$2');" - } - download_roberta_large() { - python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1', revision='$2')" - } - download_model_with_revision() { - model="$1" - revision="${2:-main}" - echo "Downloading '$model' with revision '$revision' ..." - case "$model" in - "ibm-ai-platform/micro-g3.3-8b-instruct-1b"*) - download_granite_or_llama "$model" "$revision" - ;; - "JackFram/llama-160m") - download_granite_or_llama "$model" "$revision" - ;; - "sentence-transformers/all-roberta-large-v1") - download_roberta_large "$model" "$revision" - ;; - "cross-encoder/stsb-roberta-large") - download_roberta_large "$model" "$revision" - ;; - *) - echo "No download method found for: $model"; - exit 1 - ;; - esac - } + if [[ -n "${{ matrix.test_suite.hf_model }}" ]]; then hf_model="${{ matrix.test_suite.hf_model }}" hf_revision="${{ matrix.test_suite.hf_model_rev }}" @@ -244,10 +215,12 @@ jobs: hf_model_2="${{ matrix.test_suite.hf_model_2 }}" hf_revision_2="${{ matrix.test_suite.hf_model_2_rev }}" - download_model_with_revision "$hf_model" "${hf_revision:-main}" & + python3 tools/download_model.py -m "$hf_model" "${hf_revision:-main}" & + if [[ -n "$hf_model_2" ]]; then - download_model_with_revision "$hf_model_2" "${hf_revision_2:-main}" & + python3 tools/download_model.py -m "$hf_model_2" "${hf_revision_2:-main}" & fi + wait - name: "Save HF models cache" diff --git a/tools/download_model.py b/tools/download_model.py new file mode 100755 index 00000000..1cff100a --- 
/dev/null +++ b/tools/download_model.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +"""Download a model from HuggingFace with revision. + +> python3 tools/download_model.py -m <hf_model_id> [-r <revision>] + +""" + +import argparse +import logging + + +def download_granite_or_llama(hf_model_id: str, revision: str = "main"): + from transformers import pipeline + pipeline('text-generation', model=hf_model_id, revision=revision) + + +def download_roberta(hf_model_id: str, revision: str = "main"): + from sentence_transformers import SentenceTransformer + SentenceTransformer(hf_model_id, revision=revision) + + +download_methods = { + "ibm-ai-platform/micro-g3.3-8b-instruct-1b": download_granite_or_llama, + "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8": download_granite_or_llama, + "JackFram/llama-160m": download_granite_or_llama, + "cross-encoder/stsb-roberta-large": download_roberta, + "sentence-transformers/all-roberta-large-v1": download_roberta, +} + + +def download_model_with_revision(hf_model_id: str, revision: str = "main"): + if hf_model_id in download_methods: + download_method = download_methods.get(hf_model_id) + logging.info("Downloading model '%s' with revision '%s' ...", + hf_model_id, revision) + download_method(hf_model_id, revision) + logging.info("Model '%s' with revision '%s' downloaded.", hf_model_id, + revision) + else: + logging.error( + "No `download_method` found for model '%s'."
+ " Supported models: %s", hf_model_id, + str(list(download_methods.keys()))) + exit(1) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-model', + '-m', + dest='hf_model_id', + help='HuggingFace model ID.') + parser.add_argument('-revision', + '-r', + dest='revision', + default="main", + help='Git tag, hash, or branch.') + + args, _extra_args = parser.parse_known_args() + + if args.hf_model_id: + download_model_with_revision(args.hf_model_id, args.revision) + else: + logging.error("Need to specify a model ID with -model.") + exit(1) + + +if __name__ == '__main__': + main() From 7e717c25873db61d5d550b295712ed3fec7d7e30 Mon Sep 17 00:00:00 2001 From: Christian Kadner Date: Mon, 13 Oct 2025 10:46:09 -0700 Subject: [PATCH 11/12] revert revision for tiny granite Signed-off-by: Christian Kadner --- .github/workflows/test.yml | 4 ++-- tests/spyre_util.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 61227216..fa59d0d1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,8 +21,8 @@ env: VLLM_PLUGINS: "spyre" HF_HUB_CACHE: "${{ github.workspace }}/.cache/huggingface/hub" DEFAULT_HF_MODEL: "ibm-ai-platform/micro-g3.3-8b-instruct-1b" -# DEFAULT_HF_MODEL_REV: "6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f" - DEFAULT_HF_MODEL_REV: "2714578f54cfb744ece40df9326ee0b47e879e03" + DEFAULT_HF_MODEL_REV: "6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f" +# DEFAULT_HF_MODEL_REV: "2714578f54cfb744ece40df9326ee0b47e879e03" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} diff --git a/tests/spyre_util.py b/tests/spyre_util.py index 1b6cb582..2c5ab652 100644 --- a/tests/spyre_util.py +++ b/tests/spyre_util.py @@ -265,7 +265,7 @@ def _default_test_models(isEmbeddings=False, isScoring=False): # the test command includes `-m quantized`. 
tinygranite = ModelInfo( name="ibm-ai-platform/micro-g3.3-8b-instruct-1b", - revision="2714578f54cfb744ece40df9326ee0b47e879e03") + revision="6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f") tinygranite_fp8 = ModelInfo( name="ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8", revision="0dff8bacb968836dbbc7c2895c6d9ead0a05dc9e", From fc227514c13eff3577aab7763366b5d516726624 Mon Sep 17 00:00:00 2001 From: Christian Kadner Date: Mon, 13 Oct 2025 12:10:31 -0700 Subject: [PATCH 12/12] test without cache Signed-off-by: Christian Kadner --- .github/workflows/test.yml | 62 +++++++++++++++++++------------------- tools/download_model.py | 6 ++-- 2 files changed, 33 insertions(+), 35 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fa59d0d1..2f223127 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -178,24 +178,24 @@ jobs: echo "model_2_path=${HF_HUB_CACHE}/models--${safe_name_2}" >> "$GITHUB_ENV" fi - - name: "Restore HF models cache" - id: cache_restore - if: steps.changed-src-files.outputs.any_changed == 'true' - uses: actions/cache/restore@v4 - with: - path: ${{ env.model_path }} - key: ${{ runner.os }}-hf-model-${{ env.model_key }} - - - name: "Restore HF models cache for additional model" - id: cache_restore_2 - if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 ) - uses: actions/cache/restore@v4 - with: - path: ${{ env.model_2_path }} - key: ${{ runner.os }}-hf-model-${{ env.model_2_key }} +# - name: "Restore HF models cache" +# id: cache_restore +# if: steps.changed-src-files.outputs.any_changed == 'true' +# uses: actions/cache/restore@v4 +# with: +# path: ${{ env.model_path }} +# key: ${{ runner.os }}-hf-model-${{ env.model_key }} +# +# - name: "Restore HF models cache for additional model" +# id: cache_restore_2 +# if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 ) +# uses: actions/cache/restore@v4 +# with: +# path: ${{ 
env.model_2_path }} +# key: ${{ runner.os }}-hf-model-${{ env.model_2_key }} - name: "Download HF models" - if: ( steps.changed-src-files.outputs.any_changed == 'true' && (steps.cache_restore.outputs.cache-hit != 'true' || steps.cache_restore_2.outputs.cache-hit != 'true')) +# if: ( steps.changed-src-files.outputs.any_changed == 'true' && (steps.cache_restore.outputs.cache-hit != 'true' || steps.cache_restore_2.outputs.cache-hit != 'true')) run: | # We are caching HF models (HF_HUB_CACHE) for reliability rather than # speed, since HF downloads are flaky for concurrent jobs. @@ -215,27 +215,27 @@ jobs: hf_model_2="${{ matrix.test_suite.hf_model_2 }}" hf_revision_2="${{ matrix.test_suite.hf_model_2_rev }}" - python3 tools/download_model.py -m "$hf_model" "${hf_revision:-main}" & + python3 tools/download_model.py -m "$hf_model" -r "${hf_revision:-main}" & if [[ -n "$hf_model_2" ]]; then - python3 tools/download_model.py -m "$hf_model_2" "${hf_revision_2:-main}" & + python3 tools/download_model.py -m "$hf_model_2" -r "${hf_revision_2:-main}" & fi wait - - name: "Save HF models cache" - if: ( steps.changed-src-files.outputs.any_changed == 'true' && github.event_name != 'pull_request' && steps.cache_restore.outputs.cache-hit != 'true' ) - uses: actions/cache/save@v4 - with: - path: ${{ env.model_path }} - key: ${{ runner.os }}-hf-model-${{ env.model_key }} - - - name: "Save HF models cache for additional model" - if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 && github.event_name != 'pull_request' && steps.cache_restore_2.outputs.cache-hit != 'true' ) - uses: actions/cache/save@v4 - with: - path: ${{ env.model_2_path }} - key: ${{ runner.os }}-hf-model-${{ env.model_2_key }} +# - name: "Save HF models cache" +# if: ( steps.changed-src-files.outputs.any_changed == 'true' && github.event_name != 'pull_request' && steps.cache_restore.outputs.cache-hit != 'true' ) +# uses: actions/cache/save@v4 +# with: +# path: ${{ env.model_path 
}} +# key: ${{ runner.os }}-hf-model-${{ env.model_key }} +# +# - name: "Save HF models cache for additional model" +# if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 && github.event_name != 'pull_request' && steps.cache_restore_2.outputs.cache-hit != 'true' ) +# uses: actions/cache/save@v4 +# with: +# path: ${{ env.model_2_path }} +# key: ${{ runner.os }}-hf-model-${{ env.model_2_key }} - name: "Run tests" if: steps.changed-src-files.outputs.any_changed == 'true' diff --git a/tools/download_model.py b/tools/download_model.py index 1cff100a..c68e4d6e 100755 --- a/tools/download_model.py +++ b/tools/download_model.py @@ -46,12 +46,10 @@ def download_model_with_revision(hf_model_id: str, revision: str = "main"): def main(): parser = argparse.ArgumentParser() - parser.add_argument('-model', - '-m', + parser.add_argument('-m', dest='hf_model_id', help='HuggingFace model ID.') - parser.add_argument('-revision', - '-r', + parser.add_argument('-r', dest='revision', default="main", help='Git tag, hash, or branch.')