148 changes: 77 additions & 71 deletions .github/workflows/test.yml
@@ -21,6 +21,8 @@ env:
VLLM_PLUGINS: "spyre"
HF_HUB_CACHE: "${{ github.workspace }}/.cache/huggingface/hub"
DEFAULT_HF_MODEL: "ibm-ai-platform/micro-g3.3-8b-instruct-1b"
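# pin the default model to an exact commit so repeated runs and cache keys stay reproducible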
DEFAULT_HF_MODEL_REV: "6e9c6465a9d7e5e9fa35004a29f0c90befa7d23f"
# DEFAULT_HF_MODEL_REV: "2714578f54cfb744ece40df9326ee0b47e879e03"

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -44,19 +46,20 @@ jobs:
- name: "static batching"
markers: "cpu and decoder and not cb and not other_e2e"
flags: "--timeout=300"
hf_models: "JackFram/llama-160m"
hf_model: "JackFram/llama-160m"
- name: "fp8"
markers: "cpu and quantized and multi"
flags: "--timeout=600 -k 'basic and test_output' --durations=0"
hf_models: "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8"
hf_model: "ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8"
hf_model_rev: "main"
- name: "embedding"
markers: "cpu and embedding"
flags: "--timeout=300"
hf_models: "sentence-transformers/all-roberta-large-v1"
hf_model: "sentence-transformers/all-roberta-large-v1"
- name: "scoring"
markers: "cpu and scoring"
flags: "--timeout=300"
hf_models: "cross-encoder/stsb-roberta-large"
hf_model: "cross-encoder/stsb-roberta-large"
- name: "continuous batching"
markers: "cpu and cb"
flags: "--timeout=300 --durations=0 -s"
@@ -80,7 +83,8 @@ jobs:
name: "backward compat"
markers: "compat or (cpu and basic)"
flags: "--timeout=300"
hf_models: "micro-g3.3_roberta-large"
hf_model_2: "sentence-transformers/all-roberta-large-v1"
hf_model_2_rev: "main"
os: "ubuntu-latest"
python_version: "3.12"
# Exclude vLLM:main if PR does NOT have "ready" label AND auto-merge is not enabled
@@ -150,86 +154,88 @@ jobs:
# overwritten.
uv pip install -v .

# Standardize model name for cache keys
- name: Standardize HF model name
- name: "Standardize HF model names for caching"
id: standardize-names
run: |
model="${{ matrix.test_suite.hf_models || env.DEFAULT_HF_MODEL }}"
if [[ "$model" == "micro-g3.3_roberta-large" ]]; then
echo "model_key=micro-g3.3_roberta-large" >> "$GITHUB_ENV"
echo "model_path=${HF_HUB_CACHE}" >> "$GITHUB_ENV"
if [[ -n "${{ matrix.test_suite.hf_model }}" ]]; then
model="${{ matrix.test_suite.hf_model }}"
revision="${{ matrix.test_suite.hf_model_rev }}"
else
# replace / with --
safe_name="${model//\//--}"
echo "model_key=$safe_name" >> "$GITHUB_ENV"
echo "model_path=${HF_HUB_CACHE}/models--$safe_name" >> "$GITHUB_ENV"
model="${{ env.DEFAULT_HF_MODEL }}"
revision="${{ env.DEFAULT_HF_MODEL_REV }}"
fi
# replace '/' with '--'
safe_name="${model//\//--}"
echo "model_key=${safe_name}_${revision}" >> "$GITHUB_ENV"
echo "model_path=${HF_HUB_CACHE}/models--$safe_name" >> "$GITHUB_ENV"

if [[ -n "${{ matrix.test_suite.hf_model_2 }}" ]]; then
model_2="${{ matrix.test_suite.hf_model_2 }}"
revision_2="${{ matrix.test_suite.hf_model_2_rev }}"
# replace '/' with '--'
safe_name_2="${model_2//\//--}"
echo "model_2_key=${safe_name_2}_${revision_2}" >> "$GITHUB_ENV"
echo "model_2_path=${HF_HUB_CACHE}/models--${safe_name_2}" >> "$GITHUB_ENV"
fi

- name: "Restore HF models cache"
id: cache_restore
if: steps.changed-src-files.outputs.any_changed == 'true'
uses: actions/cache/restore@v4
with:
path: ${{ env.model_path }}
key: ${{ runner.os }}-hf-model-${{ env.model_key }}
# - name: "Restore HF models cache"
# id: cache_restore
# if: steps.changed-src-files.outputs.any_changed == 'true'
# uses: actions/cache/restore@v4
# with:
# path: ${{ env.model_path }}
# key: ${{ runner.os }}-hf-model-${{ env.model_key }}
#
# - name: "Restore HF models cache for additional model"
# id: cache_restore_2
# if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 )
# uses: actions/cache/restore@v4
# with:
# path: ${{ env.model_2_path }}
# key: ${{ runner.os }}-hf-model-${{ env.model_2_key }}

- name: "Download HF models"
if: ( steps.changed-src-files.outputs.any_changed == 'true' && steps.cache_restore.outputs.cache-hit != 'true' )
# if: ( steps.changed-src-files.outputs.any_changed == 'true' && (steps.cache_restore.outputs.cache-hit != 'true' || steps.cache_restore_2.outputs.cache-hit != 'true'))
run: |
# We are caching HF models (HF_HUB_CACHE) for reliability rather than speed, since HF downloads are flaky for concurrent jobs.
# Be careful when adding models to the cache here, as the GHA cache is limited to 10 GB.
# If a new model is added here, a new hash key is generated. The previous cache blob can then
# be removed by an admin or can be left to expire after 7 days.

download_tinygranite() {
python -c "from transformers import pipeline, AutoTokenizer; pipeline('text-generation', model='$1'); tokenizer=AutoTokenizer.from_pretrained('$1')"
}
download_roberta_large() {
python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('$1')"
}
# tinyllama used for static batching tests because static batching is _too slow_
download_tinyllama() {
python -c "from transformers import pipeline; pipeline('text-generation', model='$1')"
}

hf_models="${{ matrix.test_suite.hf_models || env.DEFAULT_HF_MODEL }}"

if [[ "$hf_models" == "micro-g3.3_roberta-large" ]]; then
models=("ibm-ai-platform/micro-g3.3-8b-instruct-1b" "sentence-transformers/all-roberta-large-v1")
# We are caching HF models (HF_HUB_CACHE) for reliability rather than
# speed, since HF downloads are flaky for concurrent jobs.
# Be careful when adding models to the cache here, as the GHA cache is
# limited to 10 GB.
# If a new model is added here, a new hash key is generated. The
# previous cache blob can then be removed by an admin or can be left
# to expire after 7 days.

if [[ -n "${{ matrix.test_suite.hf_model }}" ]]; then
hf_model="${{ matrix.test_suite.hf_model }}"
hf_revision="${{ matrix.test_suite.hf_model_rev }}"
else
models=("$hf_models")
hf_model="${{ env.DEFAULT_HF_MODEL }}"
hf_revision="${{ env.DEFAULT_HF_MODEL_REV }}"
fi
hf_model_2="${{ matrix.test_suite.hf_model_2 }}"
hf_revision_2="${{ matrix.test_suite.hf_model_2_rev }}"

for model in "${models[@]}"; do
echo "Downloading $model ..."
case "$model" in
"ibm-ai-platform/micro-g3.3-8b-instruct-1b"*)
download_tinygranite "$model" &
;;
"JackFram/llama-160m")
download_tinyllama "$model" &
;;
"sentence-transformers/all-roberta-large-v1")
download_roberta_large "$model" &
;;
"cross-encoder/stsb-roberta-large")
download_roberta_large "$model" &
;;
*)
echo "No download method found for: $model";
exit 1
;;
esac
done
python3 tools/download_model.py -m "$hf_model" -r "${hf_revision:-main}" &

if [[ -n "$hf_model_2" ]]; then
python3 tools/download_model.py -m "$hf_model_2" -r "${hf_revision_2:-main}" &
fi

# a bare `wait` always returns 0, which would mask a failed download;
# waiting on each PID propagates any nonzero exit and fails the step
for pid in $(jobs -p); do wait "$pid"; done

- name: "Save HF models cache"
if: ( steps.changed-src-files.outputs.any_changed == 'true' && github.event_name != 'pull_request' && steps.cache_restore.outputs.cache-hit != 'true' )
uses: actions/cache/save@v4
with:
path: ${{ env.model_path }}
key: ${{ runner.os }}-hf-model-${{ env.model_key }}
# - name: "Save HF models cache"
# if: ( steps.changed-src-files.outputs.any_changed == 'true' && github.event_name != 'pull_request' && steps.cache_restore.outputs.cache-hit != 'true' )
# uses: actions/cache/save@v4
# with:
# path: ${{ env.model_path }}
# key: ${{ runner.os }}-hf-model-${{ env.model_key }}
#
# - name: "Save HF models cache for additional model"
# if: ( steps.changed-src-files.outputs.any_changed == 'true' && matrix.test_suite.hf_model_2 && github.event_name != 'pull_request' && steps.cache_restore_2.outputs.cache-hit != 'true' )
# uses: actions/cache/save@v4
# with:
# path: ${{ env.model_2_path }}
# key: ${{ runner.os }}-hf-model-${{ env.model_2_key }}

- name: "Run tests"
if: steps.changed-src-files.outputs.any_changed == 'true'
67 changes: 67 additions & 0 deletions tools/download_model.py
@@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""Download a model from HuggingFace with revision.

> python3 tools/download_model.py -m <HF-model-id> [-r <git-tag-or-hash>]
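
Example:
> python3 tools/download_model.py -m JackFram/llama-160m -r main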

"""

import argparse
import logging
import sys


def download_granite_or_llama(hf_model_id: str, revision: str = "main"):
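    # building the pipeline downloads the model weights and tokenizer for the
    # pinned revision into HF_HUB_CACHE as a side effect; the pipeline object
    # itself is discarded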
from transformers import pipeline
pipeline('text-generation', model=hf_model_id, revision=revision)


def download_roberta(hf_model_id: str, revision: str = "main"):
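    # constructing the SentenceTransformer pulls the encoder weights for the
    # pinned revision into the shared HF cache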
from sentence_transformers import SentenceTransformer
SentenceTransformer(hf_model_id, revision=revision)


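# map each supported HF model ID to the loader that materializes it in the
# local HF hub cache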
download_methods = {
"ibm-ai-platform/micro-g3.3-8b-instruct-1b": download_granite_or_llama,
"ibm-ai-platform/micro-g3.3-8b-instruct-1b-FP8": download_granite_or_llama,
"JackFram/llama-160m": download_granite_or_llama,
"cross-encoder/stsb-roberta-large": download_roberta,
"sentence-transformers/all-roberta-large-v1": download_roberta,
}


def download_model_with_revision(hf_model_id: str, revision: str = "main"):
if hf_model_id in download_methods:
download_method = download_methods.get(hf_model_id)
logging.info("Downloading model '%s' with revision '%s' ...",
hf_model_id, revision)
download_method(hf_model_id, revision)
logging.info("Model '%s' with revision '%s' downloaded.", hf_model_id,
revision)
else:
logging.error(
"No `download_method` found for model '%s'."
" Supported models: %s", hf_model_id,
str(list(download_methods.keys())))
        sys.exit(1)


def main():
    # the root logger defaults to WARNING; raise to INFO so the download
    # progress messages above actually show up in CI logs
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
parser.add_argument('-m',
dest='hf_model_id',
help='HuggingFace model ID.')
parser.add_argument('-r',
dest='revision',
default="main",
help='Git tag, hash, or branch.')

args, _extra_args = parser.parse_known_args()

if args.hf_model_id:
download_model_with_revision(args.hf_model_id, args.revision)
else:
logging.error("Need to specify a model ID with -model.")
exit(1)


if __name__ == '__main__':
main()