|
from unitxt import evaluate, load_dataset
from unitxt.inference import CrossProviderInferenceEngine

# Load the GLUE benchmark in chat format with a concise-answer system prompt.
# max_samples_per_subset=5 keeps this demo run small and fast; use_cache=True
# reuses previously prepared data instead of rebuilding it on every run.
dataset = load_dataset(
    "benchmarks.glue",
    format="formats.chat_api",
    system_prompt="system_prompts.general.be_concise",
    max_samples_per_subset=5,
    split="test",
    use_cache=True,
)

# CrossProviderInferenceEngine provides unified API access to multiple model
# providers (watsonx is used here). temperature=0.0 with top_p=1.0 requests
# greedy, reproducible generation. For the full set of accepted arguments,
# see the class documentation; the engine follows the OpenAI API argument
# conventions.
# NOTE(review): the original explanatory docstring listed the supported
# providers, but that part is not visible in this chunk — confirm the list
# against the CrossProviderInferenceEngine documentation.
model = CrossProviderInferenceEngine(
    model="llama-3-2-3b-instruct", temperature=0.0, top_p=1.0, provider="watsonx"
)

# Run inference: the engine is callable and maps the dataset to a list of
# model predictions.
predictions = model(dataset)

# Score the predictions against the dataset's references/metrics.
results = evaluate(predictions=predictions, data=dataset)

print("Global Results:")
print(results.global_scores.summary)

# A benchmark run aggregates per-subset (per-GLUE-task) scores, so report
# those rather than per-instance scores.
print("Subsets Results:")
print(results.subsets_scores.summary)