IBM · dafnapension · Aug 12, 2025 · Aug 12, 2025 · Aug 13, 2025 · Aug 13, 2025
diff --git a/prepare/cards/bfcl.py b/prepare/cards/bfcl.py
@@ -5,9 +5,11 @@
 from unitxt.operators import (
     Copy,
     ExecuteExpression,
+    FilterByExpression,
+    FixJsonSchemaOfParameterTypes,
     Set,
 )
-from unitxt.stream_operators import JoinStreams
+from unitxt.stream_operators import DeleteSplits, JoinStreams
 from unitxt.test_utils.card import test_card
 
 base_path = "https://raw.githubusercontent.com/ShishirPatil/gorilla/70b6a4a2144597b1f99d1f4d3185d35d7ee532a4/berkeley-function-call-leaderboard/data/"
@@ -31,9 +33,10 @@
                     on="id",
                     new_stream_name="test",
                 ),
+                DeleteSplits(splits=["questions", "answers"]),
                 Copy(field="question/0/0/content", to_field="query"),
                 Copy(field="function", to_field="tools"),
-                "operators.fix_json_schema",
+                FixJsonSchemaOfParameterTypes(main_field="tools"),
                 # Process ground truth data in this dataset, which is a provided as a list of options per field,
                 # and convert it into a list of explicit tool calls
                 #
@@ -100,11 +103,15 @@
                     on="id",
                     new_stream_name="test",
                 ),
+                DeleteSplits(splits=["questions", "answers"]),
                 Copy(field="question/*/0", to_field="dialog"),
                 Copy(field="function", to_field="tools"),
-                "operators.fix_json_schema",
+                FixJsonSchemaOfParameterTypes(main_field="tools"),
+                FilterByExpression(
+                    expression="all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
+                ),
                 ExecuteExpression(
-                    expression='[{"name": k, "arguments": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]',
+                    expression='[{"name": k, "arguments": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != ""] for vval in v.values()])]',
                     to_field="reference_calls",
                     imports_list=["itertools"],
                 ),
@@ -138,52 +145,52 @@
         test_card(card, strict=False)
         add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
 
-        for subset in [
-            "live_relevance",
-            "live_irrelevance",
-        ]:
-            card = TaskCard(
-                loader=LoadJsonFile(
-                    files={
-                        "test": base_path + f"BFCL_v3_{subset}.json",
-                    },
-                    lines=True,
-                    data_classification_policy=["public"],
-                ),
-                preprocess_steps=[
-                    Copy(field="question/*/0", to_field="dialog"),
-                    Copy(field="function", to_field="tools"),
-                    "operators.fix_json_schema",
-                    Set(fields={"reference_calls": []}),
-                ],
-                task="tasks.tool_calling.multi_turn",
-                templates=["templates.tool_calling.multi_turn"],
-                __description__=(
-                    """The Berkeley function calling leaderboard is a live leaderboard to evaluate the ability of different LLMs to call functions (also referred to as tools). We built this dataset from our learnings to be representative of most users' function calling use-cases, for example, in agents, as a part of enterprise workflows, etc. To this end, our evaluation dataset spans diverse categories, and across multiple languages."""
-                ),
-                __title__=f"""Berkeley Function Calling Leaderboard (Multi Turn Setup) - {subset.replace("_", " ").title()} V3""",
-                __tags__={
-                    "annotations_creators": "expert-generated",
-                    "language": ["en"],
-                    "license": "apache-2.0",
-                    "size_categories": ["10K<n<100K"],
-                    "task_categories": [
-                        "question-answering",
-                        "reading-comprehension",
-                        "tool-calling",
-                        "multi-turn-tool-calling",
-                    ],
-                    "task_ids": [
-                        "tool-calling",
-                        "multi-turn-tool-calling",
-                        "reading-comprehension",
-                    ],
+    for subset in [
+        "live_relevance",
+        "live_irrelevance",
+    ]:
+        card = TaskCard(
+            loader=LoadJsonFile(
+                files={
+                    "test": base_path + f"BFCL_v3_{subset}.json",
                 },
-            )
+                lines=True,
+                data_classification_policy=["public"],
+            ),
+            preprocess_steps=[
+                Copy(field="question/*/0", to_field="dialog"),
+                Copy(field="function", to_field="tools"),
+                FixJsonSchemaOfParameterTypes(main_field="tools"),
+                Set(fields={"reference_calls": []}),
+            ],
+            task="tasks.tool_calling.multi_turn",
+            templates=["templates.tool_calling.multi_turn"],
+            __description__=(
+                """The Berkeley function calling leaderboard is a live leaderboard to evaluate the ability of different LLMs to call functions (also referred to as tools). We built this dataset from our learnings to be representative of most users' function calling use-cases, for example, in agents, as a part of enterprise workflows, etc. To this end, our evaluation dataset spans diverse categories, and across multiple languages."""
+            ),
+            __title__=f"""Berkeley Function Calling Leaderboard (Multi Turn Setup) - {subset.replace("_", " ").title()} V3""",
+            __tags__={
+                "annotations_creators": "expert-generated",
+                "language": ["en"],
+                "license": "apache-2.0",
+                "size_categories": ["10K<n<100K"],
+                "task_categories": [
+                    "question-answering",
+                    "reading-comprehension",
+                    "tool-calling",
+                    "multi-turn-tool-calling",
+                ],
+                "task_ids": [
+                    "tool-calling",
+                    "multi-turn-tool-calling",
+                    "reading-comprehension",
+                ],
+            },
+        )
 
-            # Test and add the card to the catalog
-            test_card(card, strict=False)
-            add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
+        # Test and add the card to the catalog
+        test_card(card, strict=False)
+        add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True)
 
     # card = TaskCard(
     #     loader=LoadJsonFile(

diff --git a/prepare/operators/fix_json_schema.py b/prepare/operators/fix_json_schema.py
@@ -4,6 +4,7 @@
 operator = RecursiveReplace(
     key="type",
     map_values={
+        "": "object",
         "dict": "object",
         "float": "number",
         "tuple": "array",

diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json
@@ -20,6 +20,13 @@
             "on": "id",
             "new_stream_name": "test"
         },
+        {
+            "__type__": "delete_splits",
+            "splits": [
+                "questions",
+                "answers"
+            ]
+        },
         {
             "__type__": "copy",
             "field": "question/*/0",
@@ -30,10 +37,17 @@
             "field": "function",
             "to_field": "tools"
         },
-        "operators.fix_json_schema",
+        {
+            "__type__": "fix_json_schema_of_parameter_types",
+            "main_field": "tools"
+        },
+        {
+            "__type__": "filter_by_expression",
+            "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
+        },
         {
             "__type__": "execute_expression",
-            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
+            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",
             "to_field": "reference_calls",
             "imports_list": [
                 "itertools"

diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json
@@ -20,6 +20,13 @@
             "on": "id",
             "new_stream_name": "test"
         },
+        {
+            "__type__": "delete_splits",
+            "splits": [
+                "questions",
+                "answers"
+            ]
+        },
         {
             "__type__": "copy",
             "field": "question/*/0",
@@ -30,10 +37,17 @@
             "field": "function",
             "to_field": "tools"
         },
-        "operators.fix_json_schema",
+        {
+            "__type__": "fix_json_schema_of_parameter_types",
+            "main_field": "tools"
+        },
+        {
+            "__type__": "filter_by_expression",
+            "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
+        },
         {
             "__type__": "execute_expression",
-            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
+            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",
             "to_field": "reference_calls",
             "imports_list": [
                 "itertools"

diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json
@@ -21,7 +21,10 @@
             "field": "function",
             "to_field": "tools"
         },
-        "operators.fix_json_schema",
+        {
+            "__type__": "fix_json_schema_of_parameter_types",
+            "main_field": "tools"
+        },
         {
             "__type__": "set",
             "fields": {

diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json
@@ -20,6 +20,13 @@
             "on": "id",
             "new_stream_name": "test"
         },
+        {
+            "__type__": "delete_splits",
+            "splits": [
+                "questions",
+                "answers"
+            ]
+        },
         {
             "__type__": "copy",
             "field": "question/*/0",
@@ -30,10 +37,17 @@
             "field": "function",
             "to_field": "tools"
         },
-        "operators.fix_json_schema",
+        {
+            "__type__": "fix_json_schema_of_parameter_types",
+            "main_field": "tools"
+        },
+        {
+            "__type__": "filter_by_expression",
+            "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
+        },
         {
             "__type__": "execute_expression",
-            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
+            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",
             "to_field": "reference_calls",
             "imports_list": [
                 "itertools"

diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json
@@ -20,6 +20,13 @@
             "on": "id",
             "new_stream_name": "test"
         },
+        {
+            "__type__": "delete_splits",
+            "splits": [
+                "questions",
+                "answers"
+            ]
+        },
         {
             "__type__": "copy",
             "field": "question/*/0",
@@ -30,10 +37,17 @@
             "field": "function",
             "to_field": "tools"
         },
-        "operators.fix_json_schema",
+        {
+            "__type__": "fix_json_schema_of_parameter_types",
+            "main_field": "tools"
+        },
+        {
+            "__type__": "filter_by_expression",
+            "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
+        },
         {
             "__type__": "execute_expression",
-            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
+            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",
             "to_field": "reference_calls",
             "imports_list": [
                 "itertools"

diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json
@@ -20,6 +20,13 @@
             "on": "id",
             "new_stream_name": "test"
         },
+        {
+            "__type__": "delete_splits",
+            "splits": [
+                "questions",
+                "answers"
+            ]
+        },
         {
             "__type__": "copy",
             "field": "question/*/0",
@@ -30,10 +37,17 @@
             "field": "function",
             "to_field": "tools"
         },
-        "operators.fix_json_schema",
+        {
+            "__type__": "fix_json_schema_of_parameter_types",
+            "main_field": "tools"
+        },
+        {
+            "__type__": "filter_by_expression",
+            "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
+        },
         {
             "__type__": "execute_expression",
-            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
+            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",
             "to_field": "reference_calls",
             "imports_list": [
                 "itertools"

diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json
@@ -21,7 +21,10 @@
             "field": "function",
             "to_field": "tools"
         },
-        "operators.fix_json_schema",
+        {
+            "__type__": "fix_json_schema_of_parameter_types",
+            "main_field": "tools"
+        },
         {
             "__type__": "set",
             "fields": {

diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json
@@ -20,6 +20,13 @@
             "on": "id",
             "new_stream_name": "test"
         },
+        {
+            "__type__": "delete_splits",
+            "splits": [
+                "questions",
+                "answers"
+            ]
+        },
         {
             "__type__": "copy",
             "field": "question/*/0",
@@ -30,10 +37,17 @@
             "field": "function",
             "to_field": "tools"
         },
-        "operators.fix_json_schema",
+        {
+            "__type__": "fix_json_schema_of_parameter_types",
+            "main_field": "tools"
+        },
+        {
+            "__type__": "filter_by_expression",
+            "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())"
+        },
         {
             "__type__": "execute_expression",
-            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",
+            "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]",
             "to_field": "reference_calls",
             "imports_list": [
                 "itertools"