From 859af00699e50b0b9530e0e6956886dd992db6c2 Mon Sep 17 00:00:00 2001 From: dafnapension Date: Tue, 12 Aug 2025 22:41:49 +0300 Subject: [PATCH 1/5] remove ''-s from values of properties of tools Signed-off-by: dafnapension --- prepare/cards/bfcl.py | 90 +++++++++---------- .../cards/bfcl/multi_turn/java_v3.json | 2 +- .../cards/bfcl/multi_turn/javascript_v3.json | 2 +- .../bfcl/multi_turn/live_multiple_v3.json | 2 +- .../multi_turn/live_parallel_multiple_v3.json | 2 +- .../bfcl/multi_turn/live_parallel_v3.json | 2 +- .../cards/bfcl/multi_turn/live_simple_v3.json | 2 +- .../cards/bfcl/multi_turn/multiple_v3.json | 2 +- .../bfcl/multi_turn/parallel_multiple_v3.json | 2 +- .../cards/bfcl/multi_turn/parallel_v3.json | 2 +- .../cards/bfcl/multi_turn/simple_v3.json | 2 +- 11 files changed, 55 insertions(+), 55 deletions(-) diff --git a/prepare/cards/bfcl.py b/prepare/cards/bfcl.py index 1b5159937c..9fcafafe68 100644 --- a/prepare/cards/bfcl.py +++ b/prepare/cards/bfcl.py @@ -104,7 +104,7 @@ Copy(field="function", to_field="tools"), "operators.fix_json_schema", ExecuteExpression( - expression='[{"name": k, "arguments": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]', + expression='[{"name": k, "arguments": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != ""] for vval in v.values()])]', to_field="reference_calls", imports_list=["itertools"], ), @@ -138,52 +138,52 @@ test_card(card, strict=False) add_to_catalog(card, f"cards.bfcl.multi_turn.{subset}_v3", overwrite=True) - for subset in [ - "live_relevance", - "live_irrelevance", - ]: - card = TaskCard( - loader=LoadJsonFile( - files={ - "test": base_path + f"BFCL_v3_{subset}.json", - }, - lines=True, - data_classification_policy=["public"], - ), - preprocess_steps=[ - Copy(field="question/*/0", to_field="dialog"), - Copy(field="function", to_field="tools"), - "operators.fix_json_schema", - Set(fields={"reference_calls": []}), - ], - task="tasks.tool_calling.multi_turn", - templates=["templates.tool_calling.multi_turn"], - __description__=( - """The Berkeley function calling leaderboard is a live leaderboard to evaluate the ability of different LLMs to call functions (also referred to as tools). We built this dataset from our learnings to be representative of most users' function calling use-cases, for example, in agents, as a part of enterprise workflows, etc. To this end, our evaluation dataset spans diverse categories, and across multiple languages.""" - ), - __title__=f"""Berkeley Function Calling Leaderboard (Multi Turn Setup) - {subset.replace("_", " ").title()} V3""", - __tags__={ - "annotations_creators": "expert-generated", - "language": ["en"], - "license": "apache-2.0", - "size_categories": ["10K Date: Tue, 12 Aug 2025 23:04:04 +0300 Subject: [PATCH 2/5] ipynb Signed-off-by: dafnapension --- bfcl.ipynb | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 bfcl.ipynb diff --git a/bfcl.ipynb b/bfcl.ipynb new file mode 100644 index 0000000000..ac4afce55b --- /dev/null +++ b/bfcl.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "6bf26409-1391-48a0-9846-64ff6f024392", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Already on 'main'\n", + "Your branch is up to date with 'origin/main'.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n", + "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Template was not specified in recipe, using the first template from the card by default.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "IOPub data rate exceeded.\n", + "The Jupyter server will temporarily stop sending output\n", + "to the client in order to avoid crashing it.\n", + "To change this limit, set the config variable\n", + "`--ServerApp.iopub_data_rate_limit`.\n", + "\n", + "Current values:\n", + "ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n", + "ServerApp.rate_limit_window=3.0 (secs)\n", + "\n" + ] + } + ], + "source": [ + "!git checkout main\n", + "import json\n", + "from unitxt.api import load_recipe\n", + "from unitxt.settings_utils import get_settings\n", + "settings = get_settings()\n", + "settings.allow_unverified_code=True\n", + "recipe = load_recipe(card=\"cards.bfcl.multi_turn.live_multiple_v3\")\n", + "ms = recipe()\n", + "print(ms)\n", + "for i, instance in enumerate(ms[\"test\"]):\n", + " if i == 916:\n", + " print(f\"size of instance is {len(json.dumps(instance))}\")\n", + " sizes = {field: len(json.dumps(instance[field])) for field in instance}\n", + " print(f\"sizes of all fields in instance: {sizes}\")\n", + " task_data = json.loads(instance[\"task_data\"])\n", + " reference_calls = task_data[\"reference_calls\"]\n", + " print(f\"number of reference_calls generated by the reg expression is: {len(reference_calls)}\")\n", + " print(f\"one reference call:\\n{reference_calls[0]}\")\n", + " print(f\"another reference call:\\n{reference_calls[30303]}\")\n", + " break" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "virtual310", + "language": "python", + "name": "virtual310" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 1fc158a6f0be3438fd661f7c839e4401fa32acfa Mon Sep 17 00:00:00 2001 From: dafnapension Date: Wed, 13 Aug 2025 13:56:54 +0300 Subject: [PATCH 3/5] complete small fixes to allow all cards to pass through _source_to_dataset Signed-off-by: dafnapension --- prepare/cards/bfcl.py | 8 +++++++- prepare/operators/fix_json_schema.py | 1 + src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json | 11 +++++++++++ .../catalog/cards/bfcl/multi_turn/javascript_v3.json | 11 +++++++++++ .../cards/bfcl/multi_turn/live_multiple_v3.json | 11 +++++++++++ .../bfcl/multi_turn/live_parallel_multiple_v3.json | 11 +++++++++++ .../cards/bfcl/multi_turn/live_parallel_v3.json | 11 +++++++++++ .../catalog/cards/bfcl/multi_turn/live_simple_v3.json | 11 +++++++++++ .../catalog/cards/bfcl/multi_turn/multiple_v3.json | 11 +++++++++++ .../cards/bfcl/multi_turn/parallel_multiple_v3.json | 11 +++++++++++ .../catalog/cards/bfcl/multi_turn/parallel_v3.json | 11 +++++++++++ .../catalog/cards/bfcl/multi_turn/simple_v3.json | 11 +++++++++++ src/unitxt/catalog/cards/bfcl/simple_v3.json | 7 +++++++ src/unitxt/catalog/operators/fix_json_schema.json | 1 + 14 files changed, 126 insertions(+), 1 deletion(-) diff --git a/prepare/cards/bfcl.py b/prepare/cards/bfcl.py index 9fcafafe68..88ffe322c2 100644 --- a/prepare/cards/bfcl.py +++ b/prepare/cards/bfcl.py @@ -5,9 +5,10 @@ from unitxt.operators import ( Copy, ExecuteExpression, + FilterByExpression, Set, ) -from unitxt.stream_operators import JoinStreams +from unitxt.stream_operators import DeleteSplits, JoinStreams from unitxt.test_utils.card import test_card base_path = "https://raw.githubusercontent.com/ShishirPatil/gorilla/70b6a4a2144597b1f99d1f4d3185d35d7ee532a4/berkeley-function-call-leaderboard/data/" @@ -31,6 +32,7 @@ on="id", new_stream_name="test", ), + DeleteSplits(splits=["questions", "answers"]), Copy(field="question/0/0/content", to_field="query"), Copy(field="function", to_field="tools"), "operators.fix_json_schema", @@ -100,9 +102,13 @@ on="id", new_stream_name="test", ), + DeleteSplits(splits=["questions", "answers"]), Copy(field="question/*/0", to_field="dialog"), Copy(field="function", to_field="tools"), "operators.fix_json_schema", + FilterByExpression( + expression="all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + ), ExecuteExpression( expression='[{"name": k, "arguments": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != ""] for vval in v.values()])]', to_field="reference_calls", diff --git a/prepare/operators/fix_json_schema.py b/prepare/operators/fix_json_schema.py index 736570dc86..3e18b12389 100644 --- a/prepare/operators/fix_json_schema.py +++ b/prepare/operators/fix_json_schema.py @@ -4,6 +4,7 @@ operator = RecursiveReplace( key="type", map_values={ + "": "object", "dict": "object", "float": "number", "tuple": "array", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json index e8885a15dd..6c58309204 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json index c14b52b82c..dc42a7369a 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json index ddce4bcd23..87bc54c1ea 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json index 24983540a9..3a902a7682 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json index cd827e2d93..23b826eada 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json index 06d8a3b605..1645870c72 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json index ea7d513bf4..7cafbe5f02 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json index 803bd5ab1b..2bfad4208e 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json index 2be2d0edb3..ca9e3a98ed 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json index 76735ef7bf..521a85d17d 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/*/0", @@ -31,6 +38,10 @@ "to_field": "tools" }, "operators.fix_json_schema", + { + "__type__": "filter_by_expression", + "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*[[w for w in vval if w != \"\"] for vval in v.values()])]", diff --git a/src/unitxt/catalog/cards/bfcl/simple_v3.json b/src/unitxt/catalog/cards/bfcl/simple_v3.json index b68303eb8c..a26999596b 100644 --- a/src/unitxt/catalog/cards/bfcl/simple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/simple_v3.json @@ -20,6 +20,13 @@ "on": "id", "new_stream_name": "test" }, + { + "__type__": "delete_splits", + "splits": [ + "questions", + "answers" + ] + }, { "__type__": "copy", "field": "question/0/0/content", diff --git a/src/unitxt/catalog/operators/fix_json_schema.json b/src/unitxt/catalog/operators/fix_json_schema.json index 9254ab2b2f..5790cf8df5 100644 --- a/src/unitxt/catalog/operators/fix_json_schema.json +++ b/src/unitxt/catalog/operators/fix_json_schema.json @@ -2,6 +2,7 @@ "__type__": "recursive_replace", "key": "type", "map_values": { + "": "object", "dict": "object", "float": "number", "tuple": "array", From 3bc266adb0577fea460cb2e80cb98800fd141bde Mon Sep 17 00:00:00 2001 From: dafnapension Date: Wed, 13 Aug 2025 14:01:48 +0300 Subject: [PATCH 4/5] remove the generating example of exploding instance size Signed-off-by: dafnapension --- bfcl.ipynb | 94 ------------------------------------------------------ 1 file changed, 94 deletions(-) delete mode 100644 bfcl.ipynb diff --git a/bfcl.ipynb b/bfcl.ipynb deleted file mode 100644 index ac4afce55b..0000000000 --- a/bfcl.ipynb +++ /dev/null @@ -1,94 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "6bf26409-1391-48a0-9846-64ff6f024392", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Already on 'main'\n", - "Your branch is up to date with 'origin/main'.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n", - "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Template was not specified in recipe, using the first template from the card by default.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "IOPub data rate exceeded.\n", - "The Jupyter server will temporarily stop sending output\n", - "to the client in order to avoid crashing it.\n", - "To change this limit, set the config variable\n", - "`--ServerApp.iopub_data_rate_limit`.\n", - "\n", - "Current values:\n", - "ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n", - "ServerApp.rate_limit_window=3.0 (secs)\n", - "\n" - ] - } - ], - "source": [ - "!git checkout main\n", - "import json\n", - "from unitxt.api import load_recipe\n", - "from unitxt.settings_utils import get_settings\n", - "settings = get_settings()\n", - "settings.allow_unverified_code=True\n", - "recipe = load_recipe(card=\"cards.bfcl.multi_turn.live_multiple_v3\")\n", - "ms = recipe()\n", - "print(ms)\n", - "for i, instance in enumerate(ms[\"test\"]):\n", - " if i == 916:\n", - " print(f\"size of instance is {len(json.dumps(instance))}\")\n", - " sizes = {field: len(json.dumps(instance[field])) for field in instance}\n", - " print(f\"sizes of all fields in instance: {sizes}\")\n", - " task_data = json.loads(instance[\"task_data\"])\n", - " reference_calls = task_data[\"reference_calls\"]\n", - " print(f\"number of reference_calls generated by the reg expression is: {len(reference_calls)}\")\n", - " print(f\"one reference call:\\n{reference_calls[0]}\")\n", - " print(f\"another reference call:\\n{reference_calls[30303]}\")\n", - " break" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "virtual310", - "language": "python", - "name": "virtual310" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From e98faa719a937338df68e5ca2f8cc0c5ae75b320 Mon Sep 17 00:00:00 2001 From: dafnapension Date: Thu, 14 Aug 2025 23:04:08 +0300 Subject: [PATCH 5/5] use the same json schema fixer as xlam does, an operator entered main with xlam Signed-off-by: dafnapension --- prepare/cards/bfcl.py | 7 ++++--- src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json | 5 ++++- .../catalog/cards/bfcl/multi_turn/javascript_v3.json | 5 ++++- .../catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json | 5 ++++- .../catalog/cards/bfcl/multi_turn/live_multiple_v3.json | 5 ++++- .../cards/bfcl/multi_turn/live_parallel_multiple_v3.json | 5 ++++- .../catalog/cards/bfcl/multi_turn/live_parallel_v3.json | 5 ++++- .../catalog/cards/bfcl/multi_turn/live_relevance_v3.json | 5 ++++- .../catalog/cards/bfcl/multi_turn/live_simple_v3.json | 5 ++++- src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json | 5 ++++- .../cards/bfcl/multi_turn/parallel_multiple_v3.json | 5 ++++- src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json | 5 ++++- src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json | 5 ++++- src/unitxt/catalog/cards/bfcl/simple_v3.json | 5 ++++- 14 files changed, 56 insertions(+), 16 deletions(-) diff --git a/prepare/cards/bfcl.py b/prepare/cards/bfcl.py index 88ffe322c2..2d8ae2ddff 100644 --- a/prepare/cards/bfcl.py +++ b/prepare/cards/bfcl.py @@ -6,6 +6,7 @@ Copy, ExecuteExpression, FilterByExpression, + FixJsonSchemaOfParameterTypes, Set, ) from unitxt.stream_operators import DeleteSplits, JoinStreams @@ -35,7 +36,7 @@ DeleteSplits(splits=["questions", "answers"]), Copy(field="question/0/0/content", to_field="query"), Copy(field="function", to_field="tools"), - "operators.fix_json_schema", + FixJsonSchemaOfParameterTypes(main_field="tools"), # Process ground truth data in this dataset, which is a provided as a list of options per field, # and convert it into a list of explicit tool calls # @@ -105,7 +106,7 @@ DeleteSplits(splits=["questions", "answers"]), Copy(field="question/*/0", to_field="dialog"), Copy(field="function", to_field="tools"), - "operators.fix_json_schema", + FixJsonSchemaOfParameterTypes(main_field="tools"), FilterByExpression( expression="all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" ), @@ -159,7 +160,7 @@ preprocess_steps=[ Copy(field="question/*/0", to_field="dialog"), Copy(field="function", to_field="tools"), - "operators.fix_json_schema", + FixJsonSchemaOfParameterTypes(main_field="tools"), Set(fields={"reference_calls": []}), ], task="tasks.tool_calling.multi_turn", diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json index 6c58309204..0de39fe97a 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/java_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json index dc42a7369a..3fddfb537c 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/javascript_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json index 58b8edfdba..4e9395e930 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_irrelevance_v3.json @@ -21,7 +21,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "set", "fields": { diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json index 87bc54c1ea..7448701678 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_multiple_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json index 3a902a7682..0b3fc19b89 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_multiple_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json index 23b826eada..def93e12b4 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_parallel_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json index b8bbd35491..a2d954b562 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_relevance_v3.json @@ -21,7 +21,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "set", "fields": { diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json index 1645870c72..bd1590a088 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/live_simple_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json index 7cafbe5f02..6f95116a9a 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/multiple_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json index 2bfad4208e..fabc321a4f 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_multiple_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json index ca9e3a98ed..8f09f6a05d 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/parallel_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json b/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json index 521a85d17d..0996641441 100644 --- a/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/multi_turn/simple_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "filter_by_expression", "expression": "all(isinstance(v, dict) for d in ground_truth for k, v in d.items())" diff --git a/src/unitxt/catalog/cards/bfcl/simple_v3.json b/src/unitxt/catalog/cards/bfcl/simple_v3.json index a26999596b..d4cebe10b3 100644 --- a/src/unitxt/catalog/cards/bfcl/simple_v3.json +++ b/src/unitxt/catalog/cards/bfcl/simple_v3.json @@ -37,7 +37,10 @@ "field": "function", "to_field": "tools" }, - "operators.fix_json_schema", + { + "__type__": "fix_json_schema_of_parameter_types", + "main_field": "tools" + }, { "__type__": "execute_expression", "expression": "[{\"name\": k, \"arguments\": dict(zip(v.keys(), vals))} for d in ground_truth for k, v in d.items() for vals in itertools.product(*v.values())]",