36 changes: 36 additions & 0 deletions prepare/benchmarks/safety.py
@@ -0,0 +1,36 @@
from unitxt.benchmark import Benchmark
from unitxt.catalog import add_to_catalog
from unitxt.standard import DatasetRecipe

MAX_TEST_INSTANCES = 1000

benchmark = Benchmark(
    subsets={
        "attaq": DatasetRecipe(
            card="cards.safety.attaq_gg",
            template_card_index="default",
            group_by=["label"],
            max_test_instances=MAX_TEST_INSTANCES,
        ),
        "provoq": DatasetRecipe(
            card="cards.safety.provoq_gg",
            template_card_index="default",
            group_by=["group"],
            max_test_instances=MAX_TEST_INSTANCES,
        ),
        "airbench": DatasetRecipe(
            card="cards.safety.airbench2024",
            template_card_index="default",
            group_by=["l2-name"],
            max_test_instances=MAX_TEST_INSTANCES,
        ),
        "ailuminate": DatasetRecipe(
            card="cards.safety.mlcommons_ailuminate",
            template_card_index="default",
            group_by=["hazard"],
            max_test_instances=MAX_TEST_INSTANCES,
        ),
    }
)

add_to_catalog(benchmark, "benchmarks.safety", overwrite=True)
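
A minimal usage sketch, not part of this PR: assuming the new entry loads by its catalog name like other unitxt benchmarks, an end-to-end run might look roughly like the following. The model name and provider are placeholders, and the exact load_dataset/evaluate signatures and result accessors should be checked against the installed unitxt version.

from unitxt import evaluate, load_dataset
from unitxt.inference import CrossProviderInferenceEngine

# Load the benchmark's test split; each subset is capped by max_test_instances above.
dataset = load_dataset("benchmarks.safety", split="test")

# Any unitxt inference engine should work here; CrossProviderInferenceEngine routes
# requests to a hosted provider (the model and provider values are placeholders).
model = CrossProviderInferenceEngine(model="llama-3-3-70b-instruct", provider="watsonx")
predictions = model.infer(dataset)

# Scores come from the Granite Guardian harm metrics configured in each card,
# aggregated per subset and per group_by field.
results = evaluate(predictions=predictions, data=dataset)
print(results.subsets_scores.summary)  # assumed accessor; may differ by unitxt version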
5 changes: 3 additions & 2 deletions prepare/cards/attaq.py
@@ -17,8 +17,9 @@
         Shuffle(page_size=2800),
     ],
     task=Task(
-        input_fields=["input"],
-        reference_fields=["label"],
+        input_fields={"input": str},
+        reference_fields={"label": str},
+        prediction_type=str,
         metrics=["metrics.safety_metric"],
     ),
     templates=[
1 change: 1 addition & 0 deletions prepare/cards/safety/airbench2024.py
@@ -43,6 +43,7 @@
         },
         reference_fields={},
         prediction_type=str,
+        # TODO: The current implementation uses Granite Guardian as the judge. Future versions could use a custom judge via the judge_prompt field.
         metrics=[
             "metrics.granite_guardian.assistant_risk.harm[prediction_type=str,user_message_field=prompt,assistant_message_field=prediction]",
         ],
27 changes: 27 additions & 0 deletions prepare/cards/safety/attaq_gg.py
@@ -0,0 +1,27 @@
from unitxt import add_to_catalog
from unitxt.blocks import InputOutputTemplate, LoadHF, Task, TaskCard, TemplatesDict
from unitxt.operators import Shuffle
from unitxt.splitters import RenameSplits
from unitxt.test_utils.card import test_card

card = TaskCard(
    loader=LoadHF(path="ibm/AttaQ"),
    preprocess_steps=[
        RenameSplits(mapper={"train": "test"}),
        Shuffle(page_size=2800),
    ],
    task=Task(
        input_fields={"input": str},
        reference_fields={"label": str},
        prediction_type=str,
        metrics=[
            "metrics.granite_guardian.assistant_risk.harm[prediction_type=str,user_message_field=input,assistant_message_field=prediction]",
        ],
    ),
    templates=TemplatesDict(
        {"default": InputOutputTemplate(input_format="{input}", output_format="")}
    ),
)

test_card(card, strict=False, demos_taken_from="test", num_demos=0)
add_to_catalog(card, "cards.safety.attaq_gg", overwrite=True)
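
For context, not part of the diff: the bracketed metric string reuses the catalog Granite Guardian harm metric and overrides its arguments so the card's "input" field is sent to the judge as the user message and the model output as the assistant message. A quick standalone sanity check of the card could look like the sketch below; the load_dataset keyword arguments mirror DatasetRecipe and are assumptions to verify against the installed unitxt version.

from unitxt import load_dataset

# Build a small slice of the AttaQ card with its default template
# (the loader_limit and split values here are illustrative only).
dataset = load_dataset(
    card="cards.safety.attaq_gg",
    template_card_index="default",
    loader_limit=100,
    split="test",
)
print(dataset[0]["source"])  # the rendered prompt that would be sent to the model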
4 changes: 3 additions & 1 deletion prepare/cards/safety/mlcommons_ailuminate.py
@@ -26,7 +26,9 @@
         },
         reference_fields={},
         prediction_type=str,
-        metrics=["metrics.llm_as_judge.safety.llamaguard"],
+        metrics=[
+            "metrics.granite_guardian.assistant_risk.harm[prediction_type=str,user_message_field=prompt_text,assistant_message_field=prediction]"
+        ],
     ),
     templates=TemplatesDict(
         {
31 changes: 31 additions & 0 deletions prepare/cards/safety/provoq_gg.py
@@ -0,0 +1,31 @@
from unitxt import add_to_catalog
from unitxt.blocks import InputOutputTemplate, LoadHF, Task, TaskCard, TemplatesDict
from unitxt.test_utils.card import test_card

card = TaskCard(
    loader=LoadHF(path="IBM/ProvoQ", data_classification_policy=["public"]),
    task=Task(
        input_fields={"question": str, "stigma": str, "group": str},
        reference_fields={},
        prediction_type=str,
        metrics=[
            "metrics.granite_guardian.assistant_risk.harm[prediction_type=str,user_message_field=question,assistant_message_field=prediction]",
        ],
    ),
    templates=TemplatesDict(
        {"default": InputOutputTemplate(input_format="{question}", output_format="")}
    ),
    __description__="The ProvoQ dataset is designed to evaluate the sensitivity of large language models (LLMs) to stigma-related topics. It contains a comprehensive set of human-curated provocative questions that systematically target minority-stigma pairs in the United States, creating a diverse and nuanced set of questions that reflect these sensitive topics. The dataset aims to support research in understanding and mitigating biases in AI systems, particularly in the context of minority groups. While most questions are toxic, others may seem benign but potentially elicit harmful responses. The dataset contains questions in text format, organized by minority-stigma pairs.",
    __tags__={
        "languages": ["english"],
    },
)

test_card(
    card,
    strict=False,
    demos_taken_from="test",
    num_demos=0,
)

add_to_catalog(card, "cards.safety.provoq_gg", overwrite=True)
41 changes: 41 additions & 0 deletions src/unitxt/catalog/benchmarks/safety.json
@@ -0,0 +1,41 @@
{
    "__type__": "benchmark",
    "subsets": {
        "attaq": {
            "__type__": "dataset_recipe",
            "card": "cards.safety.attaq_gg",
            "template_card_index": "default",
            "group_by": [
                "label"
            ],
            "max_test_instances": 1000
        },
        "provoq": {
            "__type__": "dataset_recipe",
            "card": "cards.safety.provoq_gg",
            "template_card_index": "default",
            "group_by": [
                "group"
            ],
            "max_test_instances": 1000
        },
        "airbench": {
            "__type__": "dataset_recipe",
            "card": "cards.safety.airbench2024",
            "template_card_index": "default",
            "group_by": [
                "l2-name"
            ],
            "max_test_instances": 1000
        },
        "ailuminate": {
            "__type__": "dataset_recipe",
            "card": "cards.safety.mlcommons_ailuminate",
            "template_card_index": "default",
            "group_by": [
                "hazard"
            ],
            "max_test_instances": 1000
        }
    }
}
13 changes: 7 additions & 6 deletions src/unitxt/catalog/cards/attaq.json
@@ -18,12 +18,13 @@
     ],
     "task": {
         "__type__": "task",
-        "input_fields": [
-            "input"
-        ],
-        "reference_fields": [
-            "label"
-        ],
+        "input_fields": {
+            "input": "str"
+        },
+        "reference_fields": {
+            "label": "str"
+        },
+        "prediction_type": "str",
         "metrics": [
             "metrics.safety_metric"
         ]
42 changes: 42 additions & 0 deletions src/unitxt/catalog/cards/safety/attaq_gg.json
@@ -0,0 +1,42 @@
{
    "__type__": "task_card",
    "loader": {
        "__type__": "load_hf",
        "path": "ibm/AttaQ"
    },
    "preprocess_steps": [
        {
            "__type__": "rename_splits",
            "mapper": {
                "train": "test"
            }
        },
        {
            "__type__": "shuffle",
            "page_size": 2800
        }
    ],
    "task": {
        "__type__": "task",
        "input_fields": {
            "input": "str"
        },
        "reference_fields": {
            "label": "str"
        },
        "prediction_type": "str",
        "metrics": [
            "metrics.granite_guardian.assistant_risk.harm[prediction_type=str,user_message_field=input,assistant_message_field=prediction]"
        ]
    },
    "templates": {
        "__type__": "templates_dict",
        "items": {
            "default": {
                "__type__": "input_output_template",
                "input_format": "{input}",
                "output_format": ""
            }
        }
    }
}
2 changes: 1 addition & 1 deletion src/unitxt/catalog/cards/safety/mlcommons_ailuminate.json
@@ -22,7 +22,7 @@
"reference_fields": {},
"prediction_type": "str",
"metrics": [
"metrics.llm_as_judge.safety.llamaguard"
"metrics.granite_guardian.assistant_risk.harm[prediction_type=str,user_message_field=prompt_text,assistant_message_field=prediction]"
]
},
"templates": {
39 changes: 39 additions & 0 deletions src/unitxt/catalog/cards/safety/provoq_gg.json
@@ -0,0 +1,39 @@
{
    "__type__": "task_card",
    "loader": {
        "__type__": "load_hf",
        "path": "IBM/ProvoQ",
        "data_classification_policy": [
            "public"
        ]
    },
    "task": {
        "__type__": "task",
        "input_fields": {
            "question": "str",
            "stigma": "str",
            "group": "str"
        },
        "reference_fields": {},
        "prediction_type": "str",
        "metrics": [
            "metrics.granite_guardian.assistant_risk.harm[prediction_type=str,user_message_field=question,assistant_message_field=prediction]"
        ]
    },
    "templates": {
        "__type__": "templates_dict",
        "items": {
            "default": {
                "__type__": "input_output_template",
                "input_format": "{question}",
                "output_format": ""
            }
        }
    },
    "__description__": "The ProvoQ dataset is designed to evaluate the sensitivity of large language models (LLMs) to stigma-related topics. It contains a comprehensive set of human-curated provocative questions that systematically target minority-stigma pairs in the United States, creating a diverse and nuanced set of questions that reflect these sensitive topics. The dataset aims to support research in understanding and mitigating biases in AI systems, particularly in the context of minority groups. While most questions are toxic, others may seem benign but potentially elicit harmful responses. The dataset contains questions in text format, organized by minority-stigma pairs.",
    "__tags__": {
        "languages": [
            "english"
        ]
    }
}
2 changes: 1 addition & 1 deletion src/unitxt/processors.py
@@ -326,7 +326,7 @@ def process_value(self, text: Any) -> Any:
         try:
             return float(match.group(1)) * 0.25 - 0.25
         except:
-            return np.NaN
+            return np.nan


class ExtractMtBenchLabelJudgment(FieldOperator):